# Simulations.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov  1 15:16:34 2011

@author: dave
__author__ = "David Verelst <dave@dtu.dk>"
__license__ = "GPL-2+"
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import
from builtins import dict
from io import open
from builtins import zip
from builtins import range
from builtins import str
from builtins import int
from future import standard_library
standard_library.install_aliases()
from builtins import object

# standard python library
import os
import subprocess as sproc
import copy
import zipfile
import shutil
import datetime
import math
import pickle
import re
import itertools
# what is actually the difference between warnings and logging.warn?
# for which context is which better?
import logging
from operator import itemgetter
from time import time
#import Queue
#import threading
#from multiprocessing import Pool

# numpy and scipy only used in HtcMaster._all_in_one_blade_tag
import numpy as np
import scipy
import scipy.interpolate as interpolate
#import matplotlib.pyplot as plt
import pandas as pd
import tables as tbl

# custom libraries
from wetb.prepost import misc
from wetb.prepost import windIO
from wetb.prepost import prepost
from wetb.dlc import high_level as dlc
from wetb.prepost.GenerateHydro import hydro_input
from wetb.utils.envelope import compute_envelope
def load_pickled_file(source):
    """Load and return the object pickled at path *source*.

    The file handle is managed with a context manager so it is closed
    even when ``pickle.load`` raises.
    """
    with open(source, 'rb') as fid:
        return pickle.load(fid)

def save_pickle(source, variable):
    """Pickle *variable* to path *source*.

    Protocol 2 is kept for backwards (Python 2) compatibility of the
    resulting pickle files. The handle is closed via a context manager
    even when ``pickle.dump`` raises.
    """
    with open(source, 'wb') as fid:
        pickle.dump(variable, fid, protocol=2)

def write_file(file_path, file_contents, mode):
    """Write a string to a file.

    Parameters
    ----------
    file_path : str
        path/to/file/name.csv
    file_contents : str
        Contents to be written in the given mode.
    mode : str
        File mode: reading (r), writing (w), append (a), ...
    """
    # context manager guarantees the handle is closed on write errors
    with open(file_path, mode) as fid:
        fid.write(file_contents)

def create_multiloop_list(iter_dict, debug=False):
    """
    Create a list based on multiple nested loops
    ============================================

    Considerd the following example

    >>> for v in range(V_start, V_end, V_delta):
    ...     for y in range(y_start, y_end, y_delta):
    ...         for c in range(c_start, c_end, c_delta):
    ...             print v, y, c

    Could be replaced by a list with all these combinations. In order to
    replicate this with create_multiloop_list, iter_dict should have
    the following structure

    >>> iter_dict = dict()
    >>> iter_dict['v'] = range(V_start, V_end, V_delta)
    >>> iter_dict['y'] = range(y_start, y_end, y_delta)
    >>> iter_dict['c'] = range(c_start, c_end, c_delta)
    >>> iter_list = create_multiloop_list(iter_dict)
    >>> for case in iter_list:
    ...     print case['v'], case['y'], case['c']

    Parameters
    ----------

    iter_dict : dictionary
        Key holds a valid tag as used in HtcMaster.tags. The corresponding
        value should be a list of values to be considered.

    debug : boolean, default=False
        Print each created combination dictionary.

    Returns
    -------

    iter_list : list
        List containing dictionaries. Each entry is a combination of the
        given iter_dict keys.

    Raises
    ------

    ValueError
        If any value in iter_dict is not a list.
    UserWarning
        Fail safe: if more than 20000 combinations are generated.

    Example
    -------

    >>> iter_dict={'[wind]':[5,6,7],'[coning]':[0,-5,-10]}
    >>> create_multiloop_list(iter_dict)
    [{'[wind]': 5, '[coning]': 0},
     {'[wind]': 5, '[coning]': -5},
     {'[wind]': 5, '[coning]': -10},
     {'[wind]': 6, '[coning]': 0},
     {'[wind]': 6, '[coning]': -5},
     {'[wind]': 6, '[coning]': -10},
     {'[wind]': 7, '[coning]': 0},
     {'[wind]': 7, '[coning]': -5},
     {'[wind]': 7, '[coning]': -10}]
    """

    # each value needs to be an iterable: len() and indexing are required
    for key in iter_dict:
        if type(iter_dict[key]).__name__ != 'list':
            print('%s does not hold a list' % key)
            raise ValueError('Each value in iter_dict has to be a list!')

    # fix the order of the keys
    key_order = list(iter_dict.keys())

    # itertools.product cycles the LAST iterable fastest, which is exactly
    # the nested-loop order of the original hand-rolled odometer loop
    iter_list = []
    for combi in itertools.product(*(iter_dict[k] for k in key_order)):
        iter_list.append(dict(zip(key_order, combi)))
        if debug:
            print(iter_list[-1])
        # fail safe exit mechanism against accidentally huge spaces
        if len(iter_list) > 20000:
            raise UserWarning('multiloop_list has already '
                              + str(len(iter_list)) + ' items..')

    return iter_list

def local_shell_script(htc_dict, sim_id):
    """Compose a sequential bash run script (runall.sh) for all cases.

    Every case is run through 32-bit wine; the script is written to the
    [run_dir] of the last case (all cases are expected to share it).
    """
    separator = '"' + '*'*80 + '"'
    nr_cases = len(htc_dict)
    script = ''
    for nr, case in enumerate(htc_dict, 1):
        tags = htc_dict[case]
        script += 'echo ""' + '\n'
        script += 'echo ' + separator + '\n' + 'echo '
        script += '" ===> Progress:' + str(nr) + '/' + str(nr_cases) + '"\n'
        # destination of the script: the run dir of the current case
        scriptpath = os.path.join(tags['[run_dir]'], 'runall.sh')
        # fall back to the default executable when the tag is absent
        hawc2_exe = tags.get('[hawc2_exe]', 'hawc2mb.exe')
        # log all warning messages: WINEDEBUG=-all!
        wine = 'WINEARCH=win32 WINEPREFIX=~/.wine32 wine'
        target = os.path.join(tags['[htc_dir]'], case)
        script += '%s %s %s \n' % (wine, hawc2_exe, target)
        script += 'echo ' + separator + '\n'

    write_file(scriptpath, script, 'w')
    print('\nrun local shell script written to:')
    print(scriptpath)

def local_windows_script(cases, sim_id, nr_cpus=2):
    """Write Windows batch scripts to run the cases on nr_cpus processes.

    The cases are split over nr_cpus scripts (runall-1.bat, runall-2.bat,
    ...). Each script first robocopies the model folder into its own
    working copy (_1_, _2_, ...) so that simultaneously running HAWC2
    instances do not interfere, and moves all results back afterwards.
    When a [turb_db_dir] tag is set, turbulence boxes are copied in from
    the data base before a run, moved back afterwards if not already in
    the data base, and removed from the run dir.

    Parameters
    ----------

    cases : dict{ case : dict{tag : value} }
        Dictionary where each case is a key and its value a dictionary
        holding all the tags/value pairs as used for that case. All cases
        are assumed to share the same [run_dir].

    sim_id : str
        Not used here; kept for interface symmetry with local_shell_script.

    nr_cpus : int, default=2
        Number of parallel batch scripts (one per cpu) to create.
    """

    tot_cases = len(cases)
    i_script = 1
    # counts cases written into the current script
    i_case_script = 1
    # distribute the cases evenly: ceil so the last script may be shorter
    cases_per_script = int(math.ceil(float(tot_cases)/float(nr_cpus)))
    # header of the new script, each process has its own copy
    header = ''
    header += 'rem\nrem\n'
    header += 'mkdir _%i_\n'
    # copy the data folder in case it holds a lot of .dat files
    header += 'robocopy .\data .\_%i_\data /e \n'
    # do not copy the following stuff
    exc_file_pat = ['*.log', '*.dat', '*.sel', '*.xls*', '*.bat']
    exc_dir_pat = ['_*_', 'data']
    header += 'robocopy .\ .\_%i_ /e '
    header += (' /xf ' + ' /xf '.join(exc_file_pat))
    header += (' /xd ' + ' /xd '.join(exc_dir_pat))
    header += '\n'
    header += 'cd _%i_\n'
    header += 'rem\nrem\n'
    # footer moves everything from the per-process copy back to the root
    footer = ''
    footer += 'rem\nrem\n'
    footer += 'cd ..\n'
    footer += 'robocopy .\_%i_\ /e .\ /move\n'
    footer += 'rem\nrem\n'
    # the header template takes the script number four times
    # (mkdir, robocopy data, robocopy model, cd)
    shellscript = header % (i_script, i_script, i_script, i_script)

    # tracks whether the current script has been finalized with a footer
    stop = False

    for i_case, (cname, case) in enumerate(cases.items()):
#    for i_case, case in enumerate(sorted(cases.keys())):

        shellscript += 'rem\nrem\n'
        shellscript += 'rem ===> Progress: %3i / %3i\n' % (i_case+1, tot_cases)
        # copy turbulence from data base, if applicable
        if case['[turb_db_dir]'] is not None:
            # we are one dir up in cpu exe dir
            turb = case['[turb_base_name]'] + '*.bin'
            dbdir = os.path.join('./../', case['[turb_db_dir]'], turb)
            dbdir = dbdir.replace('/', '\\')
            rpl = (dbdir, case['[turb_dir]'].replace('/', '\\'))
            shellscript += 'copy %s %s\n' % rpl

        # get a shorter version for the current cases tag_dict:
        scriptpath = '%srunall-%i.bat' % (case['[run_dir]'], i_script)
        htcpath = case['[htc_dir]'][:-1].replace('/', '\\') # ditch the /
        try:
            hawc2_exe = case['[hawc2_exe]']
        except KeyError:
            hawc2_exe = 'hawc2mb.exe'
        rpl = (hawc2_exe.replace('/', '\\'), htcpath, cname.replace('/', '\\'))
        shellscript += "%s .\\%s\\%s\n" % rpl
        # copy back to data base directory if they do not exists there
        # remove turbulence file again, if copied from data base
        if case['[turb_db_dir]'] is not None:
            # copy back if it does not exist in the data base
            # IF EXIST "c:\test\file.ext"  (move /y "C:\test\file.ext" "C:\quality\" )
            turbu = case['[turb_base_name]'] + 'u.bin'
            turbv = case['[turb_base_name]'] + 'v.bin'
            turbw = case['[turb_base_name]'] + 'w.bin'
            dbdir = os.path.join('./../', case['[turb_db_dir]'])
            for tu in (turbu, turbv, turbw):
                tu_db = os.path.join(dbdir, tu).replace('/', '\\')
                tu_run = os.path.join(case['[turb_dir]'], tu).replace('/', '\\')
                rpl = (tu_db, tu_run, dbdir.replace('/', '\\'))
                shellscript += 'IF NOT EXIST "%s" move /y "%s" "%s"\n' % rpl
            # remove turbulence from run dir
            allturb = os.path.join(case['[turb_dir]'], '*.*')
            allturb = allturb.replace('/', '\\')
            # do not prompt for delete confirmation: /Q
            shellscript += 'del /Q "%s"\n' % allturb

        # current script is full: finalize it and start the next one
        if i_case_script >= cases_per_script:
            # footer: copy all files back
            shellscript += footer % i_script
            stop = True
            write_file(scriptpath, shellscript, 'w')
            print('\nrun local shell script written to:')
            print(scriptpath)

            # header of the new script, each process has its own copy
            # but only if there are actually jobs left
            if i_case+1 < tot_cases:
                i_script += 1
                i_case_script = 1
                shellscript = header % (i_script, i_script, i_script, i_script)
                stop = False
        else:
            i_case_script += 1

    # we might have missed the footer of a partial script
    if not stop:
        shellscript += footer % i_script
        write_file(scriptpath, shellscript, 'w')
        print('\nrun local shell script written to:')
        print(scriptpath)

def run_local_ram(cases, check_log=True):
    """Run the cases from a RAM-backed directory under /tmp.

    All files from [run_dir] are first copied to /tmp/HAWC2/[sim_id], the
    cases are launched from there via run_local, and afterwards all
    results are copied back to the original [run_dir].

    NOTE(review): all cases are assumed to share the same [run_dir] and
    [sim_id]; the copy-back stage uses the values from the last case in
    the dict -- confirm against callers.

    Parameters
    ----------

    cases : dict{ case : dict{tag : value} }

    check_log : boolean, default=True
        Passed on to run_local.

    Returns
    -------

    cases : dict{ case : dict{tag : value} }
        The cases as updated by run_local.
    """

    ram_root = '/tmp/HAWC2/'

    if not os.path.exists(ram_root):
        os.makedirs(ram_root)

    print('copying data from run_dir to RAM...', end='')

    # first copy everything to RAM
    for ii, case in enumerate(cases):
        # all tags for the current case
        tags = cases[case]
        run_dir = copy.copy(tags['[run_dir]'])
        run_dir_ram = ram_root + tags['[sim_id]']
        if not os.path.exists(run_dir_ram):
            os.makedirs(run_dir_ram)
        # and also change the run dir so we can launch it easily
        tags['[run_dir]'] = run_dir_ram + '/'
        for root, dirs, files in os.walk(run_dir):
            run_dir_base = os.path.commonprefix([root, run_dir])
            cdir = root.replace(run_dir_base, '')
            dstbase = os.path.join(run_dir_ram, cdir)
            if not os.path.exists(dstbase):
                os.makedirs(dstbase)
            for fname in files:
                src = os.path.join(root, fname)
                dst = os.path.join(dstbase, fname)
                shutil.copy2(src, dst)

    print('done')

    # launch from RAM
    run_local(cases, check_log=check_log)
    # change run_dir back to original
    for ii, case in enumerate(cases):
        tags = cases[case]
        tags['[run_dir]'] = run_dir

    print('copying data from RAM back to run_dir')
    print('run_dir: %s' % run_dir)

    # and copy everything back
    for root, dirs, files in os.walk(run_dir_ram):
        run_dir_base = os.path.commonprefix([root, run_dir_ram])
        cdir = root.replace(run_dir_base, '')
        # determine the destination directory. Previously dstbase was left
        # unset for the top level (cdir == '') and stale for a relative
        # cdir, sending files to the wrong place or raising NameError.
        if len(cdir) == 0:
            dstbase = run_dir
        # join doesn't work if cdir has a leading / ?? so drop it
        elif cdir[0] == '/':
            dstbase = os.path.join(run_dir, cdir[1:])
        else:
            dstbase = os.path.join(run_dir, cdir)
        for fname in files:
            src = os.path.join(root, fname)
            dst = os.path.join(dstbase, fname)
            if not os.path.exists(dstbase):
                os.makedirs(dstbase)
            try:
                shutil.copy2(src, dst)
            except Exception as e:
                # best-effort copy back: report the failure and continue
                print('src:', src)
                print('dst:', dst)
                print(e)
                print()

    print('...done')

    return cases


def run_local(cases, silent=False, check_log=True):
    """
    Run all HAWC2 simulations locally from cases
    ===============================================

    Run all case present in a cases dict locally and wait until HAWC2 is ready.

    In verbose mode, each HAWC2 simulation is also timed

    Parameters
    ----------

    cases : dict{ case : dict{tag : value} }
        Dictionary where each case is a key and its value a dictionary holding
        all the tags/value pairs as used for that case

    check_log : boolean, default=True
        Check the log file immediately after execution of the HAWC2 case

    silent : boolean, default=False
        When False, usefull information will be printed and the HAWC2
        simulation time will be calculated from the Python perspective. The
        silent variable is also passed on to logcheck_case

    Returns
    -------

    cases : dict{ case : dict{tag : value} }
        Update cases with the STDOUT of the respective HAWC2 simulation

    """

    # remember the current working directory
    cwd = str(os.getcwd())
    nr = len(cases)
    if not silent:
        print('')
        print('='*79)
        print('Be advised, launching %i HAWC2 simulation(s) sequentially' % nr)
        print('run dir: %s' % cases[list(cases.keys())[0]]['[run_dir]'])
        print('')

    if check_log:
        errorlogs = ErrorLogs(silent=silent)

    for ii, case in enumerate(cases):
        # all tags for the current case
        tags = cases[case]
        # for backward compatibility assume default HAWC2 executable
        try:
            hawc2_exe = tags['[hawc2_exe]']
        except KeyError:
            hawc2_exe = 'hawc2-latest'
        # TODO: if a turbulence data base is set, copy the files from there

        # the launch command
        cmd  = 'WINEDEBUG=-all WINEARCH=win32 WINEPREFIX=~/.wine32 wine'
        cmd += " %s %s%s" % (hawc2_exe, tags['[htc_dir]'], case)
        # remove any escaping in tags and case for security reasons
        cmd = cmd.replace('\\','')
        # browse to the correct launch path for the HAWC2 simulation
        os.chdir(tags['[run_dir]'])
        # create the required directories
        dirkeys = ['[data_dir]', '[htc_dir]', '[res_dir]', '[log_dir]',
                   '[eigenfreq_dir]', '[animation_dir]', '[turb_dir]',
                   '[wake_dir]', '[meander_dir]', '[opt_dir]', '[control_dir]',
                   '[mooring_dir]', '[hydro_dir]', '[externalforce]']
        for dirkey in dirkeys:
            if tags[dirkey]:
                if not os.path.exists(tags[dirkey]):
                    os.makedirs(tags[dirkey])

        if not silent:
            start = time()
            progress = '%4i/%i  : %s%s' % (ii+1, nr, tags['[htc_dir]'], case)
            print('*'*75)
            print(progress)

        # and launch the HAWC2 simulation
        p = sproc.Popen(cmd,stdout=sproc.PIPE,stderr=sproc.STDOUT,shell=True)

        # p.wait() will lock the current shell until p is done
        # p.stdout.readlines() checks if there is any output, but also locks
        # the thread if nothing comes back
        # save the output that HAWC2 sends to the shell to the cases
        # note that this is a list, each item holding a line
        # NOTE(review): on Python 3 p.stdout yields bytes, so the str
        # comparisons and ''.join below assume a py2-style str stream
        # -- TODO confirm
        cases[case]['sim_STDOUT'] = p.stdout.readlines()
        # wait until HAWC2 finished doing its magic
        p.wait()

        if not silent:
            # print(the simulation command line output
            print(' ' + '-'*75)
            print(''.join(cases[case]['sim_STDOUT']))
            print(' ' + '-'*75)
            # caclulation time
            stp = time() - start
            stpmin = stp/60.
            print('HAWC2 execution time: %8.2f sec (%8.2f min)' % (stp,stpmin))

        # where there any errors in the output? Start from the optimistic
        # assumption and flip to False as soon as one error line is found.
        # Previously each clean line overwrote the flag again, so an early
        # error followed by normal output was silently marked as ok.
        cases[case]['[hawc2_sim_ok]'] = True
        for k in cases[case]['sim_STDOUT']:
            if k[:14] in [' *** ERROR ***', 'forrtl: severe']:
                cases[case]['[hawc2_sim_ok]'] = False
                #raise UserWarning, 'Found error in HAWC2 STDOUT'
                break

        # check the log file strait away if required
        if check_log:
            start = time()
            errorlogs = logcheck_case(errorlogs, cases, case, silent=silent)
            stop = time() - start
            if case.endswith('.htc'):
                kk = case[:-4] + '.log'
            else:
                kk = case + '.log'
            errors = errorlogs.MsgListLog2[kk][0]
            exitok = errorlogs.MsgListLog2[kk][1]
            if not silent:
                print('log checks took %5.2f sec' % stop)
                print('    found error: ', errors)
                print(' exit correctly: ', exitok)
                print('*'*75)
                print()
            # also save in cases
            if not errors and exitok:
                cases[case]['[hawc2_sim_ok]'] = True
            else:
                cases[case]['[hawc2_sim_ok]'] = False

    if check_log:
        # take the last case to determine sim_id, run_dir and log_dir
        sim_id = cases[case]['[sim_id]']
        run_dir = cases[case]['[run_dir]']
        log_dir = cases[case]['[log_dir]']
        # save the extended (.csv format) errorlog list?
        # but put in one level up, so in the logfiles folder directly
        errorlogs.ResultFile = sim_id + '_ErrorLog.csv'
        # use the model path of the last encoutered case in cases
        errorlogs.PathToLogs = os.path.join(run_dir, log_dir)
        errorlogs.save()

    # just in case, browse back the working path relevant for the python magic
    os.chdir(cwd)
    if not silent:
        print('\nHAWC2 has done all of its sequential magic!')
        print('='*79)
        print('')

    return cases


def prepare_launch(iter_dict, opt_tags, master, variable_tag_func,
                write_htc=True, runmethod='none', verbose=False,
                copyback_turb=True, msg='', silent=False, check_log=True,
                update_cases=False, ignore_non_unique=False, wine_appendix='',
                run_only_new=False, windows_nr_cpus=2, wine_64bit=False,
                pbs_fname_appendix=True, short_job_names=True, qsub='',
                update_model_data=True, maxcpu=1, pyenv='wetb_py3'):
    """
    Create the htc files, pbs scripts and replace the tags in master file
    =====================================================================

    Do not use any uppercase letters in the filenames, since HAWC2 will
    convert all of them to lower case results file names (.sel, .dat, .log)

    create sub folders according to sim_id, in order to not create one
    folder for the htc, results, logfiles which grows very large in due
    time!!

    opt_tags is a list of dictionaries of tags:
        [ {tag1=12,tag2=23,..},{tag1=11, tag2=33, tag9=5,...},...]
    for each wind, yaw and coning combi, each tag dictionary in the list
    will be set.

    Make sure to always define all dictionary keys in each list, otherwise
    the value of the first appareance will remain set for the remaining
    simulations in the list.
    For instance, in the example above, if tag9=5 is not set for subsequent
    lists, tag9 will remain having value 5 for these subsequent sets

    The tags for each case are consequently set in following order (or
    presedence):
        * master
        * opt_tags
        * iter_dict
        * variable_tag_func

    Parameters
    ----------

    iter_dict : dict

    opt_tags : list

    master : HtcMaster object

    variable_tag_func : function object

    write_htc : boolean, default=True

    verbose : boolean, default=False

    runmethod : {'none' (default),'pbs','linux-script','local',
                 'local-ram', 'windows-script'}
        Specify how/what to run where. For local, each case in cases is
        run locally via python directly. If set to 'linux-script' a shell
        script is written to run all cases locally sequential. If set to
        'pbs', PBS scripts are written for a cluster (e.g. Gorm/jess).
        A Windows batch script is written in case of windows-script, and is
        used in combination with windows_nr_cpus.

    msg : str, default=''
        A descriptive message of the simulation series is saved at
        "post_dir + master.tags['[sim_id]'] + '_tags.txt'". Additionally, this
         tagfile also holds the opt_tags and iter_dict values.

    update_cases : boolean, default=False
        If True, a current cases dictionary can be updated with new simulations

    qsub : str, default=''
        Valid options are 'time' (use with launch), 'depend' (use with launch.py
        --depend) or '' (use with launch.py).
        Empty string means there are no tags placed in the pbs file, and
        consequently the pbs file can be submitted as is. When using
        qsub='time', a start time option is inserted with a start time tag
        that has to be set at launch time. With 'depend', a job_id dependency
        line is added, and when launching the job this dependency needs to
        specified.

    update_model_data : default=True
        If set to False, the zip file will not be created, and the data files
        are not copied to the run_dir. Use this when only updating the htc
        files.

    Returns
    -------

    cases : dict{ case : dict{tag : value} }
        Dictionary where each case is a key and its value a dictionary holding
        all the tags/value pairs as used for that case

    """

    post_dir = master.tags['[post_dir]']
    fpath_post_base = os.path.join(post_dir, master.tags['[sim_id]'])
    # either take a currently existing cases dictionary, or create a new one
    if update_cases:
        try:
            with open(fpath_post_base + '.pkl', 'rb') as FILE:
                cases = pickle.load(FILE)
            print('updating cases for %s' % master.tags['[sim_id]'])
        except IOError:
            print(79*'=')
            print("failed to load cases dict for updating simd_id at:")
            print(fpath_post_base + '.pkl')
            print(79*'=')
            cases = {}
        # but only run the new cases
        cases_to_run = {}
    else:
        cases = {}

    # if empty, just create a dummy item so we get into the loops
    if len(iter_dict) == 0:
        iter_dict = {'__dummy__': [0]}
    combi_list = create_multiloop_list(iter_dict)

    # load the master htc file as a string under the master.tags
    master.loadmaster()
    # save a copy of the default values
    mastertags_default = copy.copy(master.tags)

    # ignore if the opt_tags is empty, will result in zero
    if len(opt_tags) > 0:
        sim_total = len(combi_list)*len(opt_tags)
    else:
        sim_total = len(combi_list)
        # if no opt_tags specified, create an empty dummy tag
        opt_tags = [dict({'__DUMMY_TAG__' : 0})]
    sim_nr = 0

    # make sure all the required directories are in place at run_dir
#    master.create_run_dir()
#    master.init_multithreads()

    # cycle thourgh all the combinations
    for it in combi_list:
        for ot in opt_tags:
            sim_nr += 1
            # starting point should always be the default values. This is
            # important when a previous case had a certain tag defined, and in
            # the next case it is absent.
            master.tags = mastertags_default.copy()
            # update the tags from the opt_tags list
            if not '__DUMMY_TAG__' in ot:
                master.tags.update(ot)
            # update the tags set in the combi_list
            master.tags.update(it)
            # force lower case values as defined in output_dirs
            master.lower_case_output()
            # -----------------------------------------------------------
            # start variable tags update
            if variable_tag_func is not None:
                master = variable_tag_func(master)
            # end variable tags
            # -----------------------------------------------------------
            if not silent:
                print('htc progress: ' + format(sim_nr, '3.0f') + '/' + \
                       format(sim_total, '3.0f'))

            if verbose:
                print('===master.tags===\n', master.tags)

            # returns a dictionary with all the tags used for this
            # specific case
            htc = master.createcase(write_htc=write_htc)
            master.create_run_dir()
            #htc=master.createcase_check(cases_repo,write_htc=write_htc)

            # make sure the current cases is unique!
            if not ignore_non_unique:
                if list(htc.keys())[0] in cases:
                    msg = 'non unique case in cases: %s' % list(htc.keys())[0]
                    raise KeyError(msg)

            # save in the big cases. Note that values() gives a copy!
            cases[list(htc.keys())[0]] = list(htc.values())[0]
            # if we have an update scenario, keep track of the cases we want
            # to run again. This prevents us from running all cases on every
            # update
            if run_only_new:
                cases_to_run[list(htc.keys())[0]] = list(htc.values())[0]

            if verbose:
                print('created cases for: %s.htc\n' % master.tags['[case_id]'])

#    print(master.queue.get())

    # only copy data and create zip after all htc files have been created.
    # Note that createcase could also creat other input files
    # create the execution folder structure and copy all data to it
    # FIXME: this approach only considers the tags as set in the last case!
    if update_model_data:
        master.copy_model_data()
        # create the zip file
        master.create_model_zip()

    # create directory if post_dir does not exists (the makedirs call was
    # missing, leaving an empty try body and a syntax error)
    try:
        os.makedirs(post_dir)
    except OSError:
        pass
    with open(fpath_post_base + '.pkl', 'wb') as FILE:
        pickle.dump(cases, FILE, protocol=2)

    if not silent:
        print('\ncases saved at:')
        print(fpath_post_base + '.pkl')

    # also save the iter_dict and opt_tags in a text file for easy reference
    # or quick checks on what each sim_id actually contains
    # sort the taglist for convienent reading/comparing
    tagfile = msg + '\n\n'
    tagfile += '='*79 + '\n'
    tagfile += 'iter_dict\n'.rjust(30)
    tagfile += '='*79 + '\n'
    iter_dict_list = sorted(iter(iter_dict.items()), key=itemgetter(0))
    for k in iter_dict_list:
        tagfile += str(k[0]).rjust(30) + ' : ' + str(k[1]).ljust(20) + '\n'

    tagfile += '\n'
    tagfile += '='*79 + '\n'
    tagfile += 'opt_tags\n'.rjust(30)
    tagfile += '='*79 + '\n'
    for k in opt_tags:
        tagfile += '\n'
        tagfile += '-'*79 + '\n'
        tagfile += 'opt_tags set\n'.rjust(30)
        tagfile += '-'*79 + '\n'
        opt_dict = sorted(iter(k.items()), key=itemgetter(0), reverse=False)
        for kk in opt_dict:
            tagfile += str(kk[0]).rjust(30)+' : '+str(kk[1]).ljust(20) + '\n'
    if update_cases:
        mode = 'a'
    else:
        mode = 'w'
    write_file(fpath_post_base + '_tags.txt', tagfile, mode)

    if run_only_new:
        cases = cases_to_run

    launch(cases, runmethod=runmethod, verbose=verbose, check_log=check_log,
           copyback_turb=copyback_turb, qsub=qsub, wine_appendix=wine_appendix,
           windows_nr_cpus=windows_nr_cpus, short_job_names=short_job_names,
           pbs_fname_appendix=pbs_fname_appendix, silent=silent, maxcpu=maxcpu,
           pyenv=pyenv, wine_64bit=wine_64bit)

    return cases

def prepare_relaunch(cases, runmethod='gorm', verbose=False, write_htc=True,
                     copyback_turb=True, silent=False, check_log=True):
    """
    Instead of redoing everything, we now recreate the HTC file for those
    in the given cases dict. Nothing else changes. The data and zip files
    are not updated, the convenience tagfile is not recreated. However, the
    saved (pickled) cases dict corresponding to the sim_id is updated!

    This method is useful to correct mistakes made for some cases.

    It is advised to not change the case_id, sim_id, from the cases.
    """

    # initiate the HtcMaster object, load the master file
    master = HtcMaster()
    # for invariant tags, load random case. Necessary before we can load
    # the master file, otherwise we don't know which master to load
    master.tags = cases[list(cases.keys())[0]]
    master.loadmaster()

    # load the original cases dict; remember the path so the update below
    # writes to the same file (sim_id is not supposed to change, see above)
    post_dir = master.tags['[post_dir]']
    fpath = post_dir + master.tags['[sim_id]'] + '.pkl'
    with open(fpath, 'rb') as FILE:
        cases_orig = pickle.load(FILE)

    sim_nr = 0
    sim_total = len(cases)
    for case, casedict in cases.items():
        sim_nr += 1

        # set all the tags in the HtcMaster file
        master.tags = casedict
        # returns a dictionary with all the tags used for this
        # specific case
        htc = master.createcase(write_htc=write_htc)
        #htc=master.createcase_check(cases_repo,write_htc=write_htc)

        if not silent:
            print('htc progress: ' + format(sim_nr, '3.0f') + '/' + \
                   format(sim_total, '3.0f'))

        if verbose:
            print('===master.tags===\n', master.tags)

        # make sure the current cases already exists, otherwise we are not
        # relaunching!
        if case not in cases_orig:
            msg = 'relaunch only works for existing cases: %s' % case
            raise KeyError(msg)

        # save in the big cases. Note that values() gives a copy!
        # remark, what about the copying done at the end of master.createcase?
        # is that redundant then?
        cases[list(htc.keys())[0]] = list(htc.values())[0]

        if verbose:
            print('created cases for: %s.htc\n' % master.tags['[case_id]'])

    launch(cases, runmethod=runmethod, verbose=verbose, check_log=check_log,
           copyback_turb=copyback_turb, silent=silent)

    # update the original file: overwrite the newly set cases
    cases_orig.update(cases)
    with open(fpath, 'wb') as FILE:
        pickle.dump(cases_orig, FILE, protocol=2)

def prepare_launch_cases(cases, runmethod='gorm', verbose=False,write_htc=True,
                         copyback_turb=True, silent=False, check_log=True,
                         variable_tag_func=None, sim_id_new=None):
    """
    Create the htc files, run directory structure, model zip and launch
    scripts for an already fully defined cases dictionary.

    Same as prepare_launch, but now the input is just a cases object (cao).
    If relaunching some earlier defined simulations, make sure to at least
    rename the sim_id, otherwise it could become messy: things end up in the
    same folder, sim_id post file get overwritten, ...

    In case you do not use a variable_tag_func, make sure all your tags are
    defined in cases. First and foremost, this means that the case_id does not
    get updated to have a new sim_id, the path's are not updated, etc

    When given a variable_tag_func, make sure it is properly
    defined: do not base a variable tag's value on itself to avoid value chains

    The master htc file will be loaded and all tags defined in the cases dict
    will be applied to it as is.

    Parameters
    ----------
    cases : dict
        Keys are case_id strings, values are fully populated tag
        dictionaries (as produced by an earlier prepare_launch run).
    runmethod : str, default='gorm'
        One of 'none', 'local', 'local-script', 'jess', 'thyra', 'gorm'.
        Determines where [run_dir] points to and how jobs are launched.
    verbose, silent : bool
        Control the amount of progress printing.
    write_htc : bool, default=True
        Actually write the generated htc files to disk.
    copyback_turb : bool, default=True
        Passed on to launch(): copy generated turbulence boxes back.
    check_log : bool, default=True
        Passed on to launch(): add log file checking to the job.
    variable_tag_func : callable, optional
        Called as variable_tag_func(master) for each case to update
        case-dependent tags. Must return the (modified) master object.
    sim_id_new : str, optional
        Placeholder for renaming the sim_id of relaunched cases.
        NOTE(review): not implemented yet, silently ignored.

    Returns
    -------
    cases_new : dict
        The newly created cases dictionary; also pickled to
        [post_dir][sim_id].pkl.

    Raises
    ------
    ValueError
        For an unsupported runmethod.
    KeyError
        When two cases map to the same case_id.
    """

    def _set_run_dir(tags, sim_id):
        """Point tags['[run_dir]'] to the execution path for runmethod.

        Shared between the initial model-copy step and the per-case loop so
        both accept the exact same set of runmethods (previously 'jess' and
        'thyra' were each only accepted in one of the two places).
        """
        if runmethod in ['local', 'local-script', 'none']:
            path = '/home/dave/PhD_data/HAWC2_results/ojf_post/%s/' % sim_id
            tags['[run_dir]'] = path
        elif runmethod == 'jess':
            tags['[run_dir]'] = '/mnt/jess/HAWC2/ojf_post/%s/' % sim_id
        elif runmethod == 'thyra':
            tags['[run_dir]'] = '/mnt/thyra/HAWC2/ojf_post/%s/' % sim_id
        elif runmethod == 'gorm':
            tags['[run_dir]'] = '/mnt/gorm/HAWC2/ojf_post/%s/' % sim_id
        else:
            msg='unsupported runmethod, options: none, local, thyra, gorm, opt'
            raise ValueError(msg)

    # initiate the HtcMaster object, load the master file
    master = HtcMaster()
    # for invariant tags, load random case. Necessary before we can load
    # the master file, otherwise we don't know which master to load
    master.tags = cases[list(cases.keys())[0]]
    # load the master htc file as a string under the master.tags
    master.loadmaster()
    # create the execution folder structure and copy all data to it
    # but reset to the correct launch dirs first
    _set_run_dir(master.tags, master.tags['[sim_id]'])

    master.create_run_dir()
    master.copy_model_data()
    # create the zip file
    master.create_model_zip()

    sim_nr = 0
    sim_total = len(cases)

    # for safety, create a new cases dict. At the end of the ride both cases
    # and cases_new should be identical!
    cases_new = {}

    # cycle through all the combinations
    for case, casedict in cases.items():
        sim_nr += 1

        # reset the launch dirs for this case's own sim_id
        _set_run_dir(casedict, casedict['[sim_id]'])

        # -----------------------------------------------------------
        # set all the tags in the HtcMaster file
        master.tags = casedict
        # apply the variable tags if applicable
        if variable_tag_func:
            master = variable_tag_func(master)
        elif sim_id_new:
            # TODO: finish this
            # replace all the sim_id occurences with the updated one
            # this means also the case_id tag changes!
            pass
        # -----------------------------------------------------------

        # returns a dictionary with all the tags used for this specific case
        htc = master.createcase(write_htc=write_htc)

        if not silent:
            print('htc progress: ' + format(sim_nr, '3.0f') + '/' + \
                   format(sim_total, '3.0f'))

        if verbose:
            print('===master.tags===\n', master.tags)

        # make sure the current cases is unique!
        if list(htc.keys())[0] in cases_new:
            msg = 'non unique case in cases: %s' % list(htc.keys())[0]
            raise KeyError(msg)
        # save in the big cases. Note that values() gives a copy!
        # remark, what about the copying done at the end of master.createcase?
        # is that redundant then?
        cases_new[list(htc.keys())[0]] = list(htc.values())[0]

        if verbose:
            print('created cases for: %s.htc\n' % master.tags['[case_id]'])

    post_dir = master.tags['[post_dir]']

    # create directory if post_dir does not exists
    try:
        os.makedirs(post_dir)
    except OSError:
        # already exists (or not creatable: the dump below will then fail
        # with a clear IOError instead of being silently skipped)
        pass
    # protocol=2 keeps the pickle readable from Python 2 as well
    with open(post_dir + master.tags['[sim_id]'] + '.pkl', 'wb') as FILE:
        pickle.dump(cases_new, FILE, protocol=2)

    if not silent:
        print('\ncases saved at:')
        print(post_dir + master.tags['[sim_id]'] + '.pkl')

    launch(cases_new, runmethod=runmethod, verbose=verbose,
           copyback_turb=copyback_turb, check_log=check_log)

    return cases_new


def launch(cases, runmethod='none', verbose=False, copyback_turb=True,