# -*- coding: utf-8 -*-
"""
Created on Tue Nov 1 15:16:34 2011

@author: dave
__author__ = "David Verelst <dave@dtu.dk>"
__license__ = "GPL-2+"
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
from __future__ import absolute_import
from builtins import dict
from io import open
from builtins import zip
from builtins import range
from builtins import str
from builtins import int
from future import standard_library
standard_library.install_aliases()
from builtins import object

# standard python library
import os
import subprocess as sproc
import copy
import zipfile
import shutil
import datetime
import math
import pickle
import re
# what is actually the difference between warnings and logging.warn?
# for which context is which better?
import warnings
import logging
from operator import itemgetter
from time import time
#import Queue
#import threading
#from multiprocessing import Pool

# numpy and scipy only used in HtcMaster._all_in_one_blade_tag
import numpy as np
import scipy
import scipy.interpolate as interpolate
#import matplotlib.pyplot as plt
import pandas as pd
import tables as tbl

# custom libraries
from wetb.prepost import misc
from wetb.prepost import windIO
from wetb.prepost import prepost
from wetb.dlc import high_level as dlc
from wetb.prepost.GenerateHydro import hydro_input
from wetb.utils.envelope import compute_envelope


def load_pickled_file(source):
    FILE = open(source, 'rb')
    result = pickle.load(FILE)
    FILE.close()
    return result


def save_pickle(source, variable):
    FILE = open(source, 'wb')
    pickle.dump(variable, FILE, protocol=2)
    FILE.close()


def write_file(file_path, file_contents, mode):
    """
    INPUT:
        file_path: path/to/file/name.csv
        string   : file contents is a string
        mode     : reading (r), writing (w), append (a),...
    """
    FILE = open(file_path, mode)
    FILE.write(file_contents)
    FILE.close()


def create_multiloop_list(iter_dict, debug=False):
    """
    Create a list based on multiple nested loops
    ============================================

    Consider the following example:

    >>> for v in range(V_start, V_end, V_delta):
    ...     for y in range(y_start, y_end, y_delta):
    ...         for c in range(c_start, c_end, c_delta):
    ...             print(v, y, c)

    This can be replaced by a list holding all these combinations. In order
    to replicate this with create_multiloop_list, iter_dict should have the
    following structure:

    >>> iter_dict = dict()
    >>> iter_dict['v'] = range(V_start, V_end, V_delta)
    >>> iter_dict['y'] = range(y_start, y_end, y_delta)
    >>> iter_dict['c'] = range(c_start, c_end, c_delta)
    >>> iter_list = create_multiloop_list(iter_dict)
    >>> for case in iter_list:
    ...     print(case['v'], case['y'], case['c'])

    Parameters
    ----------

    iter_dict : dictionary
        Key holds a valid tag as used in HtcMaster.tags. The corresponding
        value should be a list of values to be considered.

    Output
    ------

    iter_list : list
        List containing dictionaries. Each entry is a combination of the
        given iter_dict keys.
    Example
    -------

    >>> iter_dict={'[wind]':[5,6,7],'[coning]':[0,-5,-10]}
    >>> create_multiloop_list(iter_dict)
    [{'[wind]': 5, '[coning]': 0},
     {'[wind]': 5, '[coning]': -5},
     {'[wind]': 5, '[coning]': -10},
     {'[wind]': 6, '[coning]': 0},
     {'[wind]': 6, '[coning]': -5},
     {'[wind]': 6, '[coning]': -10},
     {'[wind]': 7, '[coning]': 0},
     {'[wind]': 7, '[coning]': -5},
     {'[wind]': 7, '[coning]': -10}]
    """

    iter_list = []

    # fix the order of the keys
    key_order = list(iter_dict.keys())
    nr_keys = len(key_order)
    nr_values, indices = [], []
    # determine how many items on each key
    for key in key_order:
        # each value needs to be an iterable! len() will fail if it isn't
        # count how many values there are for each key
        if type(iter_dict[key]).__name__ != 'list':
            print('%s does not hold a list' % key)
            raise ValueError('Each value in iter_dict has to be a list!')
        nr_values.append(len(iter_dict[key]))
        # create an initial indices list
        indices.append(0)

    if debug:
        print(nr_values, indices)

    go_on = True
    # keep track of which index you are counting, start at the back
    loopkey = nr_keys - 1
    cc = 0
    while go_on:
        if debug:
            print(indices)

        # Each entry on the list is a dictionary with the parameter combination
        iter_list.append(dict())

        # save all the different combinations into one list
        for keyi in range(len(key_order)):
            key = key_order[keyi]
            # add the current combination of values as one dictionary
            iter_list[cc][key] = iter_dict[key][indices[keyi]]

        # +1 on the indices of the last entry, the overflow principle
        indices[loopkey] += 1

        # cycle backwards through all dimensions and propagate the +1 if the
        # current dimension is full. Hence overflow.
        for k in range(loopkey, -1, -1):
            # if the current dimension is over its max, set to zero and change
            # the dimension of the next. Remember we are going backwards
            if not indices[k] < nr_values[k] and k > 0:
                # +1 on the index of the previous dimension
                indices[k-1] += 1
                # set current loopkey index back to zero
                indices[k] = 0
                # if the previous dimension is not on max, break out
                if indices[k-1] < nr_values[k-1]:
                    break
            # if we are on the last dimension, break out if that is also on max
            elif k == 0 and not indices[k] < nr_values[k]:
                if debug:
                    print(cc)
                go_on = False

        # fail safe exit mechanism...
        if cc > 20000:
            raise UserWarning('multiloop_list has already '+str(cc)+' items..')
            go_on = False

        cc += 1

    return iter_list


def local_shell_script(htc_dict, sim_id):
    """
    Write a shell script (runall.sh) that runs all cases in htc_dict
    sequentially with wine.
    """
    shellscript = ''
    breakline = '"' + '*'*80 + '"'
    nr_cases = len(htc_dict)
    nr = 1
    for case in htc_dict:
        shellscript += 'echo ""' + '\n'
        shellscript += 'echo ' + breakline + '\n' + 'echo '
        shellscript += '" ===> Progress:'+str(nr)+'/'+str(nr_cases)+'"\n'
        # get a shorter version for the current cases tag_dict:
        scriptpath = os.path.join(htc_dict[case]['[run_dir]'], 'runall.sh')
        try:
            hawc2_exe = htc_dict[case]['[hawc2_exe]']
        except KeyError:
            hawc2_exe = 'hawc2mb.exe'
        htc_dir = htc_dict[case]['[htc_dir]']
        # log all warning messages: WINEDEBUG=-all!
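        # each case contributes one wine invocation to runall.sh; a generated
        # line would look roughly like this (illustrative case name):
        #   WINEARCH=win32 WINEPREFIX=~/.wine32 wine hawc2mb.exe htc/some_case.htc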
wine = 'WINEARCH=win32 WINEPREFIX=~/.wine32 wine' htc_target = os.path.join(htc_dir, case) shellscript += '%s %s %s \n' % (wine, hawc2_exe, htc_target) shellscript += 'echo ' + breakline + '\n' nr+=1 write_file(scriptpath, shellscript, 'w') print('\nrun local shell script written to:') print(scriptpath) def local_windows_script(cases, sim_id, nr_cpus=2): """ """ tot_cases = len(cases) i_script = 1 i_case_script = 1 cases_per_script = int(math.ceil(float(tot_cases)/float(nr_cpus))) # header of the new script, each process has its own copy header = '' header += 'rem\nrem\n' header += 'mkdir _%i_\n' # copy the data folder in case it holds a lot of .dat files header += 'robocopy .\data .\_%i_\data /e \n' # do not copy the following stuff exc_file_pat = ['*.log', '*.dat', '*.sel', '*.xls*', '*.bat'] exc_dir_pat = ['_*_', 'data'] header += 'robocopy .\ .\_%i_ /e ' header += (' /xf ' + ' /xf '.join(exc_file_pat)) header += (' /xd ' + ' /xd '.join(exc_dir_pat)) header += '\n' header += 'cd _%i_\n' header += 'rem\nrem\n' footer = '' footer += 'rem\nrem\n' footer += 'cd ..\n' footer += 'robocopy .\_%i_\ /e .\ /move\n' footer += 'rem\nrem\n' shellscript = header % (i_script, i_script, i_script, i_script) stop = False for i_case, (cname, case) in enumerate(cases.items()): # for i_case, case in enumerate(sorted(cases.keys())): shellscript += 'rem\nrem\n' shellscript += 'rem ===> Progress: %3i / %3i\n' % (i_case+1, tot_cases) # copy turbulence from data base, if applicable if case['[turb_db_dir]'] is not None: # we are one dir up in cpu exe dir turb = case['[turb_base_name]'] + '*.bin' dbdir = os.path.join('./../', case['[turb_db_dir]'], turb) dbdir = dbdir.replace('/', '\\') rpl = (dbdir, case['[turb_dir]'].replace('/', '\\')) shellscript += 'copy %s %s\n' % rpl # get a shorter version for the current cases tag_dict: scriptpath = '%srunall-%i.bat' % (case['[run_dir]'], i_script) htcpath = case['[htc_dir]'][:-1].replace('/', '\\') # ditch the / try: hawc2_exe = case['[hawc2_exe]'] except KeyError: hawc2_exe = 'hawc2mb.exe' rpl = (hawc2_exe.replace('/', '\\'), htcpath, cname.replace('/', '\\')) shellscript += "%s .\\%s\\%s\n" % rpl # copy back to data base directory if they do not exists there # remove turbulence file again, if copied from data base if case['[turb_db_dir]'] is not None: # copy back if it does not exist in the data base # IF EXIST "c:\test\file.ext" (move /y "C:\test\file.ext" "C:\quality\" ) turbu = case['[turb_base_name]'] + 'u.bin' turbv = case['[turb_base_name]'] + 'v.bin' turbw = case['[turb_base_name]'] + 'w.bin' dbdir = os.path.join('./../', case['[turb_db_dir]']) for tu in (turbu, turbv, turbw): tu_db = os.path.join(dbdir, tu).replace('/', '\\') tu_run = os.path.join(case['[turb_dir]'], tu).replace('/', '\\') rpl = (tu_db, tu_run, dbdir.replace('/', '\\')) shellscript += 'IF NOT EXIST "%s" move /y "%s" "%s"\n' % rpl # remove turbulence from run dir allturb = os.path.join(case['[turb_dir]'], '*.*') allturb = allturb.replace('/', '\\') # do not prompt for delete confirmation: /Q shellscript += 'del /Q "%s"\n' % allturb if i_case_script >= cases_per_script: # footer: copy all files back shellscript += footer % i_script stop = True write_file(scriptpath, shellscript, 'w') print('\nrun local shell script written to:') print(scriptpath) # header of the new script, each process has its own copy # but only if there are actually jobs left if i_case+1 < tot_cases: i_script += 1 i_case_script = 1 shellscript = header % (i_script, i_script, i_script, i_script) stop = False else: i_case_script 
+= 1 # we might have missed the footer of a partial script if not stop: shellscript += footer % i_script write_file(scriptpath, shellscript, 'w') print('\nrun local shell script written to:') print(scriptpath) def run_local_ram(cases, check_log=True): ram_root = '/tmp/HAWC2/' if not os.path.exists(ram_root): os.makedirs(ram_root) print('copying data from run_dir to RAM...', end='') # first copy everything to RAM for ii, case in enumerate(cases): # all tags for the current case tags = cases[case] run_dir = copy.copy(tags['[run_dir]']) run_dir_ram = ram_root + tags['[sim_id]'] if not os.path.exists(run_dir_ram): os.makedirs(run_dir_ram) # and also change the run dir so we can launch it easily tags['[run_dir]'] = run_dir_ram + '/' for root, dirs, files in os.walk(run_dir): run_dir_base = os.path.commonprefix([root, run_dir]) cdir = root.replace(run_dir_base, '') dstbase = os.path.join(run_dir_ram, cdir) if not os.path.exists(dstbase): os.makedirs(dstbase) for fname in files: src = os.path.join(root, fname) dst = os.path.join(dstbase, fname) shutil.copy2(src, dst) print('done') # launch from RAM run_local(cases, check_log=check_log) # change run_dir back to original for ii, case in enumerate(cases): tags = cases[case] tags['[run_dir]'] = run_dir print('copying data from RAM back to run_dir') print('run_dir: %s' % run_dir) # and copy everything back for root, dirs, files in os.walk(run_dir_ram): run_dir_base = os.path.commonprefix([root, run_dir_ram]) cdir = root.replace(run_dir_base, '') # in case it is the same if len(cdir) == 0: pass # join doesn't work if cdir has a leading / ?? so drop it elif cdir[0] == '/': dstbase = os.path.join(run_dir, cdir[1:]) for fname in files: src = os.path.join(root, fname) dst = os.path.join(dstbase, fname) if not os.path.exists(dstbase): os.makedirs(dstbase) try: shutil.copy2(src, dst) except Exception as e: print('src:', src) print('dst:', dst) print(e) print() pass print('...done') return cases def run_local(cases, silent=False, check_log=True): """ Run all HAWC2 simulations locally from cases =============================================== Run all case present in a cases dict locally and wait until HAWC2 is ready. In verbose mode, each HAWC2 simulation is also timed Parameters ---------- cases : dict{ case : dict{tag : value} } Dictionary where each case is a key and its value a dictionary holding all the tags/value pairs as used for that case check_log : boolean, default=False Check the log file emmidiately after execution of the HAWC2 case silent : boolean, default=False When False, usefull information will be printed and the HAWC2 simulation time will be calculated from the Python perspective. 
The silent variable is also passed on to logcheck_case Returns ------- cases : dict{ case : dict{tag : value} } Update cases with the STDOUT of the respective HAWC2 simulation """ # remember the current working directory cwd = str(os.getcwd()) nr = len(cases) if not silent: print('') print('='*79) print('Be advised, launching %i HAWC2 simulation(s) sequentially' % nr) print('run dir: %s' % cases[list(cases.keys())[0]]['[run_dir]']) print('') if check_log: errorlogs = ErrorLogs(silent=silent) for ii, case in enumerate(cases): # all tags for the current case tags = cases[case] # for backward compatibility assume default HAWC2 executable try: hawc2_exe = tags['[hawc2_exe]'] except KeyError: hawc2_exe = 'hawc2-latest' # TODO: if a turbulence data base is set, copy the files from there # the launch command cmd = 'WINEDEBUG=-all WINEARCH=win32 WINEPREFIX=~/.wine32 wine' cmd += " %s %s%s" % (hawc2_exe, tags['[htc_dir]'], case) # remove any escaping in tags and case for security reasons cmd = cmd.replace('\\','') # browse to the correct launch path for the HAWC2 simulation os.chdir(tags['[run_dir]']) # create the required directories dirkeys = ['[data_dir]', '[htc_dir]', '[res_dir]', '[log_dir]', '[eigenfreq_dir]', '[animation_dir]', '[turb_dir]', '[wake_dir]', '[meander_dir]', '[opt_dir]', '[control_dir]', '[mooring_dir]', '[hydro_dir]', '[externalforce]'] for dirkey in dirkeys: if tags[dirkey]: if not os.path.exists(tags[dirkey]): os.makedirs(tags[dirkey]) if not silent: start = time() progress = '%4i/%i : %s%s' % (ii+1, nr, tags['[htc_dir]'], case) print('*'*75) print(progress) # and launch the HAWC2 simulation p = sproc.Popen(cmd,stdout=sproc.PIPE,stderr=sproc.STDOUT,shell=True) # p.wait() will lock the current shell until p is done # p.stdout.readlines() checks if there is any output, but also locks # the thread if nothing comes back # save the output that HAWC2 sends to the shell to the cases # note that this is a list, each item holding a line cases[case]['sim_STDOUT'] = p.stdout.readlines() # wait until HAWC2 finished doing its magic p.wait() if not silent: # print(the simulation command line output print(' ' + '-'*75) print(''.join(cases[case]['sim_STDOUT'])) print(' ' + '-'*75) # caclulation time stp = time() - start stpmin = stp/60. print('HAWC2 execution time: %8.2f sec (%8.2f min)' % (stp,stpmin)) # where there any errors in the output? If yes, abort for k in cases[case]['sim_STDOUT']: kstart = k[:14] if kstart in [' *** ERROR ***', 'forrtl: severe']: cases[case]['[hawc2_sim_ok]'] = False #raise UserWarning, 'Found error in HAWC2 STDOUT' else: cases[case]['[hawc2_sim_ok]'] = True # check the log file strait away if required if check_log: start = time() errorlogs = logcheck_case(errorlogs, cases, case, silent=silent) stop = time() - start if case.endswith('.htc'): kk = case[:-4] + '.log' else: kk = case + '.log' errors = errorlogs.MsgListLog2[kk][0] exitok = errorlogs.MsgListLog2[kk][1] if not silent: print('log checks took %5.2f sec' % stop) print(' found error: ', errors) print(' exit correctly: ', exitok) print('*'*75) print() # also save in cases if not errors and exitok: cases[case]['[hawc2_sim_ok]'] = True else: cases[case]['[hawc2_sim_ok]'] = False if check_log: # take the last case to determine sim_id, run_dir and log_dir sim_id = cases[case]['[sim_id]'] run_dir = cases[case]['[run_dir]'] log_dir = cases[case]['[log_dir]'] # save the extended (.csv format) errorlog list? 
# but put in one level up, so in the logfiles folder directly errorlogs.ResultFile = sim_id + '_ErrorLog.csv' # use the model path of the last encoutered case in cases errorlogs.PathToLogs = os.path.join(run_dir, log_dir) errorlogs.save() # just in case, browse back the working path relevant for the python magic os.chdir(cwd) if not silent: print('\nHAWC2 has done all of its sequential magic!') print('='*79) print('') return cases def prepare_launch(iter_dict, opt_tags, master, variable_tag_func, write_htc=True, runmethod='none', verbose=False, copyback_turb=True, msg='', silent=False, check_log=True, update_cases=False, ignore_non_unique=False, wine_appendix='', run_only_new=False, windows_nr_cpus=2, wine_64bit=False, pbs_fname_appendix=True, short_job_names=True, qsub='', update_model_data=True, maxcpu=1, pyenv='wetb_py3'): """ Create the htc files, pbs scripts and replace the tags in master file ===================================================================== Do not use any uppercase letters in the filenames, since HAWC2 will convert all of them to lower case results file names (.sel, .dat, .log) create sub folders according to sim_id, in order to not create one folder for the htc, results, logfiles which grows very large in due time!! opt_tags is a list of dictionaries of tags: [ {tag1=12,tag2=23,..},{tag1=11, tag2=33, tag9=5,...},...] for each wind, yaw and coning combi, each tag dictionary in the list will be set. Make sure to always define all dictionary keys in each list, otherwise the value of the first appareance will remain set for the remaining simulations in the list. For instance, in the example above, if tag9=5 is not set for subsequent lists, tag9 will remain having value 5 for these subsequent sets The tags for each case are consequently set in following order (or presedence): * master * opt_tags * iter_dict * variable_tag_func Parameters ---------- iter_dict : dict opt_tags : list master : HtcMaster object variable_tag_func : function object write_htc : boolean, default=True verbose : boolean, default=False runmethod : {'none' (default),'pbs','linux-script','local', 'local-ram', 'windows-script'} Specify how/what to run where. For local, each case in cases is run locally via python directly. If set to 'linux-script' a shell script is written to run all cases locally sequential. If set to 'pbs', PBS scripts are written for a cluster (e.g. Gorm/jess). A Windows batch script is written in case of windows-script, and is used in combination with windows_nr_cpus. msg : str, default='' A descriptive message of the simulation series is saved at "post_dir + master.tags['[sim_id]'] + '_tags.txt'". Additionally, this tagfile also holds the opt_tags and iter_dict values. update_cases : boolean, default=False If True, a current cases dictionary can be updated with new simulations qsub : str, default='' Valid options are 'time' (use with launch), 'depend' (use with launch.py --depend) or '' (use with launch.py). Empty string means there are no tags placed in the pbs file, and consequently the pbs file can be submitted as is. When using qsub='time', a start time option is inserted with a start time tag that has to be set at launch time. With 'depend', a job_id dependency line is added, and when launching the job this dependency needs to specified. update_model_data : default=True If set to False, the zip file will not be created, and the data files are not copied to the run_dir. Use this when only updating the htc files. 
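    Example
    -------

    A minimal, illustrative call (tag names and values are hypothetical, and
    assume a properly configured HtcMaster object):

    >>> iter_dict = {'[windspeed]': [8, 10], '[wdir]': [0, 10]}
    >>> opt_tags = [{'[sim_id]': 'demo01'}]
    >>> cases = prepare_launch(iter_dict, opt_tags, master, None,
    ...                        write_htc=False, runmethod='none',
    ...                        update_model_data=False)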
Returns ------- cases : dict{ case : dict{tag : value} } Dictionary where each case is a key and its value a dictionary holding all the tags/value pairs as used for that case """ post_dir = master.tags['[post_dir]'] fpath_post_base = os.path.join(post_dir, master.tags['[sim_id]']) # either take a currently existing cases dictionary, or create a new one if update_cases: try: FILE = open(fpath_post_base + '.pkl', 'rb') cases = pickle.load(FILE) FILE.close() print('updating cases for %s' % master.tags['[sim_id]']) except IOError: print(79*'=') print("failed to load cases dict for updating simd_id at:") print(fpath_post_base + '.pkl') print(79*'=') cases = {} # but only run the new cases cases_to_run = {} else: cases = {} # if empty, just create a dummy item so we get into the loops if len(iter_dict) == 0: iter_dict = {'__dummy__': [0]} combi_list = create_multiloop_list(iter_dict) # load the master htc file as a string under the master.tags master.loadmaster() # save a copy of the default values mastertags_default = copy.copy(master.tags) # ignore if the opt_tags is empty, will result in zero if len(opt_tags) > 0: sim_total = len(combi_list)*len(opt_tags) else: sim_total = len(combi_list) # if no opt_tags specified, create an empty dummy tag opt_tags = [dict({'__DUMMY_TAG__' : 0})] sim_nr = 0 # make sure all the required directories are in place at run_dir # master.create_run_dir() # master.init_multithreads() # cycle thourgh all the combinations for it in combi_list: for ot in opt_tags: sim_nr += 1 # starting point should always be the default values. This is # important when a previous case had a certain tag defined, and in # the next case it is absent. master.tags = mastertags_default.copy() # update the tags from the opt_tags list if not '__DUMMY_TAG__' in ot: master.tags.update(ot) # update the tags set in the combi_list master.tags.update(it) # force lower case values as defined in output_dirs master.lower_case_output() # ----------------------------------------------------------- # start variable tags update if variable_tag_func is not None: master = variable_tag_func(master) # end variable tags # ----------------------------------------------------------- if not silent: print('htc progress: ' + format(sim_nr, '3.0f') + '/' + \ format(sim_total, '3.0f')) if verbose: print('===master.tags===\n', master.tags) # returns a dictionary with all the tags used for this # specific case htc = master.createcase(write_htc=write_htc) master.create_run_dir() #htc=master.createcase_check(cases_repo,write_htc=write_htc) # make sure the current cases is unique! if not ignore_non_unique: if list(htc.keys())[0] in cases: msg = 'non unique case in cases: %s' % list(htc.keys())[0] raise KeyError(msg) # save in the big cases. Note that values() gives a copy! cases[list(htc.keys())[0]] = list(htc.values())[0] # if we have an update scenario, keep track of the cases we want # to run again. This prevents us from running all cases on every # update if run_only_new: cases_to_run[list(htc.keys())[0]] = list(htc.values())[0] if verbose: print('created cases for: %s.htc\n' % master.tags['[case_id]']) # print(master.queue.get()) # only copy data and create zip after all htc files have been created. # Note that createcase could also creat other input files # create the execution folder structure and copy all data to it # FIXME: this approach only considers the tags as set in the last case! 
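    # when update_model_data is True, the model files are copied to run_dir
    # and bundled into the model zip only once, after all htc files have been
    # written (see also the FIXME above about which tags are used for this)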
if update_model_data: master.copy_model_data() # create the zip file master.create_model_zip() # create directory if post_dir does not exists try: os.makedirs(post_dir) except OSError: pass FILE = open(fpath_post_base + '.pkl', 'wb') pickle.dump(cases, FILE, protocol=2) FILE.close() if not silent: print('\ncases saved at:') print(fpath_post_base + '.pkl') # also save the iter_dict and opt_tags in a text file for easy reference # or quick checks on what each sim_id actually contains # sort the taglist for convienent reading/comparing tagfile = msg + '\n\n' tagfile += '='*79 + '\n' tagfile += 'iter_dict\n'.rjust(30) tagfile += '='*79 + '\n' iter_dict_list = sorted(iter(iter_dict.items()), key=itemgetter(0)) for k in iter_dict_list: tagfile += str(k[0]).rjust(30) + ' : ' + str(k[1]).ljust(20) + '\n' tagfile += '\n' tagfile += '='*79 + '\n' tagfile += 'opt_tags\n'.rjust(30) tagfile += '='*79 + '\n' for k in opt_tags: tagfile += '\n' tagfile += '-'*79 + '\n' tagfile += 'opt_tags set\n'.rjust(30) tagfile += '-'*79 + '\n' opt_dict = sorted(iter(k.items()), key=itemgetter(0), reverse=False) for kk in opt_dict: tagfile += str(kk[0]).rjust(30)+' : '+str(kk[1]).ljust(20) + '\n' if update_cases: mode = 'a' else: mode = 'w' write_file(fpath_post_base + '_tags.txt', tagfile, mode) if run_only_new: cases = cases_to_run launch(cases, runmethod=runmethod, verbose=verbose, check_log=check_log, copyback_turb=copyback_turb, qsub=qsub, wine_appendix=wine_appendix, windows_nr_cpus=windows_nr_cpus, short_job_names=short_job_names, pbs_fname_appendix=pbs_fname_appendix, silent=silent, maxcpu=maxcpu, pyenv=pyenv, wine_64bit=wine_64bit) return cases def prepare_relaunch(cases, runmethod='gorm', verbose=False, write_htc=True, copyback_turb=True, silent=False, check_log=True): """ Instead of redoing everything, we know recreate the HTC file for those in the given cases dict. Nothing else changes. The data and zip files are not updated, the convience tagfile is not recreated. However, the saved (pickled) cases dict corresponding to the sim_id is updated! This method is usefull to correct mistakes made for some cases. It is adviced to not change the case_id, sim_id, from the cases. """ # initiate the HtcMaster object, load the master file master = HtcMaster() # for invariant tags, load random case. Necessary before we can load # the master file, otherwise we don't know which master to load master.tags = cases[list(cases.keys())[0]] master.loadmaster() # load the original cases dict post_dir = master.tags['[post_dir]'] FILE = open(post_dir + master.tags['[sim_id]'] + '.pkl', 'rb') cases_orig = pickle.load(FILE) FILE.close() sim_nr = 0 sim_total = len(cases) for case, casedict in cases.items(): sim_nr += 1 # set all the tags in the HtcMaster file master.tags = casedict # returns a dictionary with all the tags used for this # specific case htc = master.createcase(write_htc=write_htc) #htc=master.createcase_check(cases_repo,write_htc=write_htc) if not silent: print('htc progress: ' + format(sim_nr, '3.0f') + '/' + \ format(sim_total, '3.0f')) if verbose: print('===master.tags===\n', master.tags) # make sure the current cases already exists, otherwise we are not # relaunching! if case not in cases_orig: msg = 'relaunch only works for existing cases: %s' % case raise KeyError(msg) # save in the big cases. Note that values() gives a copy! # remark, what about the copying done at the end of master.createcase? # is that redundant then? 
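        # store the regenerated tags under the same case name; since this is
        # a relaunch of an existing case, this overwrites the old entry in
        # cases rather than adding a new one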
cases[list(htc.keys())[0]] = list(htc.values())[0] if verbose: print('created cases for: %s.htc\n' % master.tags['[case_id]']) launch(cases, runmethod=runmethod, verbose=verbose, check_log=check_log, copyback_turb=copyback_turb, silent=silent) # update the original file: overwrite the newly set cases FILE = open(post_dir + master.tags['[sim_id]'] + '.pkl', 'wb') cases_orig.update(cases) pickle.dump(cases_orig, FILE, protocol=2) FILE.close() def prepare_launch_cases(cases, runmethod='gorm', verbose=False,write_htc=True, copyback_turb=True, silent=False, check_log=True, variable_tag_func=None, sim_id_new=None): """ Same as prepare_launch, but now the input is just a cases object (cao). If relaunching some earlier defined simulations, make sure to at least rename the sim_id, otherwise it could become messy: things end up in the same folder, sim_id post file get overwritten, ... In case you do not use a variable_tag_fuc, make sure all your tags are defined in cases. First and foremost, this means that the case_id does not get updated to have a new sim_id, the path's are not updated, etc When given a variable_tag_func, make sure it is properly defined: do not base a variable tag's value on itself to avoid value chains The master htc file will be loaded and alls tags defined in the cases dict will be applied to it as is. """ # initiate the HtcMaster object, load the master file master = HtcMaster() # for invariant tags, load random case. Necessary before we can load # the master file, otherwise we don't know which master to load master.tags = cases[list(cases.keys())[0]] # load the master htc file as a string under the master.tags master.loadmaster() # create the execution folder structure and copy all data to it # but reset to the correct launch dirs first sim_id = master.tags['[sim_id]'] if runmethod in ['local', 'local-script', 'none']: path = '/home/dave/PhD_data/HAWC2_results/ojf_post/%s/' % sim_id master.tags['[run_dir]'] = path elif runmethod == 'jess': master.tags['[run_dir]'] = '/mnt/jess/HAWC2/ojf_post/%s/' % sim_id elif runmethod == 'gorm': master.tags['[run_dir]'] = '/mnt/gorm/HAWC2/ojf_post/%s/' % sim_id else: msg='unsupported runmethod, options: none, local, thyra, gorm, opt' raise ValueError(msg) master.create_run_dir() master.copy_model_data() # create the zip file master.create_model_zip() sim_nr = 0 sim_total = len(cases) # for safety, create a new cases dict. At the end of the ride both cases # and cases_new should be identical! cases_new = {} # cycle thourgh all the combinations for case, casedict in cases.items(): sim_nr += 1 sim_id = casedict['[sim_id]'] # reset the launch dirs if runmethod in ['local', 'local-script', 'none']: path = '/home/dave/PhD_data/HAWC2_results/ojf_post/%s/' % sim_id casedict['[run_dir]'] = path elif runmethod == 'thyra': casedict['[run_dir]'] = '/mnt/thyra/HAWC2/ojf_post/%s/' % sim_id elif runmethod == 'gorm': casedict['[run_dir]'] = '/mnt/gorm/HAWC2/ojf_post/%s/' % sim_id else: msg='unsupported runmethod, options: none, local, thyra, gorm, opt' raise ValueError(msg) # ----------------------------------------------------------- # set all the tags in the HtcMaster file master.tags = casedict # apply the variable tags if applicable if variable_tag_func: master = variable_tag_func(master) elif sim_id_new: # TODO: finish this # replace all the sim_id occurences with the updated one # this means also the case_id tag changes! 
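            # a possible (untested) sketch for this TODO: replace the old
            # sim_id in every string valued tag, e.g.
            #   for key, val in master.tags.items():
            #       if isinstance(val, str):
            #           master.tags[key] = val.replace(sim_id, sim_id_new)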
pass # ----------------------------------------------------------- # returns a dictionary with all the tags used for this specific case htc = master.createcase(write_htc=write_htc) if not silent: print('htc progress: ' + format(sim_nr, '3.0f') + '/' + \ format(sim_total, '3.0f')) if verbose: print('===master.tags===\n', master.tags) # make sure the current cases is unique! if list(htc.keys())[0] in cases_new: msg = 'non unique case in cases: %s' % list(htc.keys())[0] raise KeyError(msg) # save in the big cases. Note that values() gives a copy! # remark, what about the copying done at the end of master.createcase? # is that redundant then? cases_new[list(htc.keys())[0]] = list(htc.values())[0] if verbose: print('created cases for: %s.htc\n' % master.tags['[case_id]']) post_dir = master.tags['[post_dir]'] # create directory if post_dir does not exists try: os.makedirs(post_dir) except OSError: pass FILE = open(post_dir + master.tags['[sim_id]'] + '.pkl', 'wb') pickle.dump(cases_new, FILE, protocol=2) FILE.close() if not silent: print('\ncases saved at:') print(post_dir + master.tags['[sim_id]'] + '.pkl') launch(cases_new, runmethod=runmethod, verbose=verbose, copyback_turb=copyback_turb, check_log=check_log) return cases_new def launch(cases, runmethod='none', verbose=False, copyback_turb=True, silent=False, check_log=True, windows_nr_cpus=2, qsub='time', wine_appendix='', pbs_fname_appendix=True, short_job_names=True, maxcpu=1, pyenv='wetb_py3', wine_64bit=False): """ The actual launching of all cases in the Cases dictionary. Note that here only the PBS files are written and not the actuall htc files. Parameters ---------- cases : dict Dictionary with the case name as key and another dictionary as value. The latter holds all the tag/value pairs used in the respective simulation. verbose : boolean, default=False runmethod : {'none' (default),'pbs','linux-script','local', 'local-ram', 'windows-script'} Specify how/what to run where. For local, each case in cases is run locally via python directly. If set to 'linux-script' a shell script is written to run all cases locally sequential. If set to 'pbs', PBS scripts are written for a cluster (e.g. Gorm/jess). A Windows batch script is written in case of windows-script, and is used in combination with windows_nr_cpus. windows_nr_cpus : int, default=2 All cases to be run are distributed over 'windows_nr_cpus' number of Windows batch files so the user can utilize 'windows_nr_cpus' CPUs. 
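    Examples
    --------

    An illustrative call, assuming cases was created earlier by
    prepare_launch:

    >>> launch(cases, runmethod='linux-script', check_log=True)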
""" random_case = list(cases.keys())[0] sim_id = cases[random_case]['[sim_id]'] pbs_out_dir = cases[random_case]['[pbs_out_dir]'] if runmethod == 'local-script' or runmethod == 'linux-script': local_shell_script(cases, sim_id) elif runmethod == 'windows-script': local_windows_script(cases, sim_id, nr_cpus=windows_nr_cpus) elif runmethod in ['pbs','jess','gorm']: # create the pbs object pbs = PBS(cases, short_job_names=short_job_names, pyenv=pyenv, pbs_fname_appendix=pbs_fname_appendix, qsub=qsub, verbose=verbose, silent=silent, wine_64bit=wine_64bit) pbs.wine_appendix = wine_appendix pbs.copyback_turb = copyback_turb pbs.pbs_out_dir = pbs_out_dir pbs.maxcpu = maxcpu pbs.create() elif runmethod == 'local': cases = run_local(cases, silent=silent, check_log=check_log) elif runmethod =='local-ram': cases = run_local_ram(cases, check_log=check_log) elif runmethod == 'none': pass else: msg = 'unsupported runmethod, valid options: local, linux-script, ' \ 'windows-script, local-ram, none, pbs' raise ValueError(msg) def post_launch(cases, save_iter=False, silent=False, suffix=None, path_errorlog=None): """ Do some basics checks: do all launched cases have a result and LOG file and are there any errors in the LOG files? Parameters ---------- cases : either a string (path to file) or the cases itself save_iter : boolean, default=False Set to True to save the number of iterations per time step in *.iter file (in the same folder as the logfile) path_errorlog : str, default=None Root path of the error logfiles. If set to None (default), the value set in the [run_dir] tag is used as the root folder of the logfiles. suffix : str, default=None If not None, the suffix will be appended to file name of the error log analysis file as follows: "ErrorLog_suffix.csv". """ # TODO: finish support for default location of the cases and file name # two scenario's: either pass on an cases and get from their the # post processing path or pass on the simid and load from the cases # from the default location # in case run_local, do not check PBS! # in case it is a path, load the cases if type(cases).__name__ == 'str': cases = load_pickled_file(cases) # saving output to textfile and print(at the same time LOG = Log() LOG.print_logging = True # load one case dictionary from the cases to get data that is the same # over all simulations in the cases try: master = list(cases.keys())[0] except IndexError: print('there are no cases, aborting...') return None post_dir = cases[master]['[post_dir]'] sim_id = cases[master]['[sim_id]'] run_dir = cases[master]['[run_dir]'] log_dir = cases[master]['[log_dir]'] # for how many of the created cases are there actually result, log files pbs = PBS(cases) pbs.cases = cases cases_fail = pbs.check_results(cases) # add the failed cases to the LOG: LOG.add(['number of failed cases: ' + str(len(cases_fail))]) LOG.add(list(cases_fail)) # for k in cases_fail: # print(k # initiate the object to check the log files errorlogs = ErrorLogs(cases=cases) LOG.add(['checking ' + str(len(cases)) + ' LOG files...']) nr = 1 nr_tot = len(cases) tmp = list(cases.keys())[0] if not silent: print('checking logs, path (from a random item in cases):') print(os.path.join(run_dir, log_dir)) for k in sorted(cases.keys()): # a case could not have a result, but a log file might still exist if k.endswith('.htc'): kk = k[:-4] + '.log' else: kk = k + '.log' # note that if errorlogs.PathToLogs is a file, it will only check that # file. 
If it is a directory, it will check all that is in the dir run_dir = cases[k]['[run_dir]'] log_dir = cases[k]['[log_dir]'] errorlogs.PathToLogs = os.path.join(run_dir, log_dir, kk) try: errorlogs.check(save_iter=save_iter) if not silent: print('checking logfile progress: % 6i/% 6i' % (nr, nr_tot)) except IOError: if not silent: print(' no logfile for: %s' % (errorlogs.PathToLogs)) except Exception as e: if not silent: print(' log analysis failed for: %s' % kk) print(e) nr += 1 # if simulation did not ended correctly, put it on the fail list try: if not errorlogs.MsgListLog2[kk][1]: cases_fail[k] = cases[k] except KeyError: pass # now see how many cases resulted in an error and add to the general LOG # determine how long the first case name is try: spacing = len(list(errorlogs.MsgListLog2.keys())[0]) + 9 except Exception as e: print('nr of OK cases: %i' % (len(cases) - len(cases_fail))) raise(e) LOG.add(['display log check'.ljust(spacing) + 'found_error?'.ljust(15) + \ 'exit_correctly?']) for k in errorlogs.MsgListLog2: LOG.add([k.ljust(spacing)+str(errorlogs.MsgListLog2[k][0]).ljust(15)+\ str(errorlogs.MsgListLog2[k][1]) ]) # save the extended (.csv format) errorlog list? # but put in one level up, so in the logfiles folder directly errorlogs.ResultFile = sim_id + '_ErrorLog.csv' # save the log file analysis in the run_dir instead of the log_dir if path_errorlog is None: errorlogs.PathToLogs = run_dir# + log_dir else: errorlogs.PathToLogs = path_errorlog errorlogs.save(suffix=suffix) # save the error LOG list, this is redundant, since it already exists in # the general LOG file (but only as a print, not the python variable) tmp = os.path.join(post_dir, sim_id + '_MsgListLog2') save_pickle(tmp, errorlogs.MsgListLog2) # save the list of failed cases save_pickle(os.path.join(post_dir, sim_id + '_fail.pkl'), cases_fail) return cases_fail def copy_pbs_in_failedcases(cases_fail, path='pbs_in_fail', silent=True): """ Copy all the pbs_in files from failed cases to a new directory so it is easy to re-launch them """ if not silent: print('Following failed cases pbs_in files are copied:') for cname in cases_fail.keys(): case = cases_fail[cname] pbs_in_fname = '%s.p' % (case['[case_id]']) run_dir = case['[run_dir]'] src = os.path.join(run_dir, case['[pbs_in_dir]'], pbs_in_fname) pbs_in_dir_fail = case['[pbs_in_dir]'].replace('pbs_in', path) dst = os.path.join(run_dir, pbs_in_dir_fail, pbs_in_fname) if not silent: print(dst) if not os.path.exists(os.path.dirname(dst)): os.makedirs(os.path.dirname(dst)) shutil.copy2(src, dst) def logcheck_case(errorlogs, cases, case, silent=False): """ Check logfile of a single case ============================== Given the cases and a case, check that single case on errors in the logfile. """ #post_dir = cases[case]['[post_dir]'] #sim_id = cases[case]['[sim_id]'] run_dir = cases[case]['[run_dir]'] log_dir = cases[case]['[log_dir]'] if case.endswith('.htc'): caselog = case[:-4] + '.log' else: caselog = case + '.log' errorlogs.PathToLogs = os.path.join(run_dir, log_dir, caselog) errorlogs.check() # in case we find an error, abort or not? errors = errorlogs.MsgListLog2[caselog][0] exitcorrect = errorlogs.MsgListLog2[caselog][1] if errors: # print all error messages #logs.MsgListLog : [ [case, line nr, error1, line nr, error2, ....], ] # difficult: MsgListLog is not a dict!! #raise UserWarning, 'HAWC2 simulation has errors in logfile, abort!' 
        #warnings.warn('HAWC2 simulation has errors in logfile!')
        logging.warning('HAWC2 simulation has errors in logfile!')
    elif not exitcorrect:
        #raise UserWarning, 'HAWC2 simulation did not end correctly, abort!'
        #warnings.warn('HAWC2 simulation did not end correctly!')
        logging.warning('HAWC2 simulation did not end correctly!')

    # no need to do that, aborts on failure anyway and OK log check will be
    # printed in run_local when also printing how long it took to check
    #if not silent:
        #print 'log checks ok'
        #print '   found error: %s' % errorlogs.MsgListLog2[caselog][0]
        #print 'exit correctly: %s' % errorlogs.MsgListLog2[caselog][1]

    return errorlogs

    ## save the extended (.csv format) errorlog list?
    ## but put in one level up, so in the logfiles folder directly
    #errorlogs.ResultFile = sim_id + '_ErrorLog.csv'
    ## use the model path of the last encountered case in cases
    #errorlogs.PathToLogs = run_dir + log_dir
    #errorlogs.save()


class Log(object):
    """
    Class for convenient logging. Create an instance and add lines to the
    logfile as a list with the function add. The added items will be printed
    if self.print_logging = True. Default value is False.

    Create the instance, add with .add(lines) (lines=list), save with
    .save(target), and print the current log to screen with .printscreen().
    """
    def __init__(self):
        self.log = []
        # option, should the lines added to the log be printed as well?
        self.print_logging = False
        self.file_mode = 'a'

    def add(self, lines):
        # the input is a list, where each entry is considered as a new line
        for k in lines:
            self.log.append(k)
            if self.print_logging:
                print(k)

    def save(self, target):
        # treat every item in the log list as a new line
        FILE = open(target, self.file_mode)
        for k in self.log:
            FILE.write(k + '\n')
        FILE.close()
        # and empty the log again
        self.log = []

    def printscreen(self):
        for k in self.log:
            print(k)


class HtcMaster(object):
    """
    """

    def __init__(self, verbose=False, silent=False):
        """
        """
        # TODO: make HtcMaster callable, so that when called you actually
        # set a value for a certain tag or add a new one. In doing so,
        # you can actually warn when you are overwriting a tag, or when
        # a different tag has the same name, etc

        # create a dictionary with the tag name as key and the default value
        self.tags = dict()

        # should we print where the file is written?
        self.verbose = verbose
        self.silent = silent

        # following tags are required
        #---------------------------------------------------------------------
        self.tags['[case_id]'] = None
        self.tags['[master_htc_file]'] = None
        self.tags['[master_htc_dir]'] = None
        # path to model zip file, needs to be accessible from the server
        # relative from the directory where the pbs files are launched on the
        # server. Suggestion is to always place the zip file in the model
        # folder, so only the zip file name has to be defined
        self.tags['[model_zip]'] = None
        # path to HAWTOPT blade result file: quasi/res/blade.dat
        self.tags['[blade_hawtopt_dir]'] = None
        self.tags['[blade_hawtopt]'] = None
        self.tags['[zaxis_fact]'] = 1.0
        # TODO: rename to execution dir, that description fits much better!
        self.tags['[run_dir]'] = None
        #self.tags['[run_dir]'] = '/home/dave/tmp/'
        # following dirs are relative to the run_dir!!
        # they indicate the location of the SAVED (!!)
results, they can be # different from the execution dirs on the node which are set in PBS self.tags['[hawc2_exe]'] = 'hawc2mb.exe' self.tags['[data_dir]'] = 'data/' self.tags['[res_dir]'] = 'results/' self.tags['[iter_dir]'] = 'iter/' self.tags['[log_dir]'] = 'logfiles/' self.tags['[turb_dir]'] = 'turb/' self.tags['[wake_dir]'] = None self.tags['[meand_dir]'] = None self.tags['[turb_db_dir]'] = None self.tags['[wake_db_dir]'] = None self.tags['[meand_db_dir]'] = None self.tags['[control_dir]'] = 'control/' self.tags['[externalforce]'] = 'externalforce/' self.tags['[animation_dir]'] = 'animation/' self.tags['[eigenfreq_dir]'] = 'eigenfreq/' self.tags['[wake_dir]'] = 'wake/' self.tags['[meander_dir]'] = 'meander/' self.tags['[htc_dir]'] = 'htc/' self.tags['[mooring_dir]'] = 'mooring/' self.tags['[hydro_dir]'] = 'htc_hydro/' self.tags['[pbs_out_dir]'] = 'pbs_out/' self.tags['[turb_base_name]'] = None self.tags['[wake_base_name]'] = None self.tags['[meand_base_name]'] = None self.tags['[zip_root_files]'] = [] self.tags['[fname_source]'] = [] self.tags['[fname_default_target]'] = [] self.tags['[eigen_analysis]'] = False self.tags['[pbs_queue_command]'] = '#PBS -q workq' # the express que has 2 thyra nodes with max walltime of 1h # self.tags['[pbs_queue_command]'] = '#PBS -q xpresq' # walltime should have following format: hh:mm:ss self.tags['[walltime]'] = '04:00:00' # self.queue = Queue.Queue() self.output_dirs = ['[res_dir]', '[log_dir]', '[turb_dir]', '[case_id]', '[wake_dir]', '[animation_dir]', '[meand_dir]', '[eigenfreq_dir]'] def create_run_dir(self): """ If non existent, create run_dir and all required model sub directories """ dirkeys = ['[data_dir]', '[htc_dir]', '[res_dir]', '[log_dir]', '[eigenfreq_dir]', '[animation_dir]', '[turb_dir]', '[wake_dir]', '[meander_dir]', '[opt_dir]', '[control_dir]', '[mooring_dir]', '[hydro_dir]', '[externalforce]'] # create all the necessary directories for dirkey in dirkeys: if isinstance(self.tags[dirkey], str): path = os.path.join(self.tags['[run_dir]'], self.tags[dirkey]) if not os.path.exists(path): os.makedirs(path) # TODO: copy_model_data and create_model_zip should be the same. 
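    # copy_model_data below assumes at least the following tags are set
    # (the values shown here are purely illustrative):
    #   '[model_dir_local]' : 'path/to/local/model/'
    #   '[run_dir]'         : 'path/to/execution/dir/'
    #   '[data_dir]'        : 'data/'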
def copy_model_data(self): """ Copy the model data to the execution folder """ # in case we are running local and the model dir is the server dir # we do not need to copy the data files, they are already on location data_local = os.path.join(self.tags['[model_dir_local]'], self.tags['[data_dir]']) data_run = os.path.join(self.tags['[run_dir]'], self.tags['[data_dir]']) if data_local == data_run: return # copy root files model_root = self.tags['[model_dir_local]'] run_root = self.tags['[run_dir]'] for fname in self.tags['[zip_root_files]']: shutil.copy2(model_root + fname, run_root + fname) # copy special files with changing file names if '[ESYSMooring_init_fname]' in self.tags: if isinstance(self.tags['[ESYSMooring_init_fname]'], str): fname_source = self.tags['[ESYSMooring_init_fname]'] fname_target = 'ESYSMooring_init.dat' shutil.copy2(model_root + fname_source, run_root + fname_target) # copy the master file into the htc/_master dir src = os.path.join(self.tags['[master_htc_dir]'], self.tags['[master_htc_file]']) # FIXME: htc_dir can contain the DLC folder name dst = os.path.join(self.tags['[run_dir]'], 'htc', '_master') if not os.path.exists(dst): os.makedirs(dst) shutil.copy2(src, dst) # copy all content of the following dirs dirs = [self.tags['[control_dir]'], self.tags['[hydro_dir]'], self.tags['[mooring_dir]'], self.tags['[externalforce]'], self.tags['[data_dir]'], 'htc/DLCs/'] plocal = self.tags['[model_dir_local]'] prun = self.tags['[run_dir]'] # copy all files present in the specified folders for path in dirs: if not path: continue elif not os.path.exists(os.path.join(plocal, path)): continue for root, dirs, files in os.walk(os.path.join(plocal, path)): for file_name in files: src = os.path.join(root, file_name) dst = os.path.abspath(root).replace(os.path.abspath(plocal), os.path.abspath(prun)) if not os.path.exists(dst): os.makedirs(dst) dst = os.path.join(dst, file_name) shutil.copy2(src, dst) # and last copies: the files with generic input names if not isinstance(self.tags['[fname_source]'], list): raise ValueError('[fname_source] needs to be a list') if not isinstance(self.tags['[fname_default_target]'], list): raise ValueError('[fname_default_target] needs to be a list') len1 = len(self.tags['[fname_source]']) len2 = len(self.tags['[fname_default_target]']) if len1 != len2: raise ValueError('[fname_source] and [fname_default_target] ' 'need to have the same number of items') for i in range(len1): src = os.path.join(plocal, self.tags['[fname_source]'][i]) dst = os.path.join(prun, self.tags['[fname_default_target]'][i]) if not os.path.exists(os.path.dirname(dst)): os.makedirs(os.path.dirname(dst)) shutil.copy2(src, dst) # TODO: copy_model_data and create_model_zip should be the same. def create_model_zip(self): """ Create the model zip file based on the master tags file settings. Paremeters ---------- master : HtcMaster object """ # FIXME: all directories should be called trough their appropriate tag! #model_dir = HOME_DIR + 'PhD/Projects/Hawc2Models/'+MODEL+'/' model_dir_server = self.tags['[run_dir]'] model_dir_local = self.tags['[model_dir_local]'] # --------------------------------------------------------------------- # create the zipfile object locally fname = os.path.join(model_dir_local, self.tags['[model_zip]']) zf = zipfile.ZipFile(fname, 'w') # empty folders, the'll hold the outputs # zf.write(source, target in zip, ) # TODO: use user defined directories here and in PBS # note that they need to be same as defined in the PBS script. 
We # manually set these up instead of just copying the original. # animation_dir = self.tags['[animation_dir]'] # eigenfreq_dir = self.tags['[eigenfreq_dir]'] # logfiles_dir = self.tags['[log_dir]'] # results_dir = self.tags['[res_dir]'] # htc_dir = self.tags['[htc_dir]'] htcmaster = self.tags['[master_htc_file]'] control_dir = self.tags['[control_dir]'] htcmaster_dir = self.tags['[master_htc_dir]'] data_dir = self.tags['[data_dir]'] turb_dir = self.tags['[turb_dir]'] wake_dir = self.tags['[wake_dir]'] meander_dir = self.tags['[meander_dir]'] mooring_dir = self.tags['[mooring_dir]'] hydro_dir = self.tags['[hydro_dir]'] extforce = self.tags['[externalforce]'] # result dirs are not required, HAWC2 will create them dirs = [control_dir, data_dir, extforce, turb_dir, wake_dir, meander_dir, mooring_dir, hydro_dir] for zipdir in dirs: if zipdir: zf.write('.', os.path.join(zipdir, '.'), zipfile.ZIP_DEFLATED) zf.write('.', 'htc/_master/.', zipfile.ZIP_DEFLATED) # if any, add files that should be added to the root of the zip file for file_name in self.tags['[zip_root_files]']: src = os.path.join(model_dir_local, file_name) zf.write(src, file_name, zipfile.ZIP_DEFLATED) if '[ESYSMooring_init_fname]' in self.tags: if self.tags['[ESYSMooring_init_fname]'] is not None: fname_source = self.tags['[ESYSMooring_init_fname]'] fname_target = 'ESYSMooring_init.dat' zf.write(model_dir_local + fname_source, fname_target, zipfile.ZIP_DEFLATED) # the master file src = os.path.join(htcmaster_dir, htcmaster) dst = os.path.join('htc', '_master', htcmaster) zf.write(src, dst, zipfile.ZIP_DEFLATED) # manually add all that resides in control, mooring and hydro paths = [control_dir, mooring_dir, hydro_dir, extforce, data_dir] for target_path in paths: if not target_path: continue path_src = os.path.join(model_dir_local, target_path) for root, dirs, files in os.walk(path_src): for file_name in files: #print 'adding', file_name src = os.path.join(root, file_name) # the zip file only contains the relative paths rel_dst = root.replace(os.path.abspath(model_dir_local), '') if os.path.isabs(rel_dst): rel_dst = rel_dst[1:] rel_dst = os.path.join(rel_dst, file_name) zf.write(src, rel_dst, zipfile.ZIP_DEFLATED) # and last copies: the files with generic input names if not isinstance(self.tags['[fname_source]'], list): raise ValueError('[fname_source] needs to be a list') if not isinstance(self.tags['[fname_default_target]'], list): raise ValueError('[fname_default_target] needs to be a list') len1 = len(self.tags['[fname_source]']) len2 = len(self.tags['[fname_default_target]']) if len1 != len2: raise ValueError('[fname_source] and [fname_default_target] ' 'need to have the same number of items') for i in range(len1): src = os.path.join(model_dir_local, self.tags['[fname_source]'][i]) # the zip file only contains the relative paths rel_dst = self.tags['[fname_default_target]'][i] # we can not have an absolute path here, make sure it isn't if os.path.isabs(rel_dst): rel_dst = rel_dst[1:] zf.write(src, rel_dst, zipfile.ZIP_DEFLATED) # and close again zf.close() # --------------------------------------------------------------------- # copy zip file to the server, this will be used on the nodes src = os.path.join(model_dir_local, self.tags['[model_zip]']) dst = os.path.join(model_dir_server, self.tags['[model_zip]']) # in case we are running local and the model dir is the server dir # we do not need to copy the zip file, it is already on location if not src == dst: shutil.copy2(src, dst) ## copy to zip data file to sim_id htc folder 
on the server dir ## so we now have exactly all data to relaunch any htc file later #dst = model_dir_server + self.tags['[htc_dir]'] #dst += self.tags['[model_zip]'] #shutil.copy2(src, dst) def _sweep_tags(self): """ The original way with all tags in the htc file for each blade node """ # set the correct sweep cruve, these values are used a = self.tags['[sweep_amp]'] b = self.tags['[sweep_exp]'] z0 = self.tags['[sweep_curve_z0]'] ze = self.tags['[sweep_curve_ze]'] nr = self.tags['[nr_nodes_blade]'] # format for the x values in the htc file ff = ' 1.03f' for zz in range(nr): it_nosweep = '[x'+str(zz+1)+'-nosweep]' item = '[x'+str(zz+1)+']' z = self.tags['[z'+str(zz+1)+']'] if z >= z0: curve = eval(self.tags['[sweep_curve_def]']) # new swept position = original + sweep curve self.tags[item]=format(self.tags[it_nosweep]+curve,ff) else: self.tags[item]=format(self.tags[it_nosweep], ff) def _staircase_windramp(self, nr_steps, wind_step, ramptime, septime): """Create a stair case wind ramp """ pass def _all_in_one_blade_tag(self, radius_new=None): """ Create htc input based on a HAWTOPT blade result file Automatically get the number of nodes correct in master.tags based on the number of blade nodes WARNING: initial x position of the half chord point is assumed to be zero zaxis_fact : int, default=1.0 --> is member of default tags Factor for the htc z-axis coordinates. The htc z axis is mapped to the HAWTOPT radius. If the blade radius develops in negative z direction, set to -1 Parameters ---------- radius_new : ndarray(n), default=False z coordinates of the nodes. If False, a linear distribution is used and the tag [nr--of-nodes-per-blade] sets the number of nodes """ # TODO: implement support for x position to be other than zero # TODO: This is not a good place, should live somewhere else. Or # reconsider inputs etc so there is more freedom in changing the # location of the nodes, set initial x position of the blade etc # and save under tag [blade_htc_node_input] in htc input format nr_nodes = self.tags['[nr_nodes_blade]'] blade = self.tags['[blade_hawtopt]'] # in the htc file, blade root =0 and not blade hub radius blade[:,0] = blade[:,0] - blade[0,0] if type(radius_new).__name__ == 'NoneType': # interpolate to the specified number of nodes radius_new = np.linspace(blade[0,0], blade[-1,0], nr_nodes) # Data checks on radius_new elif not type(radius_new).__name__ == 'ndarray': raise ValueError('radius_new has to be either NoneType or ndarray') else: if not len(radius_new.shape) == 1: raise ValueError('radius_new has to be 1D') elif not len(radius_new) == nr_nodes: msg = 'radius_new has to have ' + str(nr_nodes) + ' elements' raise ValueError(msg) # save the nodal positions in the tag cloud self.tags['[blade_nodes_z_positions]'] = radius_new # make sure that radius_hr is just slightly smaller than radius low res radius_new[-1] = blade[-1,0]-0.00000001 twist_new = interpolate.griddata(blade[:,0], blade[:,2], radius_new) # blade_new is the htc node input part: # sec 1 x y z twist; blade_new = scipy.zeros((len(radius_new),4)) blade_new[:,2] = radius_new*self.tags['[zaxis_fact]'] # twist angle remains the same in either case (standard/ojf rotation) blade_new[:,3] = twist_new*-1. 
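        # the swept x position is obtained below by eval'ing the string tag
        # '[sweep_curve_def]'; a hypothetical definition could look like:
        #   '[sweep_curve_def]' : 'a*math.pow((z - z0)/(ze - z0), b)'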
# set the correct sweep cruve, these values are used a = self.tags['[sweep_amp]'] b = self.tags['[sweep_exp]'] z0 = self.tags['[sweep_curve_z0]'] ze = self.tags['[sweep_curve_ze]'] tmp = 'nsec ' + str(nr_nodes) + ';' for k in range(nr_nodes): tmp += '\n' i = k+1 z = blade_new[k,2] y = blade_new[k,1] twist = blade_new[k,3] # x position, sweeping? if z >= z0: x = eval(self.tags['[sweep_curve_def]']) else: x = 0.0 # the node number tmp += ' sec ' + format(i, '2.0f') tmp += format(x, ' 11.03f') tmp += format(y, ' 11.03f') tmp += format(z, ' 11.03f') tmp += format(twist, ' 11.03f') tmp += ' ;' self.tags['[blade_htc_node_input]'] = tmp # and create the ae file #5 Blade Radius [m] Chord[m] T/C[%] Set no. of pc file #1 25 some comments #0.000 0.100 21.000 1 nr_points = blade.shape[0] tmp2 = '1 Blade Radius [m] Chord [m] T/C [%] pc file set nr\n' tmp2 += '1 %i auto generated by _all_in_one_blade_tag()' % nr_points for k in range(nr_points): tmp2 += '\n' tmp2 += '%9.3f %9.3f %9.3f' % (blade[k,0], blade[k,1], blade[k,3]) tmp2 += ' %4i' % (k+1) # end with newline tmp2 += '\n' # TODO: finish writing file, implement proper handling of hawtopt path # and save the file #if self.tags['aefile'] #write_file(file_path, tmp2, 'w') def loadmaster(self): """ Load the master file, path to master file is defined in __init__(): target, master. Additionally, find all the tags in the master file. Note that tags [] in the label and comment sections are ignored. All the tags that are found in the master file are saved in the self.tags_in_master dictionary, with the line numbers in a list as values: tags_in_master[tagname] = [line nr occurance 1, line nr occurance 2, ] note that tagname includes the [] """ # what is faster, load the file in one string and do replace()? # or the check error log approach? fpath = os.path.join(self.tags['[master_htc_dir]'], self.tags['[master_htc_file]']) # load the file: if not self.silent: print('loading master: ' + fpath) with open(fpath, 'r') as f: lines = f.readlines() # regex for finding all tags in a line regex = re.compile('(\\[.*?\\])') self.tags_in_master = {} # convert to string: self.master_str = '' for i, line in enumerate(lines): # are there any tags on this line? Ignore comment AND label section tags = regex.findall(line.split(';')[0].split('#')[0]) for tag in tags: try: self.tags_in_master[tag].append(i) except KeyError: self.tags_in_master[tag] = [i] # safe for later self.master_str += line def createcase_check(self, htc_dict_repo, \ tmp_dir='/tmp/HawcPyTmp/', write_htc=True): """ Check if a certain case name already exists in a specified htc_dict. If true, return a message and do not create the case. It can be that either the case name is a duplicate and should be named differently, or that the simulation is a duplicate and it shouldn't be repeated. """ # is the [case_id] tag unique, given the htc_dict_repo? if self.verbose: print('checking if following case is in htc_dict_repo: ') print(self.tags['[case_id]'] + '.htc') if self.tags['[case_id]'] + '.htc' in htc_dict_repo: # if the new case_id already exists in the htc_dict_repo # do not add it again! # print('case_id key is not unique in the given htc_dict_repo!' 
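            # abort instead of silently overwriting the existing case entry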
raise UserWarning('case_id key is not unique in the given ' 'htc_dict_repo!') else: htc = self.createcase(tmp_dir=tmp_dir, write_htc=write_htc) return htc def createcase(self, tmp_dir='/tmp/HawcPyTmp/', write_htc=True): """ replace all the tags from the master file and save the new htc file """ htc = self.master_str # FIXME: HAWC2 always outputs result and logfile in lower case, so # force case_id/Case id. to be lower case self.tags['[case_id]'] = self.tags['[case_id]'].lower() if '[Case id.]' in self.tags: self.tags['[Case id.]'] = self.tags['[Case id.]'].lower() # and now replace all the tags in the htc master file # when iterating over a dict, it will give the key, given in the # corresponding format (string keys as strings, int keys as ints...) for k in self.tags: # TODO: give error if a default is not defined, like null # if it is a boolean, replace with ; or blank if isinstance(self.tags[k], bool): if self.tags[k]: # we have a boolean that is True, switch it on value = '' else: value = ';' else: value = self.tags[k] # if string is not found, it will do nothing htc = htc.replace(str(k), str(value)) # and save the the case htc file: cname = self.tags['[case_id]'] + '.htc' htc_target = os.path.join(self.tags['[run_dir]'], self.tags['[htc_dir]']) if not self.silent: print('htc will be written to: ') print(' ' + htc_target) print(' ' + cname) # and write the htc file to the temp dir first if write_htc: self.write_htc(cname, htc, htc_target) # thread = threading.Thread(target=self.write_htc, # args=(cname, htc, htc_target)) # thread.daemon = True # thread.start() # save all the tags for debugging purpuses if self.verbose: tmp = '' for key in sorted(self.tags.keys()): value = self.tags[key] rpl = (key.rjust(25), str(value).ljust(70), type(key).__name__.ljust(10), type(value).__name__) tmp += '%s -- %s -- %s -- %s\n' % rpl write_file(htc_target + cname + '.tags', tmp, 'w') # return the used tags, some parameters can be used later, such as the # turbulence name in the pbs script # return as a dictionary, to be used in htc_dict # return a copy of the tags, otherwise you will not catch changes # made to the different tags in your sim series return {cname : copy.copy(self.tags)} def write_htc(self, cname, htc, htc_target): # create subfolder if necesarry if not os.path.exists(htc_target): os.makedirs(htc_target) write_file(htc_target + cname, htc, 'w') # write_file(tmp_dir + case, htc, 'w') def lower_case_output(self): """ force lower case tags on output files since HAWC2 will force them to lower case anyway """ for key in self.output_dirs: if isinstance(self.tags[key], str): self.tags[key] = self.tags[key].lower() def write_tags(self, fname=False): """Write all tags to a DLC alike spreadsheet """ if not fname: a, b = self.tags['[master_htc_dir]'], self.tags['[master_htc_file]'] fname = os.path.join(a, b).replace('.htc', '.xlsx') df = pd.DataFrame([], columns=self.tags_in_master.keys()) df.to_excel(fname) class PBS(object): """ The part where the actual pbs script is writtin in this class (functions create(), starting() and ending() ) is based on the MS Excel macro written by Torben J. Larsen input a list with htc file names, and a dict with the other paths, such as the turbulence file and folder, htc folder and others """ def __init__(self, cases, qsub='time', silent=False, pyenv='wetb_py3', pbs_fname_appendix=True, short_job_names=True, verbose=False, wine_64bit=False): """ Define the settings here. This should be done outside, but how? 
In a text file, paramters list or first create the object and than set the non standard values?? where htc_dict is a dictionary with [key=case name, value=used_tags_dict] where tags as outputted by MasterFile (dict with the chosen options) For gorm, maxcpu is set to 1, do not change otherwise you might need to change the scratch dir handling. qsub : str time, or depend. For time each job will need to get a start time, which will have to be set by replacing [start_time]. For depend a job dependency chain will have to be established. This will be set via [nodeps] and [job_id] When none of the above, neither of them is specified, and consequently the pbs file can be submitted without replacing any tag. Use qsub=None in combination with the launch.Scheduler short_job_names : boolean, default=True How should the job be named (relevant for the PBS queueing system)? When True, it will be named like HAWC2_123456. With False, the case_id will be used as job name. """ self.verbose = verbose self.silent = silent self.pyenv = pyenv self.pyenv_cmd = 'source /home/python/miniconda3/bin/activate' # run in 32-bit or 64-bit mode. Note this uses the same assumptions # on how to configure wine in toolbox/pbsutils/config-wine-hawc2.sh wineparam = ('win32', '~/.wine32') if wine_64bit: wineparam = ('win64', '~/.wine') self.winebase = 'time WINEARCH=%s WINEPREFIX=%s ' % wineparam self.wine = self.winebase + 'wine' self.winenumactl = self.winebase + 'numactl --physcpubind=$CPU_NR wine' # TODO: based on a certain host/architecture you can change these self.maxcpu = 1 self.secperiter = 0.012 # determine at runtime if winefix has to be ran self.winefix = ' _HOSTNAME_=`hostname`\n' self.winefix += ' if [[ ${_HOSTNAME_:0:1} == "j" ]] ; then\n' self.winefix += ' WINEARCH=%s WINEPREFIX=%s winefix\n' % wineparam self.winefix += ' fi\n' # the output channels comes with a price tag. 
Each time step
        # will have a penalty depending on the number of output channels
        self.iterperstep = 8.0  # average nr of iterations per time step
        # lead time: account for time losses when starting a simulation,
        # copying the turbulence data, generating the turbulence
        self.tlead = 5.0*60.0

        # use pbs job name as prefix in the pbs file name
        self.pbs_fname_appendix = pbs_fname_appendix
        self.short_job_names = short_job_names
        # pbs job name prefix
        self.pref = 'HAWC2_'

        # the actual script starts empty
        self.pbs = ''

        # in case you want to redirect stdout to /dev/null
        # self.wine_appendix = '> /dev/null 2>&1'
        self.wine_appendix = ''
        # /dev/shm should be the RAM of the cluster
        # self.node_run_root = '/dev/shm'
        self.node_run_root = '/scratch'

        self.cases = cases

        # location of the output messages .err and .out created by the node
        self.pbs_out_dir = 'pbs_out/'
        self.pbs_in_dir = 'pbs_in/'

        # for the start number, take hour/minute combo
        d = datetime.datetime.today()
        tmp = int( str(d.hour)+format(d.minute, '02.0f') )*100
        self.pbs_start_number = tmp
        self.qsub = qsub

        # if quemethod == 'time':
        #     self.que_jobdeps = False
        # elif type(quemethod).__name__ == 'int':
        #     nr_cpus = quemethod
        #     self.que_jobdeps = True
        #     nr_jobs = len(cases)
        #     jobs_per_cpu = int(math.ceil(float(nr_jobs)/float(nr_cpus)))
        #     # precalculate all the job ids
        #     self.jobid_deps = []
        #     self.jobid_list = []
        #     count2 = self.pbs_start_number
        #     for cpu in range(nr_cpus):
        #         self.jobid_list.extend(range(count2, count2+jobs_per_cpu))
        #         # the first job to start does not have a dependency
        #         self.jobid_deps.append(None)
        #         self.jobid_deps.extend(range(count2, count2+jobs_per_cpu-1))
        #         count2 += jobs_per_cpu

        self.copyback_turb = True
        self.copyback_fnames = []
        self.copyback_fnames_rename = []
        self.copyto_generic = []
        self.copyto_fname = []

    def create(self):
        """
        Main loop for creating the pbs scripts, based on cases, which
        contains the case name as key and the tag dictionary as value
        """

        # dynamically set walltime based on the number of time steps
        # for thyra, make a list so we base the walltime on the slowest case
        self.nr_time_steps = []
        self.duration = []
        self.t0 = []
        # '[time_stop]' '[dt_sim]'

        # REMARK: this is not really consistent with how the result and log
        # file dirs are allowed to change for each individual case...
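        # ------------------------------------------------------------------
        # Illustrative sketch (not part of the original module): the walltime
        # estimate that ending() later derives from the settings above, here
        # with a made-up number of time steps and formatted with plain integer
        # arithmetic instead of the datetime call used in ending().
        # >>> secperiter, iterperstep, tlead = 0.012, 8.0, 5.0*60.0
        # >>> nr_time_steps = 20000
        # >>> tmax = int(nr_time_steps*secperiter*iterperstep + tlead)
        # >>> tmax
        # 2220
        # >>> '%02i:%02i:%02i' % (tmax//3600, (tmax % 3600)//60, tmax % 60)
        # '00:37:00'
        # ------------------------------------------------------------------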
# first check if the pbs_out_dir exists, this dir is considered to be # the same for all cases present in cases # self.tags['[run_dir]'] case0 = list(self.cases.keys())[0] path = self.cases[case0]['[run_dir]'] + self.pbs_out_dir if not os.path.exists(path): os.makedirs(path) # create pbs_in base dir path = self.cases[case0]['[run_dir]'] + self.pbs_in_dir if not os.path.exists(path): os.makedirs(path) # number the pbs jobs: count2 = self.pbs_start_number # initial cpu count is zero count1 = 1 # scan through all the cases i, i_tot = 1, len(self.cases) ended = True for case in self.cases: # get a shorter version for the current cases tag_dict: tag_dict = self.cases[case] # group all values loaded from the tag_dict here, to keep overview # the directories to SAVE the results/logs/turb files # load all relevant dir settings: the result/logfile/turbulence/zip # they are now also available for starting() and ending() parts hawc2_exe = tag_dict['[hawc2_exe]'] self.case = case.replace('.htc', '') self.sim_id = tag_dict['[sim_id]'] self.results_dir = tag_dict['[res_dir]'] self.eigenfreq_dir = tag_dict['[eigenfreq_dir]'] self.logs_dir = tag_dict['[log_dir]'] self.animation_dir = tag_dict['[animation_dir]'] self.TurbDirName = tag_dict['[turb_dir]'] self.TurbDb = tag_dict['[turb_db_dir]'] self.wakeDb = tag_dict['[wake_db_dir]'] self.meandDb = tag_dict['[meand_db_dir]'] self.WakeDirName = tag_dict['[wake_dir]'] self.MeanderDirName = tag_dict['[meander_dir]'] self.ModelZipFile = tag_dict['[model_zip]'] self.htc_dir = tag_dict['[htc_dir]'] self.hydro_dir = tag_dict['[hydro_dir]'] self.mooring_dir = tag_dict['[mooring_dir]'] self.model_path = tag_dict['[run_dir]'] self.turb_base_name = tag_dict['[turb_base_name]'] self.wake_base_name = tag_dict['[wake_base_name]'] self.meand_base_name = tag_dict['[meand_base_name]'] self.pbs_queue_command = tag_dict['[pbs_queue_command]'] self.walltime = tag_dict['[walltime]'] self.dyn_walltime = tag_dict['[auto_walltime]'] self.case_duration = tag_dict['[duration]'] # create the pbs_out_dir if necesary try: path = tag_dict['[run_dir]'] + tag_dict['[pbs_out_dir]'] if not os.path.exists(path): os.makedirs(path) self.pbs_out_dir = tag_dict['[pbs_out_dir]'] except: pass # create pbs_in subdirectories if necessary try: path = tag_dict['[run_dir]'] + tag_dict['[pbs_in_dir]'] if not os.path.exists(path): os.makedirs(path) self.pbs_in_dir = tag_dict['[pbs_in_dir]'] except: pass try: self.copyback_files = tag_dict['[copyback_files]'] self.copyback_frename = tag_dict['[copyback_frename]'] except KeyError: pass try: self.copyto_generic = tag_dict['[copyto_generic]'] self.copyto_files = tag_dict['[copyto_files]'] except KeyError: pass # one using just one file so it can be used together with the # DLC spreadsheets try: self.copyback_files = [tag_dict['[copyback_f1]']] self.copyback_frename = [tag_dict['[copyback_f1_rename]']] except KeyError: pass try: self.copyto_generic = [tag_dict['[copyto_generic_f1]']] self.copyto_files = [tag_dict['[copyto_f1]']] except KeyError: pass # related to the dynamically setting the walltime duration = float(tag_dict['[time_stop]']) dt = float(tag_dict['[dt_sim]']) self.nr_time_steps.append(duration/dt) self.duration.append(float(tag_dict['[duration]'])) self.t0.append(float(tag_dict['[t0]'])) if self.verbose: print('htc_dir in pbs.create:') print(self.htc_dir) print(self.model_path) # we only start a new case, if we have something that ended before # the very first case has to start with starting if ended: count1 = 1 # # when jobs depend on other jobs 
(constant node loading) # if self.que_jobdeps: # jobid = self.pref + str(self.jobid_list[i-1]) # jobid_dep = self.pref + str(self.jobid_deps[i-1]) # else: # jobid = self.pref + str(count2) # jobid_dep = None if self.short_job_names: jobid = self.pref + str(count2) else: jobid = tag_dict['[case_id]'] if self.pbs_fname_appendix and self.short_job_names: # define the path for the new pbs script pbs_in_fname = '%s_%s.p' % (tag_dict['[case_id]'], jobid) else: pbs_in_fname = '%s.p' % (tag_dict['[case_id]']) pbs_path = self.model_path + self.pbs_in_dir + pbs_in_fname # Start a new pbs script, we only need the tag_dict here self.starting(tag_dict, jobid) ended = False # ----------------------------------------------------------------- # WRITING THE ACTUAL JOB PARAMETERS # browse to the current scratch directory self.pbs += "\n\n" # mark start of single PBS mode self.pbs += '# ' + '='*78 + '\n' # evaluates to true if LAUNCH_PBS_MODE is NOT set self.pbs += '# single PBS mode: one case per PBS job\n' self.pbs += '# evaluates to true if LAUNCH_PBS_MODE is NOT set\n' self.pbs += "if [ -z ${LAUNCH_PBS_MODE+x} ] ; then\n" self.pbs += " echo\n" self.pbs += " echo 'Execute commands on scratch nodes'\n" self.pbs += " cd %s/$USER/$PBS_JOBID\n" % self.node_run_root self.pbs += " # create unique dir for each CPU\n" self.pbs += ' mkdir "%i"; cd "%i"\n' % (count1, count1) # output the current scratch directory self.pbs += " pwd\n" # zip file has been copied to the node before (in start_pbs()) # unzip now in the CPU scratch directory (zip file is one level up) self.pbs += " /usr/bin/unzip ../" + self.ModelZipFile + '\n' # create all directories, especially relevant if there are case # dependent sub directories that are not present in the ZIP file self.pbs += " mkdir -p " + self.htc_dir + '\n' self.pbs += " mkdir -p " + self.results_dir + '\n' self.pbs += " mkdir -p " + self.logs_dir + '\n' if self.TurbDirName is not None or self.TurbDirName != 'None': self.pbs += " mkdir -p " + self.TurbDirName + '\n' if self.WakeDirName and self.WakeDirName != self.TurbDirName: self.pbs += " mkdir -p " + self.WakeDirName + '\n' if self.MeanderDirName and self.MeanderDirName != self.TurbDirName: self.pbs += " mkdir -p " + self.MeanderDirName + '\n' if self.hydro_dir: self.pbs += " mkdir -p " + self.hydro_dir + '\n' # create the eigen analysis dir just in case that is necessary if self.eigenfreq_dir: self.pbs += ' mkdir -p %s\n' % self.eigenfreq_dir # and copy the htc file to the node self.pbs += " cp -R $PBS_O_WORKDIR/" + self.htc_dir \ + case +" ./" + self.htc_dir + '\n' # if there is a turbulence file data base dir, copy from there if self.TurbDb: turb_dir_src = os.path.join('$PBS_O_WORKDIR', self.TurbDb) else: turb_dir_src = os.path.join('$PBS_O_WORKDIR', self.TurbDirName) # the original behaviour makes assumptions on the turbulence box # names: turb_base_name_xxx_u.bin, turb_base_name_xxx_v.bin if self.turb_base_name is not None: turb_src = os.path.join(turb_dir_src, self.turb_base_name) self.pbs += " cp -R %s*.bin %s\n" % (turb_src, self.TurbDirName) # more generally, literally define the names of the boxes for u,v,w # components elif '[turb_fname_u]' in tag_dict: turb_u = os.path.join(turb_dir_src, tag_dict['[turb_fname_u]']) turb_v = os.path.join(turb_dir_src, tag_dict['[turb_fname_v]']) turb_w = os.path.join(turb_dir_src, tag_dict['[turb_fname_w]']) self.pbs += " cp %s %s\n" % (turb_u, self.TurbDirName) self.pbs += " cp %s %s\n" % (turb_v, self.TurbDirName) self.pbs += " cp %s %s\n" % (turb_w, self.TurbDirName) # if there 
is a turbulence file data base dir, copy from there if self.wakeDb and self.WakeDirName: wake_dir_src = os.path.join('$PBS_O_WORKDIR', self.wakeDb) elif self.WakeDirName: wake_dir_src = os.path.join('$PBS_O_WORKDIR', self.WakeDirName) if self.wake_base_name is not None: wake_src = os.path.join(wake_dir_src, self.wake_base_name) self.pbs += " cp -R %s*.bin %s\n" % (wake_src, self.WakeDirName) # if there is a turbulence file data base dir, copy from there if self.meandDb and self.MeanderDirName: meand_dir_src = os.path.join('$PBS_O_WORKDIR', self.meandDb) elif self.MeanderDirName: meand_dir_src = os.path.join('$PBS_O_WORKDIR', self.MeanderDirName) if self.meand_base_name is not None: meand_src = os.path.join(meand_dir_src, self.meand_base_name) self.pbs += " cp -R %s*.bin %s\n" % (meand_src, self.MeanderDirName) # copy and rename input files with given versioned name to the # required non unique generic version for fname, fgen in zip(self.copyto_files, self.copyto_generic): self.pbs += " cp -R $PBS_O_WORKDIR/%s ./%s\n" % (fname, fgen) # only apply the wine fix in PBS mode self.pbs += self.winefix # TODO: activate python env, calculate post-processing # self.pbs += 'echo `python -c "import wetb; print(wetb.__version__)"`\n' # mark end of single PBS mode self.pbs += '# ' + '='*78 + '\n\n' # end of the file copying in PBS mode # mark start of find+xargs mode self.pbs += '# ' + '-'*78 + '\n' self.pbs += '# find+xargs mode: 1 PBS job, multiple cases\n' self.pbs += "else\n" # when in find+xargs mode, browse to the relevant CPU self.pbs += ' # with find+xargs we first browse to CPU folder\n' self.pbs += ' cd "$CPU_NR"\n' self.pbs += "fi\n" # mark end of find+xargs mode self.pbs += '# ' + '-'*78 + '\n\n' self.pbs += 'echo ""\n' # mark start of single PBS mode self.pbs += '# ' + '='*78 + '\n' self.pbs += '# single PBS mode: one case per PBS job\n' self.pbs += '# evaluates to true if LAUNCH_PBS_MODE is NOT set\n' self.pbs += "if [ -z ${LAUNCH_PBS_MODE+x} ] ; then\n" # the hawc2 execution commands via wine, in PBS mode fork and wait param = (self.wine, hawc2_exe, self.htc_dir+case, self.wine_appendix) self.pbs += ' echo "execute HAWC2, fork to background"\n' self.pbs += " %s %s ./%s %s &\n" % param # FIXME: running post-processing will only work when 1 HAWC2 job # per PBS file, otherwise you have to wait for each job to finish # first and then run the post-processing for all those cases if self.maxcpu == 1: self.pbs += ' wait\n' if self.pyenv is not None: self.pbs += ' echo "POST-PROCESSING"\n' self.pbs += ' %s %s\n' % (self.pyenv_cmd, self.pyenv) self.pbs += " " self.checklogs() self.pbs += " " self.postprocessing() self.pbs += ' source deactivate\n' # mark end of single PBS mode self.pbs += '# ' + '='*78 + '\n\n' # mark start of find+xargs mode self.pbs += '# ' + '-'*78 + '\n' self.pbs += '# find+xargs mode: 1 PBS job, multiple cases\n' self.pbs += "else\n" # numactl --physcpubind=$CPU_NR param = (self.winenumactl, hawc2_exe, self.htc_dir+case, self.wine_appendix) self.pbs += ' echo "execute HAWC2, do not fork and wait"\n' self.pbs += " %s %s ./%s %s\n" % param self.pbs += ' echo "POST-PROCESSING"\n' self.pbs += " " self.checklogs() self.pbs += " " self.postprocessing() self.pbs += "fi\n" # mark end of find+xargs mode self.pbs += '# ' + '-'*78 + '\n' #self.pbs += "wine get_mac_adresses" + '\n' # self.pbs += "cp -R ./*.mac $PBS_O_WORKDIR/." 
+ '\n' # ----------------------------------------------------------------- # and we end when the cpu's per node are full if int(count1/self.maxcpu) == 1: # write the end part of the pbs script self.ending(pbs_path) ended = True # print progress: replace = ((i/self.maxcpu), (i_tot/self.maxcpu), self.walltime) if not self.silent: print('pbs script %3i/%i walltime=%s' % replace) count2 += 1 i += 1 # the next cpu count1 += 1 # it could be that the last node was not fully loaded. In that case # we do not have had a succesfull ending, and we still need to finish if not ended: # write the end part of the pbs script self.ending(pbs_path) # progress printing replace = ( (i/self.maxcpu), (i_tot/self.maxcpu), self.walltime ) if not self.silent: print('pbs script %3i/%i walltime=%s, partially loaded' % replace) # print 'pbs progress, script '+format(i/self.maxcpu,'2.0f')\ # + '/' + format(i_tot/self.maxcpu, '2.0f') \ # + ' partially loaded...' def starting(self, tag_dict, jobid): """ First part of the pbs script """ # a new clean pbs script! self.pbs = '' self.pbs += "### Standard Output" + '\n' case_id = tag_dict['[case_id]'] # PBS job name self.pbs += "#PBS -N %s\n" % (jobid) self.pbs += "#PBS -o ./" + self.pbs_out_dir + case_id + ".out" + '\n' # self.pbs += "#PBS -o ./pbs_out/" + jobid + ".out" + '\n' self.pbs += "### Standard Error" + '\n' self.pbs += "#PBS -e ./" + self.pbs_out_dir + case_id + ".err" + '\n' # self.pbs += "#PBS -e ./pbs_out/" + jobid + ".err" + '\n' self.pbs += '#PBS -W umask=0003\n' self.pbs += "### Maximum wallclock time format HOURS:MINUTES:SECONDS\n" # self.pbs += "#PBS -l walltime=" + self.walltime + '\n' self.pbs += "#PBS -l walltime=[walltime]\n" if self.qsub == 'time': self.pbs += "#PBS -a [start_time]" + '\n' elif self.qsub == 'depend': # set job dependencies, job_id refers to PBS job_id, which is only # assigned to a job at the moment it gets qsubbed into the que self.pbs += "[nodeps]PBS -W depend=afterany:[job_id]\n" # if self.que_jobdeps: # self.pbs += "#PBS -W depend=afterany:%s\n" % jobid_dep # else: # self.pbs += "#PBS -a [start_time]" + '\n' # in case of gorm, we need to make it work correctly. Now each job # has a different scratch dir. If we set maxcpu to 12 they all have # the same scratch dir. 
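        # ------------------------------------------------------------------
        # Illustrative sketch (not part of the original module): how the job
        # name used for '#PBS -N' above and the pbs file name follow from the
        # short_job_names and pbs_fname_appendix options (made-up case).
        # >>> pref, count2, case_id = 'HAWC2_', 1023400, 'dlc12_wsp10_s101'
        # >>> jobid = pref + str(count2)        # short_job_names=True
        # >>> '%s_%s.p' % (case_id, jobid)      # pbs_fname_appendix=True
        # 'dlc12_wsp10_s101_HAWC2_1023400.p'
        # >>> '%s.p' % case_id                  # otherwise
        # 'dlc12_wsp10_s101.p'
        # ------------------------------------------------------------------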
In that case there should be done something # differently # specify the number of nodes and cpu's per node required if self.maxcpu > 1: # Number of nodes and cpus per node (ppn) lnodes = int(math.ceil(len(self.cases)/float(self.maxcpu))) lnodes = 1 self.pbs += "#PBS -l nodes=%i:ppn=%i\n" % (lnodes, self.maxcpu) else: self.pbs += "#PBS -l nodes=1:ppn=1\n" # Number of nodes and cpus per node (ppn) self.pbs += "### Queue name" + '\n' # queue names for Thyra are as follows: # short walltime queue (shorter than an hour): '#PBS -q xpresq' # or otherwise for longer jobs: '#PBS -q workq' self.pbs += self.pbs_queue_command + '\n' # mark start of single PBS mode self.pbs += '\n' + '# ' + '='*78 + '\n' # ignore all the file copying when running in xargs mode: # when varibale LAUNCH_PBS_MODE is set, file copying is ignored # and will have to be done elsewhere # we do this so the same launch script can be used either with the node # scheduler and the PBS system (for example when re-running cases) # evaluates to true if LAUNCH_PBS_MODE is NOT set self.pbs += '# single PBS mode: one case per PBS job\n' self.pbs += '# evaluates to true if LAUNCH_PBS_MODE is NOT set\n' self.pbs += "if [ -z ${LAUNCH_PBS_MODE+x} ] ; then\n" self.pbs += " ### Create scratch directory and copy data to it\n" # output the current directory self.pbs += " cd $PBS_O_WORKDIR" + '\n' self.pbs += ' echo "current working dir (pwd):"\n' self.pbs += " pwd\n" # The batch system on Gorm allows more than one job per node. # Because of this the scratch directory name includes both the # user name and the job ID, that is /scratch/$USER/$PBS_JOBID # if not scratch, make the dir if self.node_run_root != '/scratch': self.pbs += ' mkdir -p %s/$USER\n' % self.node_run_root self.pbs += ' mkdir -p %s/$USER/$PBS_JOBID\n' % self.node_run_root # copy the zip files to the scratch dir on the node self.pbs += " cp -R ./" + self.ModelZipFile + \ ' %s/$USER/$PBS_JOBID\n' % (self.node_run_root) self.pbs += "fi\n" # mark end of single PBS mode self.pbs += '# ' + '='*78 + '\n' def ending(self, pbs_path): """ Last part of the pbs script, including command to write script to disc COPY BACK: from node to """ self.pbs += "\n\n" self.pbs += "### Epilogue\n" # mark start of single PBS mode self.pbs += '# ' + "="*78 + "\n" # evaluates to true if LAUNCH_PBS_MODE is NOT set self.pbs += '# single PBS mode: one case per PBS job\n' self.pbs += '# evaluates to true if LAUNCH_PBS_MODE is NOT set\n' self.pbs += "if [ -z ${LAUNCH_PBS_MODE+x} ] ; then\n" self.pbs += " ### wait for jobs to finish\n" self.pbs += " wait\n" self.pbs += ' echo ""\n' self.pbs += ' echo "Copy back from scratch directory"\n' for i in range(1, self.maxcpu+1, 1): # navigate to the cpu dir on the node # The batch system on Gorm allows more than one job per node. # Because of this the scratch directory name includes both the # user name and the job ID, that is /scratch/$USER/$PBS_JOBID self.copyback_all_files("pbs_mode", i) # find+xargs mode only makes sense when maxcpu==1, cpu handling # for this mode is handled elsewhere if self.maxcpu == 1: # mark start of find+xargs mode self.pbs += '# ' + "-"*78 + "\n" self.pbs += '# find+xargs mode: 1 PBS job, multiple cases\n' self.pbs += 'else\n' self.copyback_all_files("find+xargs", None) # mark end of find+xargs mode # self.pbs += '# ' + "-"*78 + "\n" # # and delete it all (but that is not allowed) # self.pbs += 'cd ..\n' # self.pbs += 'ls -lah\n' # self.pbs += 'echo $PBS_JOBID\n' # self.pbs += 'rm -r $PBS_JOBID\n' # Delete the batch file at the end. 
However, is this possible since # the batch file is still open at this point???? # self.pbs += "rm " self.pbs += 'fi\n' # base walltime on the longest simulation in the batch nr_time_steps = max(self.nr_time_steps) # TODO: take into acccount the difference between time steps with # and without output. This penelaty also depends on the number of # channels outputted. So from 0 until t0 we have no penalty, # from t0 until t0+duration we have the output penalty. # always a predifined lead time to account for startup losses tmax = int(nr_time_steps*self.secperiter*self.iterperstep + self.tlead) if self.dyn_walltime: dt_seconds = datetime.datetime.fromtimestamp(tmax) self.walltime = dt_seconds.strftime('%H:%M:%S') self.pbs = self.pbs.replace('[walltime]', self.walltime) else: self.pbs = self.pbs.replace('[walltime]', self.walltime) # and reset the nr_time_steps list for the next pbs job file self.nr_time_steps = [] self.t0 = [] self.duration = [] # TODO: add logfile checking support directly here. In that way each # node will do the logfile checking and statistics calculations right # after the simulation. Figure out a way how to merge the data from # all the different cases afterwards self.pbs += "exit\n" if self.verbose: print('writing pbs script to path: ' + pbs_path) # and write the script to a file: write_file(pbs_path, self.pbs, 'w') # make the string empty again, for memory self.pbs = '' def copyback_all_files(self, mode, cpu_nr): """Copy back all the files from either scratch to run_dir (PBS mode), or from CPU sub-directory back to main directory in find+xargs mode. """ if mode=="find+xargs": foper = "rsync -a --remove-source-files" # move files instead of copy dst = os.path.join('..', self.sim_id, '') dst_db = '../' cd2model = " cd %s\n" % os.path.join(self.node_run_root, '$USER', '$PBS_JOBID', '$CPU_NR', '') pbs_mode = False else: foper = "cp -R" dst = "$PBS_O_WORKDIR/" dst_db = dst pbs_mode = True cd2model = " cd %s\n" % os.path.join(self.node_run_root, '$USER', '$PBS_JOBID', '%i' % cpu_nr, '') # navigate to the cpu dir on the node # The batch system on Gorm/Jess allows more than one job per node. # Because of this the scratch directory name includes both the # user name and the job ID, that is /scratch/$USER/$PBS_JOBID/CPU_NR self.pbs += cd2model # create the log, res etc dirs in case they do not exist. Only relevant # for pbs_mode, they are created in advance in find+xargs if pbs_mode: mk = ' mkdir -p' self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.results_dir)) self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.logs_dir)) if self.animation_dir: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.animation_dir)) if self.copyback_turb and self.TurbDb: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.TurbDb)) elif self.copyback_turb: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.TurbDirName)) if self.copyback_turb and self.wakeDb: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.wakeDb)) elif self.WakeDirName and self.WakeDirName != self.TurbDirName: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.WakeDirName)) if self.copyback_turb and self.meandDb: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.meandDb)) elif self.MeanderDirName and self.MeanderDirName != self.TurbDirName: self.pbs += "%s %s\n" % (mk, os.path.join(dst, self.MeanderDirName)) # and copy the results and log files frome the scratch to dst res_dst = os.path.join(dst, self.results_dir, ".") self.pbs += " %s %s. 
%s\n" % (foper, self.results_dir, res_dst) log_dst = os.path.join(dst, self.logs_dir, ".") self.pbs += " %s %s. %s\n" % (foper, self.logs_dir, log_dst) if self.animation_dir: ani_dst = os.path.join(dst, self.animation_dir, ".") self.pbs += " %s %s. %s\n" % (foper, self.animation_dir, ani_dst) if self.eigenfreq_dir: # just in case the eig dir has subdirs for the results, only # select the base path and cp -r will take care of the rest p1 = self.eigenfreq_dir.split('/')[0] p2 = os.path.join(dst, p1, ".") self.pbs += " cp -R %s/. %s\n" % (p1, p2) # for eigen analysis with floater, modes are in root eig_dir_sys = os.path.join(dst, self.eigenfreq_dir, 'system/', '.') self.pbs += ' mkdir -p %s\n' % eig_dir_sys self.pbs += " cp -R mode* %s\n" % eig_dir_sys self.pbs += " %s mode* %s\n" % (foper, eig_dir_sys) # only copy the turbulence files back if they do not exist # for all *.bin files on the node cmd = ' for i in `ls *.bin`; do if [ -e %s$i ]; ' cmd += 'then echo "$i exists no copyback"; else echo "$i copyback"; ' cmd += 'cp $i %s; fi; done\n' # copy back turbulence file? # browse to the node turb dir self.pbs += '\n echo ""\n' self.pbs += ' echo "COPY BACK TURB IF APPLICABLE"\n' if self.copyback_turb and self.TurbDb: self.pbs += ' cd %s\n' % self.TurbDirName tmp = (os.path.join(dst_db, self.TurbDb, ''),)*2 self.pbs += cmd % tmp # and back to normal model root self.pbs += cd2model elif self.copyback_turb: self.pbs += ' cd %s\n' % self.TurbDirName tmp = (os.path.join(dst, self.TurbDirName, ''),)*2 self.pbs += cmd % tmp # and back to normal model root self.pbs += cd2model if self.copyback_turb and self.wakeDb: self.pbs += ' cd %s\n' % self.WakeDirName tmp = (os.path.join(dst_db, self.wakeDb, ''),)*2 self.pbs += cmd % tmp # and back to normal model root self.pbs += cd2model elif self.copyback_turb and self.WakeDirName: self.pbs += ' cd %s\n' % self.WakeDirName tmp = (os.path.join(dst, self.WakeDirName, ''),)*2 self.pbs += cmd % tmp # and back to normal model root self.pbs += cd2model if self.copyback_turb and self.meandDb: self.pbs += ' cd %s\n' % self.MeanderDirName tmp = (os.path.join(dst_db, self.meandDb, ''),)*2 self.pbs += cmd % tmp # and back to normal model root self.pbs += cd2model elif self.copyback_turb and self.MeanderDirName: self.pbs += ' cd %s\n' % self.MeanderDirName tmp = (os.path.join(dst, self.MeanderDirName, ''),)*2 self.pbs += cmd % tmp # and back to normal model root self.pbs += cd2model self.pbs += ' echo "END COPY BACK TURB"\n' self.pbs += ' echo ""\n\n' # copy back any other kind of files, as specified in copyback_files self.pbs += ' echo "COPYBACK [copyback_files]/[copyback_frename]"\n' if len(self.copyback_frename) == 0: self.copyback_frename = self.copyback_files for fname, fnew in zip(self.copyback_files, self.copyback_frename): dst_fnew = os.path.join(dst, fnew) self.pbs += " %s %s %s\n" % (foper, fname, dst_fnew) self.pbs += ' echo "END COPYBACK"\n' self.pbs += ' echo ""\n' if pbs_mode: # check what is left self.pbs += ' echo ""\n' self.pbs += ' echo "following files are on ' self.pbs += 'node/cpu %i (find .):"\n' % cpu_nr self.pbs += ' find .\n' self.pbs += '# ' + '='*78 + '\n' else: self.pbs += '# ' + '-'*78 + '\n' def checklogs(self): """ """ self.pbs += 'python -c "from wetb.prepost import statsdel; ' rpl = (os.path.join(self.logs_dir, self.case+'.log')) self.pbs += 'statsdel.logcheck(\'%s\')"\n' % rpl def postprocessing(self): """Run post-processing just after HAWC2 has ran """ self.pbs += 'python -c "from wetb.prepost import statsdel; ' fsrc = 
os.path.join(self.results_dir, self.case)
        rpl = (fsrc, str(self.case_duration), '.csv')
        self.pbs += ('statsdel.calc(\'%s\', no_bins=46, m=[3, 4, 6, 8, 10, 12], '
                     'neq=%s, i0=0, i1=None, ftype=\'%s\')"\n' % rpl)

    def check_results(self, cases):
        """
        Cross-check if all simulations on the list have returned a simulation.
        Combine with ErrorLogs to identify which errors occur where.
        """

        cases_fail = {}
        if not self.silent:
            print('checking if all log and result files are present...', end='')

        # check for each case if we have results and a log file
        for cname, case in cases.items():
            run_dir = case['[run_dir]']
            res_dir = case['[res_dir]']
            log_dir = case['[log_dir]']
            # FIXME: HAWC2 outputs result and logfile always in lower case
            cname_ = cname.replace('.htc', '').lower()
            f_log = os.path.join(run_dir, log_dir, cname_)
            f_res = os.path.join(run_dir, res_dir, cname_)
            if not os.path.exists(f_log + '.log'):
                cases_fail[cname] = copy.copy(cases[cname])
                continue
            try:
                size_sel = os.stat(f_res + '.sel').st_size
                size_dat = os.stat(f_res + '.dat').st_size
            except OSError:
                size_sel = 0
                size_dat = 0
            if size_sel < 5 or size_dat < 5:
                cases_fail[cname] = copy.copy(cases[cname])

        if not self.silent:
            print('done!')

        # length will be zero if there are no failures
        return cases_fail

# TODO: rewrite the error log analysis into something better. Take a different
# approach: start from the case and see if the results are present. Then we
# also have the tags_dict available when log-checking a certain case.


class ErrorLogs(windIO.LogFile):
    """
    Analyse all HAWC2 log files in any given directory
    ==================================================

    Usage:
    logs = ErrorLogs()
    logs.MsgList    : list with the messages to be checked. Add more if required.
    logs.ResultFile : name of the result file (default is ErrorLog.csv)
    logs.PathToLogs : specify the directory where the log files reside. The
                      ResultFile will be saved in the same directory. It is
                      also possible to give the path of a specific file; the
                      logfile will not be saved in that case. Save when all
                      required messages are analysed with save().
    logs.check() to analyse all the logfiles and create the ResultFile
    logs.save() to save after single file analysis

    logs.MsgListLog : [ [case, line nr, error1, line nr, error2, ....], [], ...]
    holds the error messages (empty if no error message was found) and will
    survive as long as the logs object exists. Keep in mind that processing
    many messages with many error types (as defined in MsgList) might lead to
    an increase in memory usage.

    logs.MsgListLog2 : dict(key=case, value=[found_error, exit_correct])
    where found_error and exit_correct are booleans. found_error only
    indicates whether or not any error message has been found.

    All files in the specified folder (PathToLogs) will be evaluated.
    When any item present in MsgList occurs, the line number of the first
    occurrence will be displayed in the ResultFile.
    If more messages are required, add them to the MsgList.
    """

    # TODO: move to the HAWC2 plugin for cases

    def __init__(self, silent=False, cases=None, resultfile='ErrorLog.csv'):
        # call init from the base class
        super(ErrorLogs, self).__init__()

        self.PathToLogs = ''
        self.ResultFile = resultfile
        self.cases = cases
        self.silent = silent

    # TODO: save this not as a csv text string but as a df_dict, and save as
    # Excel and DataFrame!

    def check(self, appendlog=False, save_iter=False):
        """Check all log files that are to be found in the directory
        ErrorLogs.PathToLogs, or check the specific log file if
        ErrorLogs.PathToLogs points to a specific log file.
""" # MsgListLog = [] FileList = [] # if a directory, load all files first if os.path.isdir(self.PathToLogs): for files in os.walk(self.PathToLogs): FileList.append(files) NrFiles = len(FileList[0][2]) else: # simulate one entry on FileList[0][2], give it the file name # and save the directory on in self.PathToLogs NrFiles = 1 FileList.append([ [],[],[os.path.basename(self.PathToLogs)] ]) self.PathToLogs = os.path.dirname(self.PathToLogs) single_file = True i=1 # walk trough the files present in the folder path for fname in FileList[0][2]: # progress indicator if NrFiles > 1: if not self.silent: print('progress: ' + str(i) + '/' + str(NrFiles)) # open the current log file f_log = os.path.join(self.PathToLogs, fname) if self.cases is not None: case = self.cases[fname.replace('.log', '.htc')] else: case = None self.readlog(f_log, case=case, save_iter=save_iter) i += 1 # # if no messages are found for the current file, than say so: # if len(MsgList2) == len(self.MsgList): # tempLog[-1] = 'NO MESSAGES FOUND' # if we have only one file, don't save the log file to disk. It is # expected that if we analyse many different single files, this will # cause a slower script if single_file: # now we make it available over the object to save and let it grow # over many analysis # self.MsgListLog = copy.copy(MsgListLog) pass else: self.save(appendlog=appendlog) def save(self, appendlog=False, suffix=None): contents = self._header() contents = self._msglistlog2csv(contents) # write csv file to disk, append to facilitate more logfile analysis if isinstance(suffix, str): tmp = self.ResultFile.replace('.csv', '_%s.csv' % suffix) fname = os.path.join(self.PathToLogs, tmp) else: fname = os.path.join(self.PathToLogs, str(self.ResultFile)) if not self.silent: print('Error log analysis saved at:') print(fname) if appendlog: mode = 'a' else: mode = 'w' with open(fname, mode) as f: f.write(contents) class ModelData(object): """ Second generation ModelData function. The HawcPy version is crappy, buggy and not mutch of use in the optimisation context. 
""" class st_headers(object): """ Indices to the respective parameters in the HAWC2 st data file """ r = 0 m = 1 x_cg = 2 y_cg = 3 ri_x = 4 ri_y = 5 x_sh = 6 y_sh = 7 E = 8 G = 9 Ixx = 10 Iyy = 11 I_p = 12 k_x = 13 k_y = 14 A = 15 pitch = 16 x_e = 17 y_e = 18 def __init__(self, verbose=False, silent=False): self.verbose = verbose self.silent = silent # define the column width for printing self.col_width = 13 # formatting and precision self.float_hi = 9999.9999 self.float_lo = 0.01 self.prec_float = ' 9.05f' self.prec_exp = ' 8.04e' self.prec_loss = 0.01 #0 1 2 3 4 5 6 7 8 9 10 11 #r m x_cg y_cg ri_x ri_y x_sh y_sh E G I_x I_y #12 13 14 15 16 17 18 #I_p/K k_x k_y A pitch x_e y_e # 19 cols self.st_column_header_list = ['r', 'm', 'x_cg', 'y_cg', 'ri_x', 'ri_y', 'x_sh', 'y_sh', 'E', 'G', 'I_x', 'I_y', 'J', 'k_x', 'k_y', 'A', 'pitch', 'x_e', 'y_e'] self.st_column_header_list_latex = ['r','m','x_{cg}','y_{cg}','ri_x', 'ri_y', 'x_{sh}','y_{sh}','E', 'G', 'I_x', 'I_y', 'J', 'k_x', 'k_y', 'A', 'pitch', 'x_e', 'y_e'] self.st_fpm_cols = ['r', 'm', 'x_cg', 'y_cg', 'ri_x', 'ri_y', 'pitch', 'x_e', 'y_e', 'E11', 'E12', 'E13', 'E14', 'E15', 'E16', 'E22', 'E23', 'E24', 'E25', 'E26', 'E33', 'E34', 'E35', 'E36', 'E44', 'E45', 'E46', 'E55', 'E56', 'E66'] # set column names/indeices as class attributes for i, col in enumerate(self.st_fpm_cols): setattr(self, col, i) # make the column header self.column_header_line = 19 * self.col_width * '=' + '\n' for k in self.st_column_header_list: self.column_header_line += k.rjust(self.col_width) self.column_header_line += '\n' + (19 * self.col_width * '=') + '\n' def fromline(self, line, separator=' '): # TODO: move this to the global function space (dav-general-module) """ split a line, but ignore any blank spaces and return a list with only the values, not empty places """ # remove all tabs, new lines, etc? (\t, \r, \n) line = line.replace('\t',' ').replace('\n','').replace('\r','') # trailing and ending spaces line = line.strip() line = line.split(separator) values = [] for k in range(len(line)): if len(line[k]) > 0: #and k == item_nr: values.append(line[k]) # break return values def load_st(self, file_path, file_name): """ Now a better format: st_dict has following key/value pairs 'nset' : total number of sets in the file (int). This should be autocalculated every time when writing a new file. '007-000-0' : set number line in one peace '007-001-a' : comments for set-subset nr 07-01 (str) '007-001-b' : subset nr and number of data points, should be autocalculate every time you generate a file '007-001-d' : data for set-subset nr 07-01 (ndarray(n,19)) NOW WE ONLY CONSIDER SUBSET COMMENTS, SET COMMENTS, HOW ARE THEY TREADED NOW?? st_dict is for easy remaking the same file. We need a different format for easy reading the comments as well. For that we have the st_comments """ # TODO: store this in an HDF5 format! This is perfect for that. 
# read all the lines of the file into memory self.st_path, self.st_file = file_path, file_name FILE = open(os.path.join(file_path, file_name)) lines = FILE.readlines() FILE.close() subset = False st_dict = dict() st_comments = dict() for i, line in enumerate(lines): # convert line to list space seperated list line_list = self.fromline(line) # see if the first character is marking something if i == 0: # it is possible that the NSET line is not defined parts = line.split(' ') try: for k in range(10): parts.remove(' ') # throws error when can't find except ValueError: pass # we don't care what is on the nset line, just capture if # there are any comments lines set_nr = 0 subset_nr = 0 st_dict['000-000-0'] = line # marks the start of a set if line[0] == '#': #sett = True # first character is the #, the rest is the number set_nr = int(line_list[0][1:]) st_dict['%03i-000-0' % set_nr] = line # and reset subset nr to zero now subset_nr = 0 subset_nr_track = 0 # and comments only format, back to one string st_comments['%03i-000-0' % set_nr] = ' '.join(line_list[1:]) # marks the start of a subset elif line[0] == '$': subset_nr_track += 1 subset = True subset_nr = int(line_list[0][1:]) # and comments only format, back to one string setid = '%03i-%03i-b' % (set_nr, subset_nr) st_comments[setid] = ' '.join(line_list[2:]) # check if the number read corresponds to tracking if subset_nr is not subset_nr_track: msg = 'subset_nr and subset_nr_track do not match' raise UserWarning(msg) nr_points = int(line_list[1]) st_dict[setid] = line # prepare read data points sub_set_arr = scipy.zeros((nr_points,19), dtype=np.float64) # keep track of where we are on the data array, initialize # to 0 for starters point = 0 # in case we are not in subset mode, we only have comments left elif not subset: # FIXME: how are we dealing with set comments now? 
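                # ----------------------------------------------------------
                # Illustrative sketch (not part of the original module): how
                # the set ('#') and subset ('$') marker lines handled above
                # are interpreted, using plain split() on made-up lines.
                # >>> line = '#7 tower'
                # >>> parts = line.split()
                # >>> int(parts[0][1:]), ' '.join(parts[1:])
                # (7, 'tower')
                # >>> line = '$1 19 tower properties'
                # >>> parts = line.split()
                # >>> int(parts[0][1:]), int(parts[1])  # subset nr, nr of points
                # (1, 19)
                # ----------------------------------------------------------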
# subset comments are coming before the actual subset # so we account them to one set later than we are now #if subset_nr > 0 : key = '%03i-%03i-a' % (set_nr, subset_nr+1) # in case it is not the first comment line if key in st_dict: st_dict[key] += line else: st_dict[key] = line ## otherwise we have the set comments #else: #key = '%03i-%03i-a' % (set_nr, subset_nr) ## in case it is not the first comment line #if st_dict.has_key(key): st_dict[key] += line #else: st_dict[key] = line # in case we have the data points, make sure there are enough # data poinst present, raise an error if it doesn't elif len(line_list)==19 and subset: # we can store it in the array sub_set_arr[point,:] = line_list # on the last entry: if point == nr_points-1: # save to the dict: st_dict['%03i-%03i-d' % (set_nr, subset_nr)]= sub_set_arr # and indicate we're done subsetting, next we can have # either set or subset comments subset = False point += 1 #else: #msg='error in st format: don't know where to put current line' #raise UserWarning, msg self.st_dict = st_dict self.st_comments = st_comments def _format_nr(self, number): """ Automatic format the number prec_loss : float, default=0.01 acceptible precision loss expressed in % """ # the formatting of the number numabs = abs(number) # just a float precision defined in self.prec_float if (numabs < self.float_hi and numabs > self.float_lo): numfor = format(number, self.prec_float) # if it is zero, just simply print as 0.0 elif number == 0.0: numfor = format(number, ' 1.1f') # exponentional, precision defined in self.prec_exp else: numfor = format(number, self.prec_exp) try: loss = 100.0*abs(1 - (float(numfor)/number)) except ZeroDivisionError: if abs(float(numfor)) > 0.00000001: msg = 'precision loss, from %1.10f to %s' \ % (number, numfor.strip()) raise ValueError('precesion loss for new st file') else: loss = 0 if loss > self.prec_loss: msg = 'precision loss, from %1.10f to %s (%f pc)' \ % (number, numfor.strip(), loss) raise ValueError(msg) return numfor def write_st(self, file_path, file_name, print_header=False): """ prec_loss : float, default=0.01 acceptible precision loss expressed in % """ # TODO: implement all the tests when writing on nset, number of data # points, subsetnumber sequence etc content = '' # sort the key list keysort = list(self.st_dict.keys()) keysort.sort() for key in keysort: # in case we are just printing what was recorded before if not key.endswith('d'): content += self.st_dict[key] # else we have an array else: # cycle through data points and print them orderly: control # precision depending on the number, keep spacing constant # so it is easy to read the textfile for m in range(self.st_dict[key].shape[0]): for n in range(self.st_dict[key].shape[1]): # TODO: check what do we lose here? # we are coming from a np.float64, as set in the array # but than it will not work with the format() number = float(self.st_dict[key][m,n]) numfor = self._format_nr(number) content += numfor.rjust(self.col_width) content += '\n' if print_header: content += self.column_header_line # and write file to disk again FILE = open(file_path + file_name, 'w') FILE.write(content) FILE.close() if not self.silent: print('st file written:', file_path + file_name) def write_latex(self, fpath, selection=[]): """ Write a table in Latex format based on the data in the st file. selection : list [ [setnr, subsetnr, table caption], [setnr, subsetnr, caption],...] 
if not specified, all subsets will be plotted """ cols_p1 = ['r [m]', 'm [kg/m]', 'm(ri{_x})^2 [kgNm^2]', 'm(ri{_y})^2 [kgNm^2]', 'EI_x [Nm^2]', 'EI_y [Nm^2]', 'EA [N]', 'GJ [\\frac{Nm^2}{rad}]'] cols_p2 = ['r [m]', 'x_cg [m]', 'y_cg [m]', 'x_sh [m]', 'y_sh [m]', 'x_e [m]', 'y_e [m]', 'k_x [-]', 'k_y [-]', 'pitch [deg]'] if len(selection) < 1: for key in self.st_dict: # but now only take the ones that hold data if key[-1] == 'd': selection.append([int(key[:3]), int(key[4:7])]) for i,j, caption in selection: # get the data try: # set comment should be the name of the body set_comment = self.st_comments['%03i-000-0' % (i)] # subset_comment = self.st_comments['%03i-%03i-b' % (i,j)] st_arr = self.st_dict['%03i-%03i-d' % (i,j)] except AttributeError: msg = 'ModelData object md is not loaded properly' raise AttributeError(msg) # build the latex table header # textable = u"\\begin{table}[b!]\n" # textable += u"\\begin{center}\n" textable_p1 = "\\centering\n" textable_p1 += "\\begin{tabular}" # configure the column properties tmp = ['C{2.0 cm}' for k in cols_p1] tmp = "|".join(tmp) textable_p1 += '{|' + tmp + '|}' textable_p1 += '\hline\n' # add formula mode for the headers tmp = [] for k in cols_p1: k1, k2 = k.split(' ') tmp.append('$%s$ $%s$' % (k1,k2) ) # tmp = [u'$%s$' % k for k in cols_p1] textable_p1 += ' & '.join(tmp) textable_p1 += '\\\\ \n' textable_p1 += '\hline\n' textable_p2 = "\\centering\n" textable_p2 += "\\begin{tabular}" # configure the column properties tmp = ['C{1.5 cm}' for k in cols_p2] tmp = "|".join(tmp) textable_p2 += '{|' + tmp + '|}' textable_p2 += '\hline\n' # add formula mode for the headers tmp = [] for k in cols_p2: k1, k2 = k.split(' ') tmp.append('$%s$ $%s$' % (k1,k2) ) # tmp = [u'$%s$ $%s$' % (k1, k2) for k in cols_p2] # hack: spread the last element over two lines # tmp[-1] = '$pitch$ $[deg]$' textable_p2 += ' & '.join(tmp) textable_p2 += '\\\\ \n' textable_p2 += '\hline\n' for row in range(st_arr.shape[0]): r = st_arr[row, self.st_headers.r] m = st_arr[row,self.st_headers.m] x_cg = st_arr[row,self.st_headers.x_cg] y_cg = st_arr[row,self.st_headers.y_cg] ri_x = st_arr[row,self.st_headers.ri_x] ri_y = st_arr[row,self.st_headers.ri_y] x_sh = st_arr[row,self.st_headers.x_sh] y_sh = st_arr[row,self.st_headers.y_sh] E = st_arr[row,self.st_headers.E] G = st_arr[row,self.st_headers.G] Ixx = st_arr[row,self.st_headers.Ixx] Iyy = st_arr[row,self.st_headers.Iyy] I_p = st_arr[row,self.st_headers.I_p] k_x = st_arr[row,self.st_headers.k_x] k_y = st_arr[row,self.st_headers.k_y] A = st_arr[row,self.st_headers.A] pitch = st_arr[row,self.st_headers.pitch] x_e = st_arr[row,self.st_headers.x_e] y_e = st_arr[row,self.st_headers.y_e] # WARNING: same order as the labels defined in variable "cols"! 
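            # --------------------------------------------------------------
            # Illustrative sketch (not part of the original module): the
            # formatting rule of _format_nr() used for the table entries
            # below (fixed point within [0.01, 9999.9999], exponential
            # otherwise, zero printed as 0.0); the precision-loss check is
            # omitted here.
            # >>> format(123.456789, ' 9.05f')
            # ' 123.45679'
            # >>> format(0.0012345, ' 8.04e')
            # ' 1.2345e-03'
            # >>> format(0.0, ' 1.1f')
            # ' 0.0'
            # --------------------------------------------------------------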
p1 = [r, m, m*ri_x*ri_x, m*ri_y*ri_y, E*Ixx, E*Iyy, E*A,I_p*G] p2 = [r, x_cg, y_cg, x_sh, y_sh, x_e, y_e, k_x, k_y, pitch] textable_p1 += " & ".join([self._format_nr(k) for k in p1]) textable_p1 += '\\\\ \n' textable_p2 += " & ".join([self._format_nr(k) for k in p2]) textable_p2 += '\\\\ \n' # default caption if caption == '': caption = 'HAWC2 cross sectional parameters for body: %s' % set_comment textable_p1 += "\hline\n" textable_p1 += "\end{tabular}\n" textable_p1 += "\caption{%s}\n" % caption # textable += u"\end{center}\n" # textable += u"\end{table}\n" fname = '%s-%s-%03i-%03i_p1' % (self.st_file, set_comment, i, j) fname = fname.replace('.', '') + '.tex' with open(fpath + fname, 'w') as f: f.write(textable_p1) textable_p2 += "\hline\n" textable_p2 += "\end{tabular}\n" textable_p2 += "\caption{%s}\n" % caption # textable += u"\end{center}\n" # textable += u"\end{table}\n" fname = '%s-%s-%03i-%03i_p2' % (self.st_file, set_comment, i, j) fname = fname.replace('.', '') + '.tex' with open(fpath + fname, 'w') as f: f.write(textable_p2) class WeibullParameters(object): def __init__(self): self.Vin = 4. self.Vr = 12. self.Vout = 26. self.Vref = 50. self.Vstep = 2. self.shape_k = 2. # FIXME: Cases has a memory leek somewhere, this whole thing needs to be # reconsidered and rely on a DataFrame instead of a dict! class Cases(object): """ Class for the old htc_dict ========================== Formerly known as htc_dict: a dictionary with on the key a case identifier (case name) and the value is a dictionary holding all the different tags and value pairs which define the case TODO: define a public API so that plugin's can be exposed in a standarized way using pre defined variables: * pandas DataFrame backend instead of a dictionary * generic, so not bound to HAWC2. Goal: manage a lot of simulations and their corresponding inputs/outus * integration with OpenMDAO? * case id (hash) * case name (which is typically created with variable_tag_name method) * results * inputs * outputs a variable tags that has a dictionary mirror for database alike searching launch, post_launch, prepare_(re)launch should be methods of this or inheret from Cases Create a method to add and remove cases from the pool so you can perform some analysis on them. Maybe make a GUI that present a list with current cases in the pool and than checkboxes to remove them. Remove the HAWC2 specific parts to a HAWC2 plugin. The HAWC2 plugin will inheret from Cases. Proposed class name: HAWC2Cases, XFOILCases Rename cases to pool? A pool contains several cases, mixing several sim_id's? create a unique case ID based on the hash value of all the tag+values? 
""" # TODO: add a method that can reload a certain case_dict, you change # some parameters for each case (or some) and than launch again #def __init__(self, post_dir, sim_id, resdir=False): def __init__(self, *args, **kwargs): """ Either load the cases dictionary if post_dir and sim_id is given, otherwise the input is a cases dictionary Paramters --------- cases : dict The cases dictionary in case there is only one argument post_dir : str When using two arguments sim_id : str or list When using two arguments resdir : str, default=False loadstats : boolean, default=False rem_failed : boolean, default=True """ resdir = kwargs.get('resdir', False) self.loadstats = kwargs.get('loadstats', False) self.rem_failed = kwargs.get('rem_failed', True) self.config = kwargs.get('config', {}) self.complib = kwargs.get('complib', 'blosc') # determine the input argument scenario if len(args) == 1: if type(args[0]).__name__ == 'dict': self.cases = args[0] sim_id = False else: raise ValueError('One argument input should be a cases dict') elif len(args) == 2: self.post_dir = args[0] sim_id = args[1] else: raise ValueError('Only one or two arguments are allowed.') # if sim_id is a list, than merge all sim_id's of that list if type(sim_id).__name__ == 'list': # stats, dynprop and fail are empty dictionaries if they do not # exist self.merge_sim_ids(sim_id) # and define a new sim_id based on all items from the list self.sim_id = '_'.join(sim_id) # in case we still need to load the cases dict elif type(sim_id).__name__ == 'str': self.sim_id = sim_id self._get_cases_dict(self.post_dir, sim_id) # load the statistics if applicable if self.loadstats: self.stats_df, self.Leq_df, self.AEP_df = self.load_stats() # change the results directory if applicable if resdir: self.change_results_dir(resdir) # # try to load failed cases and remove them # try: # self.load_failed(sim_id) # self.remove_failed() # except IOError: # pass #return self.cases def select(self, search_keyval=False, search_key=False): """ Select only a sub set of the cases Select either search_keyval or search_key. Using both is not supported yet. Run select twice to achieve the same effect. If both are False, cases will be emptied! Parameters ---------- search_keyval : dictionary, default=False Keys are the column names. If the values match the ones in the database, the respective row gets selected. Each tag is hence a unique row identifier search_key : dict, default=False The key is the string that should either be inclusive (value TRUE) or exclusive (value FALSE) in the case key """ db = misc.DictDB(self.cases) if search_keyval: db.search(search_keyval) elif search_key: db.search_key(search_keyval) else: db.dict_sel = {} # and remove all keys that are not in the list remove = set(self.cases) - set(db.dict_sel) for k in remove: self.cases.pop(k) def launch(self, runmethod='local', verbose=False, copyback_turb=True, silent=False, check_log=True): """ Launch all cases """ launch(self.cases, runmethod=runmethod, verbose=verbose, silent=silent, check_log=check_log, copyback_turb=copyback_turb) def post_launch(self, save_iter=False, pbs_failed_path=False, suffix=None, path_errorlog=None, silent=False): """ Post Launching Maintenance check the logs files and make sure result files are present and accounted for. 
Parameters ---------- save_iter : boolean, default=False Set to True to save the number of iterations per time step in *.iter file (in the same folder as the logfile) pbs_failed_path : str, default=False If not False, specify the path to which the *.p files of the failed cases should be copied to. For example, the dlctemplate will set this value to "pbs_in_fail". path_errorlog : str, default=None Root path of the error logfiles. If set to None (default), the value set in the [run_dir] tag is used as the root folder of the logfiles. suffix : str, default=None If not None, the suffix will be appended to file name of the error log analysis file as follows: "ErrorLog_suffix.csv". """ # TODO: integrate global post_launch in here self.cases_fail = post_launch(self.cases, save_iter=save_iter, suffix=suffix, path_errorlog=path_errorlog) if pbs_failed_path is not False: copy_pbs_in_failedcases(self.cases_fail, path=pbs_failed_path, silent=silent) if self.rem_failed: self.remove_failed() def load_case(self, case): try: iterations = self.load_iterations(case) except IOError: iterations = None res = self.load_result_file(case) return res, iterations def load_iterations(self, case): fp = os.path.join(case['[run_dir]'], case['[iter_dir]'], case['[case_id]']) return np.loadtxt(fp + '.iter') # TODO: HAWC2 result file reading should be moved to Simulations # and we should also switch to faster HAWC2 reading! def load_result_file(self, case, _slice=False): """ Set the correct HAWC2 channels Parameters ---------- case : dict a case dictionary holding all the tags set for this specific HAWC2 simulation Returns ------- res : object A HawcPy LoadResults instance with attributes such as sig, ch_dict, and much much more """ respath = os.path.join(case['[run_dir]'], case['[res_dir]']) resfile = case['[case_id]'] self.res = windIO.LoadResults(respath, resfile.lower()) if not _slice: _slice = np.r_[0:len(self.res.sig)] self.time = self.res.sig[_slice,0] self.sig = self.res.sig[_slice,:] self.case = case return self.res def load_struct_results(self, case, max_modes=500, nrmodes=1000): """ Load the structural analysis result files """ fpath = os.path.join(case['[run_dir]'], case['[eigenfreq_dir]']) # BEAM OUTPUT fname = '%s_beam_output.txt' % case['[case_id]'] beam = None # BODY OUTPUT fname = '%s_body_output.txt' % case['[case_id]'] body = None # EIGEN BODY fname = '%s_eigen_body.txt' % case['[case_id]'] try: eigen_body, rs2 = windIO.ReadEigenBody(fpath, fname, debug=False, nrmodes=nrmodes) except Exception as e: eigen_body = None print('failed to load eigen_body') print(e) # EIGEN STRUCT fname = '%s_eigen_struct.txt' % case['[case_id]'] try: eigen_struct = windIO.ReadEigenStructure(fpath, fname, debug=False, max_modes=max_modes) except Exception as e: eigen_struct = None print('failed to load eigen_struct') print(e) # STRUCT INERTIA fname = '%s_struct_inertia.txt' % case['[case_id]'] struct_inertia = None return beam, body, eigen_body, eigen_struct, struct_inertia def load_errorlogs(self): """Load error log analysis """ fpath = os.path.join(self.post_dir, self.sim_id + '_ErrorLogs.h5') try: df_err = pd.read_hdf(fpath, 'table') except FileNotFoundError: df_err = pd.read_csv(fpath.replace('.h5', '.csv')) return df_err def change_results_dir(self, forcedir, post_dir=False): """ if the post processing concerns simulations done by thyra/gorm, and is downloaded locally, change path to results accordingly """ for case in self.cases: self.cases[case]['[run_dir]'] = forcedir if post_dir: self.cases[case]['[post_dir]'] = 
post_dir #return cases def _get_cases_dict(self, post_dir, sim_id): """ Load the pickled dictionary containing all the cases and their respective tags. Returns ------- cases : Cases object cases with failures removed. Failed cases are kept in self.cases_fail """ self.cases = load_pickled_file(os.path.join(post_dir, sim_id + '.pkl')) self.cases_fail = {} if self.rem_failed: try: self.load_failed(sim_id) # ditch all the failed cases out of the htc_dict otherwise # we will have fails when reading the results data files self.remove_failed() except IOError: print("couldn't find pickled failed dictionary") return def cases2df(self): """Convert the cases dict to a DataFrame and check data types""" tag_set = [] # maybe some cases have tags that others don't, create a set with # all the tags that occur for cname, tags in self.cases.items(): tag_set.extend(list(tags.keys())) # also add cname as a tag tag_set.append('cname') # only unique tags tag_set = set(tag_set) # and build the df_dict with all the tags df_dict = {tag:[] for tag in tag_set} for cname, tags in self.cases.items(): current_tags = set(tags.keys()) for tag, value in tags.items(): df_dict[tag].append(value) # and the missing ones for tag in (tag_set - current_tags): df_dict[tag].append('') df_dict2 = misc.df_dict_check_datatypes(df_dict) return pd.DataFrame(df_dict2) def merge_sim_ids(self, sim_id_list, silent=False): """ Load and merge for a list of sim_id's cases, fail, dynprop and stats ==================================================================== For all sim_id's in the sim_id_list the cases, stats, fail and dynprop dictionaries are loaded. If one of them doesn't exists, an empty dictionary is returned. Currently, there is no warning given when a certain case will be overwritten upon merging. """ cases_merged = {} cases_fail_merged = {} for ii, sim_id in enumerate(sim_id_list): # TODO: give a warning if we have double entries or not? self.sim_id = sim_id self._get_cases_dict(self.post_dir, sim_id) cases_fail_merged.update(self.cases_fail) # and copy to htc_dict_merged. Note that non unique keys will be # overwritten: each case has to have a unique name! 
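# As a minimal sketch, merging is usually triggered from the constructor by
# passing a list of sim_id's (the names below are hypothetical); the merged
# sim_id then becomes the underscore-joined list:
# >>> cc = Cases('processing/', ['sim_a', 'sim_b'])
# >>> cc.sim_id
# 'sim_a_sim_b'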
cases_merged.update(self.cases) # merge the statistics if applicable # self.stats_dict[channels] = df if self.loadstats: if ii == 0: self.stats_df, self.Leq_df, self.AEP_df = self.load_stats() else: tmp1, tmp2, tmp3 = self.load_stats() self.stats_df = self.stats_df.append(tmp1) if isinstance(self.Leq_df, pd.DataFrame): self.Leq_df = self.Leq_df.append(tmp2) if isinstance(self.AEP_df, pd.DataFrame): self.AEP_df = self.AEP_df.append(tmp3) self.cases = cases_merged self.cases_fail = cases_fail_merged def printall(self, scenario, figpath=''): """ For all the cases, get the average value of a certain channel """ self.figpath = figpath # plot for each case the dashboard for k in self.cases: if scenario == 'blade_deflection': self.blade_deflection(self.cases[k], self.figpath) def diff(self, refcase_dict, cases): """ See wich tags change over the given cases of the simulation object """ # there is only one case allowed in refcase dict if not len(refcase_dict) == 1: return ValueError, 'Only one case allowed in refcase dict' # take an arbritrary case as baseline for comparison refcase = refcase_dict[list(refcase_dict.keys())[0]] #reftags = sim_dict[refcase] diffdict = dict() adddict = dict() remdict = dict() print() print('*'*80) print('comparing %i cases' % len(cases)) print('*'*80) print() # compare each case with the refcase and see if there are any diffs for case in sorted(cases.keys()): dd = misc.DictDiff(refcase, cases[case]) diffdict[case] = dd.changed() adddict[case] = dd.added() remdict[case] = dd.removed() print('') print('='*80) print(case) print('='*80) for tag in sorted(diffdict[case]): print(tag.rjust(20),':', cases[case][tag]) return diffdict, adddict, remdict def blade_deflection(self, case, **kwargs): """ """ # read the HAWC2 result file self.load_result_file(case) # select all the y deflection channels db = misc.DictDB(self.res.ch_dict) db.search({'sensortype' : 'state pos', 'component' : 'z'}) # sort the keys and save the mean values to an array/list chiz, zvals = [], [] for key in sorted(db.dict_sel.keys()): zvals.append(-self.sig[:,db.dict_sel[key]['chi']].mean()) chiz.append(db.dict_sel[key]['chi']) db.search({'sensortype' : 'state pos', 'component' : 'y'}) # sort the keys and save the mean values to an array/list chiy, yvals = [], [] for key in sorted(db.dict_sel.keys()): yvals.append(self.sig[:,db.dict_sel[key]['chi']].mean()) chiy.append(db.dict_sel[key]['chi']) return np.array(zvals), np.array(yvals) def find_failed(self, df_cases=None, df_err=None, save=True, rem_failed=None): """Given the log file analysis and the Cases tag list, generate a list of failed cases. This is usefull when either some cases have been re-run or when the post-processing is done at the same time as the simulations (e.g. zipchunks approach). Parameters ---------- df_cases : df, default=None If None the current cases dict object is converted to a dataframe using self.cases2df() df_err : df, default=None If None the current error log is converted to a dataframe using self.load_errorlogs() save : boolean, default=True save the failed cases dict using pickle. rem_failed : boolean, default=None if None the already set value self.rem_failed is used. Otherwise self.rem_failed is overwritten. If True, failed cases are removed from self.cases. 
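Example
-------
A minimal sketch, assuming the error log analysis for this sim_id has
already been generated (so self.load_errorlogs() can find it):

>>> cc.find_failed(save=True, rem_failed=True)
>>> sorted(cc.cases_fail.keys())  # htc file names of the failed cases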
""" if df_cases is None: df_cases = self.cases2df() if df_err is None: df_err = self.load_errorlogs() if rem_failed is None: rem_failed = self.rem_failed else: self.rem_failed = rem_failed self.cases_fail = {} # convert case_id to log file names # logids = pd.DataFrame(columns=['']) df_cases['logid'] = df_cases['[log_dir]'] + df_cases['[case_id]'] + '.log' # we only need to merge with errorlogs using a portion of the data # join error logs and df_cases on the logid df = pd.merge(df_cases[['logid', '[case_id]']], df_err[['file_name']], left_on='logid', right_on='file_name', how='outer') # missing files: those present in df_cases but not in the error log # this means file_name is a nan or empty # logids_missing2 = set(df_cases['logid']) - set(df_err['file_name']) logids_missing = df[df['file_name'].isnull() | (df['file_name']=='')] for case_id in logids_missing['[case_id]']: cname = case_id + '.htc' self.cases_fail[cname] = copy.copy(self.cases[cname]) if rem_failed: self.remove_failed() if save: save_pickle(os.path.join(self.post_dir, self.sim_id + '_fail.pkl'), self.cases_fail) def remove_failed(self): # don't do anything if there is nothing defined if self.cases_fail == None: print('no failed cases to remove') return # ditch all the failed cases out of the htc_dict # otherwise we will have fails when reading the results data files for k in self.cases_fail: try: self.cases_fail[k] = copy.copy(self.cases[k]) del self.cases[k] print('removed from htc_dict due to error: ' + k) except KeyError: print('WARNING: failed case does not occur in cases') print(' ', k) def load_failed(self, sim_id): fname = os.path.join(self.post_dir, sim_id + '_fail.pkl') FILE = open(fname, 'rb') self.cases_fail = pickle.load(FILE) FILE.close() def load_stats(self, **kwargs): """ Load an existing statistcs file Parameters ---------- post_dir : str, default=self.post_dir sim_id : str, default=self.sim_id fpath : str, default=sim_id leq : bool, default=False columns : list, default=None Returns ------- stats_df : pandas.DataFrame Leq_df : pandas.DataFrame AEP_df : pandas.DataFrame """ post_dir = kwargs.get('post_dir', self.post_dir) sim_id = kwargs.get('sim_id', self.sim_id) fpath = os.path.join(post_dir, sim_id) Leq_df = kwargs.get('leq', False) columns = kwargs.get('columns', None) try: stats_df = pd.read_hdf(fpath + '_statistics.h5', 'table', columns=columns) # FILE = open(post_dir + sim_id + '_statistics.pkl', 'rb') # stats_dict = pickle.load(FILE) # FILE.close() except IOError: stats_df = None print('NO STATS FOUND FOR', sim_id) try: AEP_df = pd.read_hdf(fpath + '_AEP.h5', 'table') except IOError: AEP_df = None print('NO AEP FOUND FOR', sim_id) if Leq_df: try: Leq_df = pd.read_hdf(fpath + '_Leq.h5', 'table') except IOError: Leq_df = None print('NO Leq FOUND FOR', sim_id) return stats_df, Leq_df, AEP_df def statistics(self, new_sim_id=False, silent=False, ch_sel=None, tags=['[seed]','[windspeed]'], calc_mech_power=False, save=True, m=[3, 4, 6, 8, 10, 12], neq=None, no_bins=46, ch_fatigue={}, update=False, add_sensor=None, chs_resultant=[], i0=0, i1=None, saveinterval=1000, csv=True, suffix=None, A=None, add_sigs={}, ch_wind=None, save_new_sigs=False, xlsx=False): """ Calculate statistics and save them in a pandas dataframe. Save also every 500 cases the statistics file. Parameters ---------- ch_sel : list, default=None If defined, only add defined channels to the output data frame. The list should contain valid channel names as defined in ch_dict. 
tags : list, default=['[seed]','[windspeed]'] Select which tag values from cases should be included in the dataframes. This will help in selecting and identifying the different cases. ch_fatigue : list, default=[] Valid ch_dict channel names for which the equivalent fatigue load needs to be calculated. When set to None, ch_fatigue = ch_sel, and hence all channels will have a fatigue analysis. add_sigs : dict, default={} channel name, expression key/value paires. For example, '[p1-p1-node-002-forcevec-z]*3 + [p1-p1-node-002-forcevec-y]' chs_resultant add_sensor calc_mech_power saveinterval : int, default=1000 When processing a large number of cases, the statistics file will be saved every saveinterval-ed case update : boolean, default=False Update an existing DataFrame instead of overwriting one. When the number of cases is larger then saveinterval, the statistics file will be updated every saveinterval-ed case suffix : boolean or str, default=False When True, the statistics data file will be appended with a suffix that corresponds to the index of the last case added. When a string, that suffix will be added to the file name (up to but excluding, much like range()). Set to True when a large number of cases is being considered in order to avoid excessively large DataFrames. csv : boolean, default=False In addition to a h5 file, save the statistics also in csv format. xlsx : boolean, default=False In addition to a h5 file, save the statistics also in MS Excel xlsx format. Returns ------- dfs : dict Dictionary of dataframes, where the key is the channel name of the output (that was optionally defined in ch_sel), and the value is the dataframe containing the statistical values for all the different selected cases. """ def add_df_row(df_dict, **kwargs): """ add a new channel to the df_dict format of ch_df """ for col, value in kwargs.items(): df_dict[col].append(value) for col in (self.res.cols - set(kwargs.keys())): df_dict[col].append('') return df_dict # in case the output changes, remember the original ch_sel if ch_sel is not None: ch_sel_init = ch_sel.copy() else: ch_sel_init = None if ch_fatigue is None: ch_fatigue_init = None else: ch_fatigue_init = ch_fatigue # TODO: should the default tags not be all the tags in the cases dict? 
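# A minimal usage sketch (the fatigue channel name and the material
# exponents below are illustrative only, not defaults tied to a model):
# >>> dfs = cc.statistics(tags=['[seed]', '[windspeed]'],
# ...                     m=[3, 4, 6, 8, 10, 12],
# ...                     ch_fatigue=['shaft-shaft-node-001-momentvec-z'],
# ...                     calc_mech_power=False, save=True, csv=True)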
tag_default = ['[case_id]', '[sim_id]'] tag_chan = 'channel' # merge default with other tags for tag in tag_default: if tag not in tags: tags.append(tag) # tags can only be unique, when there the same tag appears twice # it will break the DataFrame creation if len(tags) is not len(set(tags)): raise ValueError('tags can only contain unique entries') # get some basic parameters required to calculate statistics try: case = list(self.cases.keys())[0] except IndexError: print('no cases to select so no statistics, aborting ...') return None post_dir = self.cases[case]['[post_dir]'] if not new_sim_id: # select the sim_id from a random case sim_id = self.cases[case]['[sim_id]'] else: sim_id = new_sim_id if not silent: nrcases = len(self.cases) print('='*79) print('statistics for %s, nr cases: %i' % (sim_id, nrcases)) df_dict = None add_stats = True # for finding [] tags regex = re.compile('(\\[.*?\\])') for ii, (cname, case) in enumerate(self.cases.items()): # build the basic df_dict if not defined if df_dict is None: # the dictionary that will be used to create a pandas dataframe df_dict = { tag:[] for tag in tags } df_dict[tag_chan] = [] # add more columns that will help with IDing the channel df_dict['channel_name'] = [] df_dict['channel_units'] = [] df_dict['channel_nr'] = [] df_dict['channel_desc'] = [] add_stats = True if not silent: pc = '%6.2f' % (float(ii)*100.0/float(nrcases)) pc += ' %' print('stats progress: %4i/%i %s | %s' % (ii, nrcases, pc, cname)) # make sure the selected tags exist if len(tags) != len(set(case) and tags): raise KeyError(' not all selected tags exist in cases') self.load_result_file(case) ch_dict_new = {} # this is really messy, now we are also in parallal using the # channel DataFrame structure ch_df_new = {col:[] for col in self.res.cols} ch_df_new['ch_name'] = [] # calculate the statistics values # stats = self.res.calc_stats(self.sig, i0=i0, i1=i1) i_new_chans = self.sig.shape[1] # self.Nch sig_size = self.res.N # len(self.sig[i0:i1,0]) new_sigs = np.ndarray((sig_size, 0)) for name, expr in add_sigs.items(): channel_tags = regex.findall(expr) # replace all sensor names with expressions template = "self.sig[:,self.res.ch_dict['{}']['chi']]" for chan in channel_tags: # first remove the [] from the tag # FIXME: fails when the same channel occurs more than once expr = expr.replace(chan, chan[1:-1]) expr = expr.replace(chan[1:-1], template.format(chan[1:-1])) sig_add = np.ndarray((len(self.sig[:,0]), 1)) sig_add[:,0] = eval(expr) ch_dict_new[name] = {} ch_dict_new[name]['chi'] = i_new_chans ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans, 'ch_name':name}) i_new_chans += 1 new_sigs = np.append(new_sigs, sig_add, axis=1) if add_sensor is not None: chi1 = self.res.ch_dict[add_sensor['ch1_name']]['chi'] chi2 = self.res.ch_dict[add_sensor['ch2_name']]['chi'] name = add_sensor['ch_name_add'] factor = add_sensor['factor'] operator = add_sensor['operator'] p1 = self.sig[:,chi1] p2 = self.sig[:,chi2] sig_add = np.ndarray((len(p1), 1)) if operator == '*': sig_add[:,0] = p1*p2*factor elif operator == '/': sig_add[:,0] = factor*p1/p2 else: raise ValueError('Operator needs to be either * or /') # add_stats = self.res.calc_stats(sig_add) # add_stats_i = stats['max'].shape[0] # add a new channel description for the mechanical power ch_dict_new[name] = {} ch_dict_new[name]['chi'] = i_new_chans ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans, 'ch_name':name}) i_new_chans += 1 new_sigs = np.append(new_sigs, sig_add, axis=1) # # and append to all the statistics types # 
for key, stats_arr in stats.iteritems(): # stats[key] = np.append(stats_arr, add_stats[key]) # calculate the resultants sig_resultants = np.ndarray((sig_size, len(chs_resultant))) inc = [] for j, chs in enumerate(chs_resultant): sig_res = np.ndarray((sig_size, len(chs))) lab = '' no_channel = False for i, ch in enumerate(chs): # if the channel does not exist, zet to zero try: chi = self.res.ch_dict[ch]['chi'] sig_res[:,i] = self.sig[:,chi] no_channel = False except KeyError: no_channel = True lab += ch.split('-')[-1] name = '-'.join(ch.split('-')[:-1] + [lab]) # when on of the components do no exist, we can not calculate # the resultant! if no_channel: rpl = (name, cname) print(' missing channel, no resultant for: %s, %s' % rpl) continue inc.append(j) sig_resultants[:,j] = np.sqrt(sig_res*sig_res).sum(axis=1) # resultant = np.sqrt(sig_resultants[:,j].reshape(self.res.N, 1)) # add_stats = self.res.calc_stats(resultant) # add_stats_i = stats['max'].shape[0] # add a new channel description for this resultant ch_dict_new[name] = {} ch_dict_new[name]['chi'] = i_new_chans ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans, 'ch_name':name}) i_new_chans += 1 # and append to all the statistics types # for key, stats_arr in stats.iteritems(): # stats[key] = np.append(stats_arr, add_stats[key]) if len(chs_resultant) > 0: # but only take the channels that where not missing new_sigs = np.append(new_sigs, sig_resultants[:,inc], axis=1) # calculate mechanical power first before deriving statistics # from it if calc_mech_power: name = 'stats-shaft-power' sig_pmech = np.ndarray((sig_size, 1)) sig_pmech[:,0] = self.shaft_power() # P_mech_stats = self.res.calc_stats(sig_pmech) # mech_stats_i = stats['max'].shape[0] # add a new channel description for the mechanical power ch_dict_new[name] = {} ch_dict_new[name]['chi'] = i_new_chans ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans, 'ch_name':name}) i_new_chans += 1 new_sigs = np.append(new_sigs, sig_pmech, axis=1) # and C_p_mech if A is not None: name = 'stats-cp-mech' if ch_wind is None: chiwind = self.res.ch_dict[self.find_windchan_hub()]['chi'] else: chiwind = self.res.ch_dict[ch_wind]['chi'] wind = self.res.sig[:,chiwind] cp = np.ndarray((sig_size, 1)) cp[:,0] = self.cp(-sig_pmech[:,0], wind, A) # add a new channel description for the mechanical power ch_dict_new[name] = {} ch_dict_new[name]['chi'] = i_new_chans ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans, 'ch_name':name}) i_new_chans += 1 new_sigs = np.append(new_sigs, cp, axis=1) try: try: nn_shaft = self.config['nn_shaft'] except: nn_shaft = 4 chan_t = 'shaft_nonrotate-shaft-node-%3.3i-forcevec-z'%nn_shaft i = self.res.ch_dict[chan_t]['chi'] thrust = self.res.sig[:,i] name = 'stats-ct' ct = np.ndarray((sig_size, 1)) ct[:,0] = self.ct(thrust, wind, A) ch_dict_new[name] = {} ch_dict_new[name]['chi'] = i_new_chans ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans, 'ch_name':name}) i_new_chans += 1 new_sigs = np.append(new_sigs, ct, axis=1) except KeyError: print(' can not calculate CT') # and append to all the statistics types # for key, stats_arr in stats.iteritems(): # stats[key] = np.append(stats_arr, P_mech_stats[key]) if save_new_sigs and new_sigs.shape[1] > 0: chis, keys = [], [] for key, value in ch_dict_new.items(): chis.append(value['chi']) keys.append(key) # sort on channel number, so it agrees with the new_sigs array isort = np.array(chis).argsort() keys = np.array(keys)[isort].tolist() df_new_sigs = pd.DataFrame(new_sigs, columns=keys) respath = 
os.path.join(case['[run_dir]'], case['[res_dir]']) resfile = case['[case_id]'] fname = os.path.join(respath, resfile + '_postres.csv') print(' saving post-processed res: %s...' % fname, end='') df_new_sigs.to_csv(fname, sep='\t') print('done!') del df_new_sigs ch_dict = self.res.ch_dict.copy() ch_dict.update(ch_dict_new) # ch_df = pd.concat([self.res.ch_df, pd.DataFrame(ch_df_new)]) # put all the extra channels into the results if we want to also # be able to calculate the fatigue loads on them. self.sig = np.append(self.sig, new_sigs, axis=1) # calculate the statistics values stats = self.res.calc_stats(self.sig, i0=i0, i1=i1) # Because each channel is a new row, it doesn't matter how many # data channels each case has, and this approach does not brake # when different cases have a different number of output channels # By default, just take all channels in the result file. if ch_sel_init is None: ch_sel = list(ch_dict.keys()) # ch_sel = ch_df.unique_ch_name.tolist() # ch_sel = [str(k) for k in ch_sel] print(' selecting all channels for statistics') # calculate the fatigue properties from selected channels fatigue, tags_fatigue = {}, [] if ch_fatigue_init is None: ch_fatigue = ch_sel print(' selecting all channels for fatigue') else: ch_fatigue = ch_fatigue_init for ch_id in ch_fatigue: chi = ch_dict[ch_id]['chi'] signal = self.sig[:,chi] if neq is None: neq_ = float(case['[duration]']) else: neq_ = neq eq = self.res.calc_fatigue(signal, no_bins=no_bins, neq=neq_, m=m) # save in the fatigue results fatigue[ch_id] = {} fatigue[ch_id]['neq'] = neq_ # when calc_fatigue succeeds, we should have as many items # as in m if len(eq) == len(m): for eq_, m_ in zip(eq, m): fatigue[ch_id]['m=%2.01f' % m_] = eq_ # when it fails, we get an empty list back else: for m_ in m: fatigue[ch_id]['m=%2.01f' % m_] = np.nan # build the fatigue tags for m_ in m: tag = 'm=%2.01f' % m_ tags_fatigue.append(tag) tags_fatigue.append('neq') # ----------------------------------------------------------------- # define the pandas data frame dict on first run # ----------------------------------------------------------------- # Only build the ch_sel collection once. By definition, the # statistics, fatigue and htc tags will not change if add_stats: # statistical parameters for statparam in list(stats.keys()): df_dict[statparam] = [] # # additional tags # for tag in tags: # df_dict[tag] = [] # fatigue data for tag in tags_fatigue: df_dict[tag] = [] add_stats = False for ch_id in ch_sel: chi = ch_dict[ch_id]['chi'] # ch_name is not unique anymore, this doesn't work obviously! # use the channel index instead, that is unique # chi = ch_df[ch_df.unique_ch_name==ch_id].chi.values[0] # sig_stat = [(0=value,1=index),statistic parameter, channel] # stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max # note that min, mean, std, and range are not relevant for index # values. Set to zero there. 
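# Note on the fatigue columns filled in below: with e.g. m=[3, 4, 6] the
# equivalent-load columns are named 'm=3.0', 'm=4.0' and 'm=6.0', plus
# 'neq' (which falls back to the case's '[duration]' tag when neq is None).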
# ------------------------------------------------------------- # Fill in all the values for the current data entry # ------------------------------------------------------------- # the auxiliry columns try: name = self.res.ch_details[chi,0] unit = self.res.ch_details[chi,1] desc = self.res.ch_details[chi,2] # the new channels from new_sigs are not in here except (IndexError, AttributeError) as e: name = ch_id desc = '' unit = '' df_dict['channel_name'].append(name) df_dict['channel_units'].append(unit) df_dict['channel_desc'].append(desc) df_dict['channel_nr'].append(chi) # each df line is a channel of case that needs to be id-eed df_dict[tag_chan].append(ch_id) # for all the statistics keys, save the values for the # current channel for statparam in list(stats.keys()): df_dict[statparam].append(stats[statparam][chi]) # and save the tags from the input htc file in order to # label each different case properly for tag in tags: df_dict[tag].append(case[tag]) # append any fatigue channels if applicable, otherwise nan if ch_id in fatigue: for m_fatigue, eq_ in fatigue[ch_id].items(): df_dict[m_fatigue].append(eq_) else: for tag in tags_fatigue: # TODO: or should this be NaN? df_dict[tag].append(np.nan) # when dealing with a lot of cases, save the stats data at # intermediate points to avoid memory issues if math.fmod(ii+1, saveinterval) == 0.0: df_dict2 = self._df_dict_check_datatypes(df_dict) # convert, save/update if isinstance(suffix, str): ext = suffix elif suffix is True: ext = '_%06i' % (ii+1) else: ext = '' # dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save, # update=update, csv=csv, suffix=ext) # TODO: test this first fname = os.path.join(post_dir, sim_id + '_statistics' + ext) dfs = misc.dict2df(df_dict2, fname, save=save, update=update, csv=csv, xlsx=xlsx, check_datatypes=False, complib=self.complib) df_dict2 = None df_dict = None add_stats = True # only save again when there is actual data in df_dict if df_dict is not None: # make consistent data types df_dict2 = self._df_dict_check_datatypes(df_dict) # convert, save/update if isinstance(suffix, str): ext = suffix elif suffix is True: ext = '_%06i' % ii else: ext = '' # dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save, # update=update, csv=csv, suffix=ext) # TODO: test this first fname = os.path.join(post_dir, sim_id + '_statistics' + ext) dfs = misc.dict2df(df_dict2, fname, save=save, update=update, csv=csv, xlsx=xlsx, check_datatypes=False, complib=self.complib) return dfs def _add2newsigs(self, ch_dict, name, i_new_chans, new_sigs, addendum): ch_dict[name] = {} ch_dict[name]['chi'] = i_new_chans i_new_chans += 1 return ch_dict, np.append(new_sigs, addendum, axis=1) # TODO: use the version in misc instead. def _df_dict_save(self, df_dict2, post_dir, sim_id, save=True, update=False, csv=True, suffix=None): """ Convert the df_dict to df and save/update. 
DEPRICATED, use misc.dict2df instead """ if isinstance(suffix, str): fpath = os.path.join(post_dir, sim_id + '_statistics' + suffix) else: fpath = os.path.join(post_dir, sim_id + '_statistics') # in case converting to dataframe fails, fall back try: dfs = pd.DataFrame(df_dict2) except Exception as e: FILE = open(fpath + '.pkl', 'wb') pickle.dump(df_dict2, FILE, protocol=2) FILE.close() # check what went wrong misc.check_df_dict(df_dict2) print('failed to convert to data frame, saved as dict') raise(e) # # apply categoricals to objects # for column_name, column_dtype in dfs.dtypes.iteritems(): # # applying categoricals mostly makes sense for objects # # we ignore all others # if column_dtype.name == 'object': # dfs[column_name] = dfs[column_name].astype('category') # and save/update the statistics database if save: if update: print('updating statistics: %s ...' % (post_dir + sim_id), end='') try: dfs.to_hdf('%s.h5' % fpath, 'table', mode='r+', append=True, format='table', complevel=9, complib=self.complib) except IOError: print('Can not update, file does not exist. Saving instead' '...', end='') dfs.to_hdf('%s.h5' % fpath, 'table', mode='w', format='table', complevel=9, complib=self.complib) else: print('saving statistics: %s ...' % (post_dir + sim_id), end='') if csv: dfs.to_csv('%s.csv' % fpath) dfs.to_hdf('%s.h5' % fpath, 'table', mode='w', format='table', complevel=9, complib=self.complib) print('DONE!!\n') return dfs # TODO: use the version in misc instead. def _df_dict_check_datatypes(self, df_dict): """ there might be a mix of strings and numbers now, see if we can have the same data type throughout a column nasty hack: because of the unicode -> string conversion we might not overwrite the same key in the dict. DEPRICATED, use misc.df_dict_check_datatypes instead """ # FIXME: this approach will result in twice the memory useage though... # we can not pop/delete items from a dict while iterating over it df_dict2 = {} for colkey, col in df_dict.items(): # if we have a list, convert to string if type(col[0]).__name__ == 'list': for ii, item in enumerate(col): col[ii] = '**'.join(item) # if we already have an array (statistics) or a list of numbers # do not try to cast into another data type, because downcasting # in that case will not raise any exception elif type(col[0]).__name__[:3] in ['flo', 'int', 'nda']: df_dict2[str(colkey)] = np.array(col) continue # in case we have unicodes instead of strings, we need to convert # to strings otherwise the saved .h5 file will have pickled elements try: df_dict2[str(colkey)] = np.array(col, dtype=np.int32) except OverflowError: try: df_dict2[str(colkey)] = np.array(col, dtype=np.int64) except OverflowError: df_dict2[str(colkey)] = np.array(col, dtype=np.float64) except ValueError: try: df_dict2[str(colkey)] = np.array(col, dtype=np.float64) except ValueError: df_dict2[str(colkey)] = np.array(col, dtype=np.str) except TypeError: # in all other cases, make sure we have converted them to # strings and NOT unicode df_dict2[str(colkey)] = np.array(col, dtype=np.str) except Exception as e: print('failed to convert column %s to single data type' % colkey) raise(e) return df_dict2 def fatigue_lifetime(self, dfs, neq_life, res_dir='res/', fh_lst=None, dlc_folder="dlc%s_iec61400-1ed3/", extra_cols=[], save=False, update=False, csv=False, new_sim_id=False, xlsx=False, years=20.0, silent=False): """ Cacluate the fatigue over a selection of cases and indicate how many hours each case contributes to its life time. 
This approach can only work reliably if the common DLC folder structure is followed. This also means that a 'dlc_config.xlsx' Excel file is required in the HAWC2 root directory (as defined in the [run_dir] tag). Parameters ---------- dfs : DataFrame Statistics Pandas DataFrame. When extra_cols is not defined, it should only hold the results of one standard organized DLC (one turbine, one inflow case). neq_life : float Reference number of cycles. Usually, neq is either set to 10e6, 10e7 or 10e8. res_dir : str, default='res/' Base directory of the results. Results would be located in res/dlc_folder/*.sel. Only relevant when fh_lst is None. dlc_folder : str, default="dlc%s_iec61400-1ed3/" String with the DLC subfolder names. One string substitution is required (%s), and should represent the DLC number (withouth comma or point). Not relevant when fh_lst is defined. extra_cols : list, default=[] The included columns are the material constants, and each row is a channel. When multiple DLC cases are included in dfs, the user has to define additional columns in order to distinguish between the DLC cases. fh_lst : list, default=None Number of hours for each case over its life time. Format: [(filename, hours),...] where, filename is the name of the file (can be a full path, but only the base path is considered), hours is the number of hours over the life time. When fh_lst is set, years, res_dir, dlc_folder and dlc_name are not used. years : float, default=20 Total life time expressed in years, only relevant when fh_lst is None. Returns ------- df_Leq : DataFrame Pandas DataFrame with the life time equivalent load for the given neq, all the channels, and a range of material parameters m. """ if not silent: print('Calculating life time fatigue load') if not isinstance(neq_life, float): neq_type = type(neq_life).__name__ msg = 'neq_life (reference nr of cycles for life time fatigue ' msg += 'load) should be a float instead of %s' % neq_type raise ValueError(msg) # get some basic parameters required to calculate statistics try: case = list(self.cases.keys())[0] except IndexError: if not silent: print('no cases to select so no statistics, aborting ...') return None post_dir = self.cases[case]['[post_dir]'] if not new_sim_id: # select the sim_id from a random case sim_id = self.cases[case]['[sim_id]'] else: sim_id = new_sim_id # FIXME: for backward compatibility, the column name of the unique # channel name has been changed in the past.... if 'unique_ch_name' in dfs.columns: chan_col_name = 'unique_ch_name' else: chan_col_name = 'channel' if fh_lst is None: # FIXME: wb has overlap with dlc_config.xlsx, and shape_k doesn't # seemed to be used by DLCHighLevel wb = WeibullParameters() if 'Weibull' in self.config: for key in self.config['Weibull']: setattr(wb, key, self.config['Weibull'][key]) # we assume the run_dir (root) is the same every where run_dir = self.cases[case]['[run_dir]'] fname = os.path.join(run_dir, 'dlc_config.xlsx') dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k, fail_on_resfile_not_found=True) # if you need all DLCs, make sure to have %s in the file name dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder) # no need to build list of result files, we already have it form # the statistics analysis # TODO: could be faster if working with df directly, but how to # assure you're res_dir is always ending with path separator? # only take the values from 1 channel, not all of them!! # FIXME: breaks when not all channels are present for all cases ! 
# solution: set channel "Time" as a minimum required channel! val = dfs[chan_col_name].values[0] sel = dfs[dfs[chan_col_name]==val] p1, p2 = sel['[res_dir]'].values, sel['[case_id]'].values files = [os.path.join(q1, q2) + '.sel' for q1, q2 in zip(p1, p2)] fh_lst = dlc_cfg.file_hour_lst(years=years, files=files) # now we have a full path to the result files, but we only need the # the case_id to indentify the corresponding entry from the statistics # DataFrame (exluciding the .sel extension) case_ids = [os.path.basename(k[0].replace('.sel', '')) for k in fh_lst] hours = [k[1] for k in fh_lst] # safe how many hours each case is active for AEP calculations for # debugging and inspection reasons. # FIXME: this should be somewhere in its own method or something, # and duplication with what is in AEP should be removed fname = os.path.join(post_dir, sim_id + '_Leq_hourlist') dict_Leq_h = {'case_id':case_ids, 'hours':hours} df_Leq_h = misc.dict2df(dict_Leq_h, fname, update=update, csv=csv, save=save, check_datatypes=True, xlsx=xlsx, complib=self.complib) # --------------------------------------------------------------------- # column definitions # --------------------------------------------------------------------- # available material constants ms, cols = [], [] for key in dfs: if key[:2] == 'm=': ms.append(key) # when multiple DLC cases are included, add extra cols to identify each # DLC group. Make a copy, because extra_cols does not get re-initiated # when defined as an optional keyword argument extra_cols_ = copy.copy(extra_cols + [chan_col_name]) cols = copy.copy(ms) cols.extend(extra_cols_) # --------------------------------------------------------------------- # Built the DataFrame, we do not have a unqique channel index dict_Leq = {col:[] for col in cols} # index on case_id on the original DataFrame so we can select accordingly dfs = dfs.set_index('[case_id]') # which rows to keep: a # select for each channel all the cases for grname, gr in dfs.groupby(dfs[chan_col_name]): # if one m has any nan's, assume none of them are good and throw # away # if np.isnan(gr[ms[0]].values).any(): # sel_rows.pop(grname) # continue # select the cases in the same order as the corresponding hours try: sel_sort = gr.loc[case_ids] except KeyError: if not silent: print(' ignore sensor for Leq:', grname) for col in extra_cols_: # at this stage we already should have one case, so its # identifiers should also be. val_unique = sel_sort[col].unique() if len(val_unique) > 1: print('found %i sets instead of 1:' % len(val_unique)) print(val_unique) raise ValueError('For Leq load, the given DataFrame can ' 'only hold one complete DLC set.') # values of the identifier columns for each case. We do this # in case the original dfs holds multiple DLC cases. dict_Leq[col].append(sel_sort[col].unique()[0]) # R_eq is assumed to be expressed as the 1Hz equivalent load # where neq is set to the simulation lenght # neq_1hz = sel_sort['neq'].values for m in ms: # sel_sort[m] holds the equivalent loads for each of the DLC # cases: such all the different wind speeds for dlc1.2 m_ = float(m.split('=')[1]) # do not multi-ply out neq_1hz from R_eq R_eq_mod = np.power(sel_sort[m].values, m_) # R_eq_mod will have to be scaled from its simulation length # to 1 hour (hour distribution is in hours...). 
Since the # simulation time has not been multiplied out of R_eq_mod yet, # we can just multiply with 3600 (instead of doing 3600/neq) tmp = (R_eq_mod * np.array(hours) * 3600).sum() # the effective Leq for each of the material constants dict_Leq[m].append(math.pow(tmp/neq_life, 1.0/m_)) # the following is twice as slow: # [i*j for (i,j) in zip(sel_sort[m].values.tolist(),hours)] # collens = misc.check_df_dict(dict_Leq) # make consistent data types, and convert to DataFrame fname = os.path.join(post_dir, sim_id + '_Leq') df_Leq = misc.dict2df(dict_Leq, fname, save=save, update=update, csv=csv, check_datatypes=True, xlsx=xlsx, complib=self.complib) # only keep the ones that do not have nan's (only works with index) return df_Leq def AEP(self, dfs, fh_lst=None, ch_powe='DLL-2-inpvec-2', extra_cols=[], res_dir='res/', dlc_folder="dlc%s_iec61400-1ed3/", csv=False, new_sim_id=False, save=False, years=20.0, update=False, xlsx=False): """ Calculate the Annual Energy Production (AEP) for DLC1.2 cases. Parameters ---------- dfs : DataFrame Statistics Pandas DataFrame. When extra_cols is not defined, it should only hold the results of one standard organized DLC (one turbine, one inflow case). fh_lst : list, default=None Number of hours for each case over its life time. Format: [(filename, hours),...] where, filename is the name of the file (can be a full path, but only the base path is considered), hours is the number of hours over the life time. When fh_lst is set, dlc_folder and dlc_name are not used. ch_powe : string, default='DLL-2-inpvec-2' extra_cols : list, default=[] The included column is just the AEP, and each row is a channel. When multiple DLC cases are included in dfs, the user has to define additional columns in order to distinguish between the DLC cases. res_dir : str, default='res/' Base directory of the results. Results would be located in res/dlc_folder/*.sel dlc_folder : str, default="dlc%s_iec61400-1ed3/" String with the DLC subfolder names. One string substitution is required (%s), and should represent the DLC number (withouth comma or point). Not relevant when fh_lst is defined. """ # get some basic parameters required to calculate statistics try: case = list(self.cases.keys())[0] except IndexError: print('no cases to select so no statistics, aborting ...') return None post_dir = self.cases[case]['[post_dir]'] if not new_sim_id: # select the sim_id from a random case sim_id = self.cases[case]['[sim_id]'] else: sim_id = new_sim_id # FIXME: for backward compatibility, the column name of the unique # channel name has been changed in the past.... if 'unique_ch_name' in dfs.columns: chan_col_name = 'unique_ch_name' else: chan_col_name = 'channel' if fh_lst is None: wb = WeibullParameters() if 'Weibull' in self.config: for key in self.config['Weibull']: setattr(wb, key, self.config['Weibull'][key]) # we assume the run_dir (root) is the same every where run_dir = self.cases[list(self.cases.keys())[0]]['[run_dir]'] fname = os.path.join(run_dir, 'dlc_config.xlsx') dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k) # if you need all DLCs, make sure to have %s in the file name dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder) # TODO: could be faster if working with df directly, but how to # assure you're res_dir is always ending with path separator? # FIXME: breaks when not all channels are present for all cases ! # solution: set channel "Time" as a minimum required channel! 
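# The AEP computed below is the hour-weighted sum of the mean power channel
# over the dlc12 cases. Purely illustrative numbers (power in the units of
# ch_powe): mean powers [2000.0, 3000.0] active for [3000.0, 2000.0] hours
# per year give AEP = 2000.0*3000.0 + 3000.0*2000.0 = 12.0e6 (power*hours).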
val = dfs[chan_col_name].values[0] sel = dfs[dfs[chan_col_name]==val] p1, p2 = sel['[res_dir]'].values, sel['[case_id]'].values files = [os.path.join(q1, q2) + '.sel' for q1, q2 in zip(p1, p2)] fh_lst = dlc_cfg.file_hour_lst(years=1.0, files=files) # now we have a full path to the result files, but we only need the # the case_id to indentify the corresponding entry from the statistics # DataFrame (exluciding the .sel extension) def basename(k): return os.path.basename(k[0].replace('.sel', '')) fh_lst_basename = [(basename(k), k[1]) for k in fh_lst] # only take dlc12 for power production case_ids = [k[0] for k in fh_lst_basename if k[0][:5]=='dlc12'] hours = [k[1] for k in fh_lst_basename if k[0][:5]=='dlc12'] # safe how many hours each case is active for AEP calculations for # debugging and inspection reasons. # FIXME: this should be somewhere in its own method or something, # and duplication with what is in fatigue_lifetime should be removed fname = os.path.join(post_dir, sim_id + '_AEP_hourlist') dict_AEP_h = {'case_id':case_ids, 'hours':hours} df_AEP_h = misc.dict2df(dict_AEP_h, fname, update=update, csv=csv, save=save, check_datatypes=True, xlsx=xlsx, complib=self.complib) # and select only the power channels dfs_powe = dfs[dfs[chan_col_name]==ch_powe] # by default we have AEP as a column cols = ['AEP'] cols.extend(extra_cols) # Built the DataFrame, we do not have a unqique channel index dict_AEP = {col:[] for col in cols} # index on case_id on the original DataFrame so we can select accordingly dfs_powe = dfs_powe.set_index('[case_id]') # select the cases in the same order as the corresponding hours sel_sort = dfs_powe.loc[case_ids] for col in extra_cols: # at this stage we already should have one case, so its # identifiers should also be. val_unique = sel_sort[col].unique() if len(val_unique) > 1: print('found %i sets instead of 1:' % len(val_unique)) print(val_unique) raise ValueError('For AEP, the given DataFrame can only hold' 'one complete DLC set. Make sure to identify ' 'the proper extra_cols to identify the ' 'different DLC sets.') # values of the identifier columns for each case. We do this # in case the original dfs holds multiple DLC cases. dict_AEP[col].append(sel_sort[col].unique()[0]) # and the AEP: take the average, multiply with the duration # duration = sel_sort['[duration]'].values # power_mean = sel_sort['mean'].values AEP = (sel_sort['mean'].values * np.array(hours)).sum() dict_AEP['AEP'].append(AEP) # make consistent data types, and convert to DataFrame fname = os.path.join(post_dir, sim_id + '_AEP') df_AEP = misc.dict2df(dict_AEP, fname, update=update, csv=csv, save=save, check_datatypes=True, xlsx=xlsx, complib=self.complib) return df_AEP def stats2dataframe(self, ch_sel=None, tags=['[seed]','[windspeed]']): """ Convert the archaic statistics dictionary of a group of cases to a more convienent pandas dataframe format. DEPRICATED, use statistics instead!! Parameters ---------- ch_sel : dict, default=None Map short names to the channel id's defined in ch_dict in order to have more human readable column names in the pandas dataframe. By default, if ch_sel is None, a dataframe for each channel in the ch_dict (so in the HAWC2 output) will be created. When ch_sel is defined, only those channels are considered. ch_sel[short name] = full ch_dict identifier tags : list, default=['[seed]','[windspeed]'] Select which tag values from cases should be included in the dataframes. This will help in selecting and identifying the different cases. 
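For example, a minimal (hypothetical) mapping of one short name to a full
channel identifier could look like:

>>> ch_sel = {'P_e': 'DLL-2-inpvec-2'}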
Returns ------- dfs : dict Dictionary of dataframes, where the key is the channel name of the output (that was optionally defined in ch_sel), and the value is the dataframe containing the statistical values for all the different selected cases. """ df_dict = {} for cname, case in self.cases.items(): # make sure the selected tags exist if len(tags) != len(set(case) and tags): raise KeyError('not all selected tags exist in cases') sig_stats = self.stats_dict[cname]['sig_stats'] ch_dict = self.stats_dict[cname]['ch_dict'] if ch_sel is None: ch_sel = { (i, i) for i in ch_dict } for ch_short, ch_name in ch_sel.items(): chi = ch_dict[ch_name]['chi'] # sig_stat = [(0=value,1=index),statistic parameter, channel] # stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max # note that min, mean, std, and range are not relevant for index # values. Set to zero there. try: df_dict[ch_short]['case name'].append(cname) df_dict[ch_short]['max'].append( sig_stats[0,0,chi]) df_dict[ch_short]['min'].append( sig_stats[0,1,chi]) df_dict[ch_short]['mean'].append( sig_stats[0,2,chi]) df_dict[ch_short]['std'].append( sig_stats[0,3,chi]) df_dict[ch_short]['range'].append( sig_stats[0,4,chi]) df_dict[ch_short]['absmax'].append(sig_stats[0,5,chi]) for tag in tags: df_dict[ch_short][tag].append(case[tag]) except KeyError: df_dict[ch_short] = {'case name' : [cname]} df_dict[ch_short]['max'] = [sig_stats[0,0,chi]] df_dict[ch_short]['min'] = [sig_stats[0,1,chi]] df_dict[ch_short]['mean'] = [sig_stats[0,2,chi]] df_dict[ch_short]['std'] = [sig_stats[0,3,chi]] df_dict[ch_short]['range'] = [sig_stats[0,4,chi]] df_dict[ch_short]['absmax'] = [sig_stats[0,5,chi]] for tag in tags: df_dict[ch_short][tag] = [ case[tag] ] # and create for each channel a dataframe dfs = {} for ch_short, df_values in df_dict.items(): dfs[ch_short] = pd.DataFrame(df_values) return dfs def load_azimuth(self, azi, load, sectors=360): """ Establish load dependency on rotor azimuth angle """ # sort on azimuth angle isort = np.argsort(azi) azi = azi[isort] load = load[isort] azi_sel = np.linspace(0, 360, num=sectors) load_sel = np.interp(azi_sel, azi, load) def find_windchan_hub(self): """ """ # if we sort we'll get the largest absolute coordinate last for ch in sorted(self.res.ch_dict.keys()): if ch[:29] == 'windspeed-global-Vy-0.00-0.00': chan_found = ch return chan_found def ct(self, thrust, wind, A, rho=1.225): return thrust / (0.5 * rho * A * wind * wind) def cp(self, power, wind, A, rho=1.225): return power / (0.5 * rho * A * wind * wind * wind) def shaft_power(self): """ Return the mechanical shaft power based on the shaft torsional loading """ try: i = self.res.ch_dict['bearing-shaft_rot-angle_speed-rpm']['chi'] rads = self.res.sig[:,i]*np.pi/30.0 except KeyError: try: i = self.res.ch_dict['bearing-shaft_rot-angle_speed-rads']['chi'] rads = self.res.sig[:,i] except KeyError: i = self.res.ch_dict['Omega']['chi'] rads = self.res.sig[:,i] try: nn_shaft = self.config['nn_shaft'] except: nn_shaft = 4 itorque = self.res.ch_dict['shaft-shaft-node-%3.3i-momentvec-z'%nn_shaft]['chi'] torque = self.res.sig[:,itorque] # negative means power is being extracted, which is exactly what a wind # turbine is about, we call that positive return -1.0*torque*rads def calc_torque_const(self, save=False, name='ojf'): """ If we have constant RPM over the simulation, calculate the torque constant. The current loaded HAWC2 case is considered. 
Consequently, first load a result file with load_result_file Parameters ---------- save : boolean, default=False name : str, default='ojf' File name of the torque constant result. Default to using the ojf case name. If set to hawc2, it will the case_id. In both cases the file name will be extended with '.kgen' Returns ------- [windspeed, rpm, K] : list """ # make sure the results have been loaded previously try: # get the relevant index to the wanted channels # tag: coord-bodyname-pos-sensortype-component tag = 'bearing-shaft_nacelle-angle_speed-rpm' irpm = self.res.ch_dict[tag]['chi'] chi_rads = self.res.ch_dict['Omega']['chi'] tag = 'shaft-shaft-node-001-momentvec-z' chi_q = self.res.ch_dict[tag]['chi'] except AttributeError: msg = 'load results first with Cases.load_result_file()' raise ValueError(msg) # if not self.case['[fix_rpm]']: # print # return windspeed = self.case['[windspeed]'] rpm = self.res.sig[:,irpm].mean() # and get the average rotor torque applied to maintain # constant rotor speed K = -np.mean(self.res.sig[:,chi_q]*1000./self.res.sig[:,chi_rads]) result = np.array([windspeed, rpm, K]) # optionally, save the values and give the case name as file name if save: fpath = self.case['[post_dir]'] + 'torque_constant/' if name == 'hawc2': fname = self.case['[case_id]'] + '.kgen' elif name == 'ojf': fname = self.case['[ojf_case]'] + '.kgen' else: raise ValueError('name should be either ojf or hawc2') # create the torque_constant dir if it doesn't exists try: os.makedirs(fpath) except OSError: pass # print('gen K saving at:', fpath+fname np.savetxt(fpath+fname, result, header='windspeed, rpm, K') return result def compute_envelopes(self, ch_list, int_env=False, Nx=300): """ The function computes load envelopes for given signals and a single load case. Starting from Mx and My moments, the other cross-sectional forces are identified. Parameters ---------- ch_list : list, list of channels for enevelope computation int_env : boolean, default=False If the logic parameter is True, the function will interpolate the envelope on a given number of points Nx : int, default=300 Number of points for the envelope interpolation Returns ------- envelope : dictionary, The dictionary has entries refered to the channels selected. Inside the dictonary under each entry there is a matrix with 6 columns, each for the sectional forces and moments """ envelope= {} for ch_names in ch_list: ichans = [] for ch_name in ch_names: ichans.append(self.res.ch_dict[ch_name]['chi']) cloud = self.res.sig[:, ichans] # Compute a Convex Hull, the vertices number varies according to # the shape of the poligon vertices = compute_envelope(cloud, int_env=int_env, Nx=Nx) envelope[ch_names[0]] = vertices return envelope def envelopes(self, silent=False, ch_list=[], append=''): """ Calculate envelopes and save them in a table. 
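A minimal sketch (the channel names are only illustrative HAWC2 sensor
names, not required defaults):

>>> ch_list = [['blade1-blade1-node-001-momentvec-x',
...             'blade1-blade1-node-001-momentvec-y']]
>>> cc.envelopes(ch_list=ch_list)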
Parameters ---------- Returns ------- """ # get some basic parameters required to calculate statistics try: case = list(self.cases.keys())[0] except IndexError: print('no cases to select so no statistics, aborting ...') return None post_dir = self.cases[case]['[post_dir]'] sim_id = self.cases[case]['[sim_id]'] if not silent: nrcases = len(self.cases) print('='*79) print('statistics for %s, nr cases: %i' % (sim_id, nrcases)) fname = os.path.join(post_dir, sim_id + '_envelope' + append + '.h5') h5f = tbl.open_file(fname, mode="w", title=str(sim_id), filters=tbl.Filters(complevel=9)) # Create a new group under "/" (root) for ii, (cname, case) in enumerate(self.cases.items()): groupname = str(cname[:-4]) groupname = groupname.replace('-', '_') ctab = h5f.create_group("/", groupname) if not silent: pc = '%6.2f' % (float(ii)*100.0/float(nrcases)) pc += ' %' print('envelope progress: %4i/%i %s' % (ii, nrcases, pc)) self.load_result_file(case) envelope = self.compute_envelopes(ch_list, int_env=False, Nx=300) for ch_id in ch_list: title = str(ch_id[0].replace('-', '_')) csv_table = h5f.create_table(ctab, title, EnvelopeClass.section, title=title) tablerow = csv_table.row for row in envelope[ch_id[0]]: tablerow['Mx'] = float(row[0]) tablerow['My'] = float(row[1]) if len(row)>2: tablerow['Mz'] = float(row[2]) if len(row)>3: tablerow['Fx'] = float(row[3]) tablerow['Fy'] = float(row[4]) tablerow['Fz'] = float(row[5]) else: tablerow['Fx'] = 0.0 tablerow['Fy'] = 0.0 tablerow['Fz'] = 0.0 else: tablerow['Mz'] = 0.0 tablerow['Fx'] = 0.0 tablerow['Fy'] = 0.0 tablerow['Fz'] = 0.0 tablerow.append() csv_table.flush() h5f.close() def force_lower_case_id(self): """Keep for backwards compatibility with the dlctemplate.py """ msg = "force_lower_case_id is depricated and is integrated in " msg += "Cases.createcase() instead." warnings.warn(msg, DeprecationWarning) tmp_cases = {} for cname, case in self.cases.items(): tmp_cases[cname.lower()] = case.copy() self.cases = tmp_cases class EnvelopeClass(object): """ Class with the definition of the table for the envelope results """ class section(tbl.IsDescription): Mx = tbl.Float32Col() My = tbl.Float32Col() Mz = tbl.Float32Col() Fx = tbl.Float32Col() Fy = tbl.Float32Col() Fz = tbl.Float32Col() # TODO: implement this class Results(object): """ Move all Hawc2io to here? NO: this should be the wrapper, to interface the htc_dict with the io functions There should be a bare metal module/class for those who only want basic python support for HAWC2 result files and/or launching simulations. How to properly design this module? Change each class into a module? Or leave like this? """ # OK, for now use this to do operations on HAWC2 results files def __init___(self): """ """ pass def m_equiv(self, st_arr, load, pos): r"""Centrifugal corrected equivalent moment Convert beam loading into a single equivalent bending moment. Note that this is dependent on the location in the cross section. Due to the way we measure the strain on the blade and how we did the calibration of those sensors. .. math:: \epsilon = \frac{M_{x_{equiv}}y}{EI_{xx}} = \frac{M_x y}{EI_{xx}} + \frac{M_y x}{EI_{yy}} + \frac{F_z}{EA} M_{x_{equiv}} = M_x + \frac{I_{xx}}{I_{yy}} M_y \frac{x}{y} + \frac{I_{xx}}{Ay} F_z Parameters ---------- st_arr : np.ndarray(19) Only one line of the st_arr is allowed and it should correspond to the correct radial position of the strain gauge. load : list(6) list containing the load time series of following components .. 
math:: load = F_x, F_y, F_z, M_x, M_y, M_z and where each component is an ndarray(m) pos : np.ndarray(2) x,y position wrt neutral axis in the cross section for which the equivalent load should be calculated Returns ------- m_eq : ndarray(m) Equivalent load, see main title """ F_z = load[2] M_x = load[3] M_y = load[4] x, y = pos[0], pos[1] A = st_arr[ModelData.st_headers.A] I_xx = st_arr[ModelData.st_headers.Ixx] I_yy = st_arr[ModelData.st_headers.Iyy] M_x_equiv = M_x + ( (I_xx/I_yy)*M_y*(x/y) ) + ( F_z*I_xx/(A*y) ) # or ignore edgewise moment #M_x_equiv = M_x + ( F_z*I_xx/(A*y) ) return M_x_equiv class MannTurb64(prepost.PBSScript): """ alfaeps, L, gamma, seed, nr_u, nr_v, nr_w, du, dv, dw high_freq_comp mann_turb_x64.exe fname 1.0 29.4 3.0 1209 256 32 32 2.0 5 5 true. Following tags have to be defined: * [tu_model] * [turb_base_name] * [MannAlfaEpsilon] * [MannL] * [MannGamma] * [seed] * [turb_nr_u] * [turb_nr_v] * [turb_nr_w] * [turb_dx] * [turb_dy] * [turb_dz] * [high_freq_comp] """ def __init__(self, silent=False): super(MannTurb64, self).__init__() self.exe = 'time WINEARCH=win64 WINEPREFIX=~/.wine wine mann_turb_x64.exe' self.winefix = 'winefix\n' # PBS configuration self.umask = '0003' self.walltime = '00:59:59' self.queue = 'workq' self.lnodes = '1' self.ppn = '1' self.silent = silent self.pbs_in_dir = 'pbs_in_turb/' def gen_pbs(self, cases): """ Parameters ---------- cases : dict of dicts each key holding a dictionary with tag/value pairs. """ case0 = cases[list(cases.keys())[0]] # make sure the path's end with a trailing separator, why?? self.pbsworkdir = os.path.join(case0['[run_dir]'], '') if not self.silent: print('\nStart creating PBS files for turbulence with Mann64...') for cname, case in cases.items(): # only relevant for cases with turbulence if '[tu_model]' in case and int(case['[tu_model]']) == 0: continue if '[turb_base_name]' not in case: continue base_name = case['[turb_base_name]'] # pbs_in/out dir can contain subdirs, only take the inner directory out_base = misc.path_split_dirs(case['[pbs_out_dir]'])[0] turb = case['[turb_dir]'] self.path_pbs_e = os.path.join(out_base, turb, base_name + '.err') self.path_pbs_o = os.path.join(out_base, turb, base_name + '.out') self.path_pbs_i = os.path.join(self.pbs_in_dir, base_name + '.p') # apply winefix self.prelude = self.winefix # browse to scratch dir self.prelude += 'cd {}\n'.format(self.scratchdir) self.coda = '# COPY BACK FROM SCRATCH AND RENAME, remove _ at end\n' # copy back to turb dir at the end if case['[turb_db_dir]'] is not None: dst = os.path.join('$PBS_O_WORKDIR', case['[turb_db_dir]'], base_name) else: dst = os.path.join('$PBS_O_WORKDIR', case['[turb_dir]'], base_name) # FIXME: Mann64 turb exe creator adds an underscore to output for comp in list('uvw'): src = '{}_{}.bin'.format(base_name, comp) dst2 = '{}{}.bin'.format(dst, comp) self.coda += 'cp {} {}\n'.format(src, dst2) # alfaeps, L, gamma, seed, nr_u, nr_v, nr_w, du, dv, dw high_freq_comp rpl = (float(case['[MannAlfaEpsilon]']), float(case['[MannL]']), float(case['[MannGamma]']), int(case['[seed]']), int(case['[turb_nr_u]']), int(case['[turb_nr_v]']), int(case['[turb_nr_w]']), float(case['[turb_dx]']), float(case['[turb_dy]']), float(case['[turb_dz]']), int(case['[high_freq_comp]'])) params = '%1.6f %1.6f %1.6f %i %i %i %i %1.4f %1.4f %1.4f %i' % rpl self.execution = '%s %s %s' % (self.exe, base_name, params) self.create(check_dirs=True) def eigenbody(cases, debug=False): """ Read HAWC2 body eigenalysis result file ======================================= 
This is basically a cases convience wrapper around Hawc2io.ReadEigenBody Parameters ---------- cases : dict{ case : dict{tag : value} } Dictionary where each case is a key and its value a dictionary holding all the tags/value pairs as used for that case. Returns ------- cases : dict{ case : dict{tag : value} } Dictionary where each case is a key and its value a dictionary holding all the tags/value pairs as used for that case. For each case, it is updated with the results, results2 of the eigenvalue analysis performed for each body using the following respective tags: [eigen_body_results] and [eigen_body_results2]. """ #Body data for body number : 3 with the name :nacelle #Results: fd [Hz] fn [Hz] log.decr [%] #Mode nr: 1: 1.45388E-21 1.74896E-03 6.28319E+02 for case in cases: # tags for the current case tags = cases[case] file_path = os.path.join(tags['[run_dir]'], tags['[eigenfreq_dir]']) # FIXME: do not assuem anything about the file name here, should be # fully defined in the tags/dataframe file_name = tags['[case_id]'] + '_body_eigen' # and load the eigenfrequency body results results, results2 = windIO.ReadEigenBody(file_path, file_name, nrmodes=10) # add them to the htc_dict cases[case]['[eigen_body_results]'] = results cases[case]['[eigen_body_results2]'] = results2 return cases def eigenstructure(cases, debug=False): """ Read HAWC2 structure eigenalysis result file ============================================ This is basically a cases convience wrapper around windIO.ReadEigenStructure Parameters ---------- cases : dict{ case : dict{tag : value} } Dictionary where each case is a key and its value a dictionary holding all the tags/value pairs as used for that case. Returns ------- cases : dict{ case : dict{tag : value} } Dictionary where each case is a key and its value a dictionary holding all the tags/value pairs as used for that case. For each case, it is updated with the modes_arr of the eigenvalue analysis performed for the structure. The modes array (ndarray(3,n)) holds fd, fn and damping. """ for case in cases: # tags for the current case tags = cases[case] file_path = os.path.join(tags['[run_dir]'], tags['[eigenfreq_dir]']) # FIXME: do not assuem anything about the file name here, should be # fully defined in the tags/dataframe file_name = tags['[case_id]'] + '_strc_eigen' # and load the eigenfrequency structure results modes = windIO.ReadEigenStructure(file_path, file_name, max_modes=500) # add them to the htc_dict cases[case]['[eigen_structure]'] = modes return cases if __name__ == '__main__': pass
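# A minimal end-to-end sketch of the standalone helpers in this module (the
# post_dir/sim_id values are hypothetical, and the cases are assumed to carry
# the usual Mann and eigen-analysis tags such as [turb_base_name] and
# [eigenfreq_dir]):
# >>> cc = Cases('processing/', 'demo_sim_id')
# >>> pbs = MannTurb64(silent=True)
# >>> pbs.gen_pbs(cc.cases)           # PBS files for the Mann turbulence boxes
# >>> cases = eigenbody(cc.cases)     # adds [eigen_body_results] to each case
# >>> cases = eigenstructure(cases)   # adds [eigen_structure] to each case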