diff --git a/wetb/prepost/dlcdefs.py b/wetb/prepost/dlcdefs.py
index 6938cb5dc4c7d6638ddc4eba6d80b6bc511d1a71..fa821ffeec438135ab823c39d40111f4dbe072a0 100644
--- a/wetb/prepost/dlcdefs.py
+++ b/wetb/prepost/dlcdefs.py
@@ -483,9 +483,10 @@ def excel_stabcon(proot, fext='xlsx', pignore=None, pinclude=None, sheet=0,
             tags_dict['[pi_gen_reg1.K]'] = hs2.pi_gen_reg1.K
             tags_dict['[pi_gen_reg2.Kp]'] = hs2.pi_gen_reg2.Kp
             tags_dict['[pi_gen_reg2.Ki]'] = hs2.pi_gen_reg2.Ki
-            tags_dict['[pi_gen_reg2.Kd]'] = 0.0
+            tags_dict['[pi_gen_reg2.Kd]'] = hs2.pi_gen_reg2.Kd
             tags_dict['[pi_pitch_reg3.Kp]'] = hs2.pi_pitch_reg3.Kp
             tags_dict['[pi_pitch_reg3.Ki]'] = hs2.pi_pitch_reg3.Ki
+            tags_dict['[pi_pitch_reg3.Kd]'] = hs2.pi_pitch_reg3.Kd
             tags_dict['[pi_pitch_reg3.K1]'] = hs2.pi_pitch_reg3.K1
             tags_dict['[pi_pitch_reg3.K2]'] = hs2.pi_pitch_reg3.K2
             tags_dict['[aero_damp.Kp2]'] = hs2.aero_damp.Kp2
diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py
index 1a12702e72efc1140452df42bf7c3e0de06e50f3..ee86a5dd9f3d0241ced94e9df46bdde8370bf0ee 100644
--- a/wetb/prepost/dlctemplate.py
+++ b/wetb/prepost/dlctemplate.py
@@ -19,15 +19,14 @@ import socket
 from argparse import ArgumentParser
 from sys import platform
 
-#import numpy as np
-#import pandas as pd
+import numpy as np
+import pandas as pd
 from matplotlib import pyplot as plt
 #import matplotlib as mpl
 
 from wetb.prepost import Simulations as sim
-from wetb.prepost import dlcdefs
-from wetb.prepost import dlcplots
-from wetb.prepost.simchunks import create_chunks_htc_pbs
+from wetb.prepost import (dlcdefs, dlcplots, windIO)
+from wetb.prepost.simchunks import (create_chunks_htc_pbs, AppendDataFrames)
 from wetb.prepost.GenerateDLCs import GenerateDLCCases
 
 plt.rc('font', family='serif')
@@ -459,6 +458,68 @@ def post_launch(sim_id, statistics=True, rem_failed=True, check_logs=True,
 
     return df_stats, df_AEP, df_Leq
 
 
+def postpro_node_merge(tqdm=False):
+    """With postpro_node each individual case has a .csv file for the log
+    file analysis and a .csv file for the statistics tables. Merge all these
+    single files into one table/DataFrame.
+
+    Parameters
+    ----------
+
+    tqdm : boolean, default=False
+        Set to True for displaying a progress bar (provided by the tqdm
+        module) when merging all csv files into a single table/pd.DataFrame.
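+
+    Examples
+    --------
+    A minimal usage sketch: the merge operates on the module level globals
+    P_RUN, POST_DIR and sim_id, so it is normally triggered from the command
+    line after all cases have finished running on the cluster::
+
+        python dlctemplate.py --postpro_node_merge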
+
+    """
+    # -------------------------------------------------------------------------
+    # MERGE POSTPRO ON NODE APPROACH INTO ONE DataFrame
+    # -------------------------------------------------------------------------
+    lf = windIO.LogFile()
+    path_pattern = os.path.join(P_RUN, 'logfiles', '*', '*.csv')
+    csv_fname = '%s_ErrorLogs.csv' % sim_id
+    fcsv = os.path.join(POST_DIR, csv_fname)
+    mdf = AppendDataFrames(tqdm=tqdm)
+    # the individual log file analysis files do not have a header, so make
+    # sure to include one line for the header in the joined file
+    mdf.txt2txt(fcsv, path_pattern, tarmode='r:xz', header=None,
+                header_fjoined=lf._header(), recursive=True)
+    # convert from CSV to DataFrame
+    df = lf.csv2df(fcsv)
+    df.to_hdf(fcsv.replace('.csv', '.h5'), 'table')
+    # -------------------------------------------------------------------------
+    path_pattern = os.path.join(P_RUN, 'res', '*', '*.csv')
+    csv_fname = '%s_statistics.csv' % sim_id
+    fcsv = os.path.join(POST_DIR, csv_fname)
+    mdf = AppendDataFrames(tqdm=tqdm)
+    # the individual statistics files do have a header on the first line,
+    # only keep it once in the joined file
+    mdf.txt2txt(fcsv, path_pattern, tarmode='r:xz', header=0, sep=',',
+                header_fjoined=None, recursive=True, fname_col='[case_id]')
+    # and convert to df: takes 2 minutes
+    fdf = fcsv.replace('.csv', '.h5')
+    store = pd.HDFStore(fdf, mode='w', format='table', complevel=9,
+                        complib='zlib')
+    colnames = ['channel', 'max', 'min', 'mean', 'std', 'range',
+                'absmax', 'rms', 'int', 'm=3', 'm=4', 'm=6', 'm=8', 'm=10',
+                'm=12', 'intabs', '[case_id]']
+    dtypes = {col:np.float64 for col in colnames}
+    dtypes['channel'] = str
+    dtypes['[case_id]'] = str
+    mdf.csv2df_chunks(store, fcsv, chunksize=300000, min_itemsize={}, sep=',',
+                      colnames=colnames, dtypes=dtypes, header=0)
+    store.close()
+    # -------------------------------------------------------------------------
+    # merge missing cols onto stats
+    required = ['[DLC]', '[run_dir]', '[wdir]', '[Windspeed]', '[res_dir]',
+                '[case_id]']
+    df = pd.read_hdf(fdf, 'table')
+    cc = sim.Cases(POST_DIR, sim_id)
+    df_tags = cc.cases2df()[required]
+    df_stats = pd.merge(df, df_tags, on=['[case_id]'])
+    df_stats.to_hdf(fdf, 'table')
+    df_stats.to_csv(fdf.replace('.h5', '.csv'))
+
+
 if __name__ == '__main__':
     parser = ArgumentParser(description = "pre- or post-processes DLC's")
@@ -516,6 +577,12 @@ if __name__ == '__main__':
                         dest='postpro_node', help='Perform the log analysis '
                         'and stats calculation on the node right after the '
                         'simulation has finished.')
+    parser.add_argument('--postpro_node_merge', default=False,
+                        action='store_true', dest='postpro_node_merge',
+                        help='Merge all individual statistics and log file '
+                        'analysis .csv files into one table/pd.DataFrame. '
+                        'Requires that htc files have been created with '
+                        '--prep --postpro_node.')
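+    # a typical sequence would be as follows (sketch, site specific
+    # scheduling details aside):
+    #   python dlctemplate.py --prep --postpro_node
+    #   ... wait until all jobs have finished ...
+    #   python dlctemplate.py --postpro_node_merge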
     parser.add_argument('--gendlcs', default=False, action='store_true',
                         help='Generate DLC exchange files based on master DLC '
                         'spreadsheet.')
@@ -578,6 +645,8 @@ if __name__ == '__main__':
                     save_new_sigs=opt.save_new_sigs, save_iter=False,
                     envelopeturbine=opt.envelopeturbine,
                     envelopeblade=opt.envelopeblade)
+    if opt.postpro_node_merge:
+        postpro_node_merge()
     if opt.dlcplot:
         plot_chans = {}
         plot_chans['$B1_{flap}$'] = ['setbeta-bladenr-1-flapnr-1']
diff --git a/wetb/prepost/hawcstab2.py b/wetb/prepost/hawcstab2.py
index ec1c7020e1ec19c6284df9b0e192472b769e6df5..9eaaf88a6108a6c13210557c345b7bbff1f793af 100644
--- a/wetb/prepost/hawcstab2.py
+++ b/wetb/prepost/hawcstab2.py
@@ -373,6 +373,10 @@ class ReadControlTuning(object):
 
         # set some parameters to zero for the linear case, or when aerodynamic
         # gain scheduling is not used
+        if not hasattr(self.pi_gen_reg2, 'Kd'):
+            setattr(self.pi_gen_reg2, 'Kd', 0.0)
+        if not hasattr(self.pi_pitch_reg3, 'Kd'):
+            setattr(self.pi_pitch_reg3, 'Kd', 0.0)
         if not hasattr(self.pi_pitch_reg3, 'K2'):
             setattr(self.pi_pitch_reg3, 'K2', 0.0)
         if not hasattr(self.aero_damp, 'Kp2'):
@@ -394,10 +398,11 @@ class ReadControlTuning(object):
         tune_tags['[pi_gen_reg2.I]'] = self.pi_gen_reg2.I
         tune_tags['[pi_gen_reg2.Kp]'] = self.pi_gen_reg2.Kp
         tune_tags['[pi_gen_reg2.Ki]'] = self.pi_gen_reg2.Ki
-        tune_tags['[pi_gen_reg2.Kd]'] = 0.0
+        tune_tags['[pi_gen_reg2.Kd]'] = self.pi_gen_reg2.Kd
 
         tune_tags['[pi_pitch_reg3.Kp]'] = self.pi_pitch_reg3.Kp
         tune_tags['[pi_pitch_reg3.Ki]'] = self.pi_pitch_reg3.Ki
+        tune_tags['[pi_pitch_reg3.Kd]'] = self.pi_pitch_reg3.Kd
         tune_tags['[pi_pitch_reg3.K1]'] = self.pi_pitch_reg3.K1
         tune_tags['[pi_pitch_reg3.K2]'] = self.pi_pitch_reg3.K2
 
diff --git a/wetb/prepost/mplutils.py b/wetb/prepost/mplutils.py
index 08dcf1f990065ac67c932b4d1fc1de792c1ef374..96f7ae63608ae9a99370739855ca3e31bda3fb37 100644
--- a/wetb/prepost/mplutils.py
+++ b/wetb/prepost/mplutils.py
@@ -278,6 +278,38 @@ def match_yticks(ax1, ax2, nr_ticks_forced=None, extend=False):
     return ax1, ax2
 
 
+def psd(ax, time, sig, nfft=None, res_param=250, f0=0, f1=None, nr_peaks=10,
+        min_h=15, mark_peaks=False, col='r-', label=None, alpha=1.0,
+        ypos_peaks=0.9, ypos_peaks_delta=0.12):
+    """Plot only the psd (no time series panel) on a given axis, and
+    optionally mark the peaks.
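+
+    Examples
+    --------
+    A minimal sketch, assuming ``from matplotlib import pyplot as plt`` and
+    that time and sig are 1D arrays on an equidistant time grid::
+
+        fig, ax = plt.subplots(1, 1)
+        ax = psd(ax, time, sig, mark_peaks=True, col='b-', label='signal')
+        ax.set_yscale('log')
+        ax.legend()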
+ """ + + sps = int(round(1.0/np.diff(time).mean(), 0)) + if f1 is None: + f1 = sps/2.0 + + if nfft is None: + nfft = int(round(res_param * sps / (f1-f0), 0)) + if nfft > len(sig): + nfft = len(sig) + + # calculate the PSD + Pxx, freqs = mpl.mlab.psd(sig, NFFT=nfft, Fs=sps) + + i0 = np.abs(freqs - f0).argmin() + i1 = np.abs(freqs - f1).argmin() + + # plotting psd, marking peaks + ax.plot(freqs[i0:i1], Pxx[i0:i1], col, label=label, alpha=alpha) + if mark_peaks: + ax = peaks(ax, freqs[i0:i1], Pxx[i0:i1], fn_max=f1, + nr_peaks=nr_peaks, col_line=col[:1], + ypos_delta=ypos_peaks_delta, bbox_alpha=0.5, + ypos_mean=ypos_peaks, min_h=min_h, col_text='w') + + return ax + + def time_psd(results, labels, axes, alphas=[1.0, 0.7], colors=['k-', 'r-'], NFFT=None, res_param=250, f0=0, f1=None, nr_peaks=10, min_h=15, mark_peaks=False, xlabels=['frequency [Hz]', 'time [s]'], @@ -317,32 +349,15 @@ def time_psd(results, labels, axes, alphas=[1.0, 0.7], colors=['k-', 'r-'], label = labels[i] col = colors[i] alpha = alphas[i] - sps = int(round(1.0/np.diff(time).mean(), 0)) - if f1 is None: - f1 = sps/2.0 - - if NFFT is None: - nfft = int(round(res_param * sps / (f1-f0), 0)) - elif isinstance(NFFT, list): + if isinstance(NFFT, list): nfft = NFFT[i] else: nfft = NFFT - if nfft > len(data): - nfft = len(data) - - # calculate the PSD - Pxx, freqs = mpl.mlab.psd(data, NFFT=nfft, Fs=sps) - - i0 = np.abs(freqs - f0).argmin() - i1 = np.abs(freqs - f1).argmin() - - # plotting psd, marking peaks - axes[0].plot(freqs[i0:i1], Pxx[i0:i1], col, label=label, alpha=alpha) - if mark_peaks: - axes[0] = peaks(axes[0], freqs[i0:i1], Pxx[i0:i1], fn_max=f1, - nr_peaks=nr_peaks, col_line=col[:1], - ypos_delta=ypos_peaks_delta, bbox_alpha=0.5, - ypos_mean=ypos_peaks[i], min_h=min_h, col_text='w') + axes[0] = psd(axes[0], time, data, nfft=nfft, res_param=res_param, + f0=f0, f1=f1, nr_peaks=nr_peaks, min_h=min_h, + mark_peaks=mark_peaks, col=col, label=label, alpha=alpha, + ypos_peaks=ypos_peaks, ypos_peaks_delta=ypos_peaks_delta) + # plotting time series axes[1].plot(time, data, col, label=label, alpha=alpha) diff --git a/wetb/prepost/simchunks.py b/wetb/prepost/simchunks.py index 0fc2f8eff7501e3ccc26cb14de71518a6c2df8f1..3207c50e2b623f62f1ad4f8c4ce4d9dbb9bda462 100644 --- a/wetb/prepost/simchunks.py +++ b/wetb/prepost/simchunks.py @@ -24,6 +24,8 @@ import zipfile import copy import tarfile import glob +import shutil +import tempfile import numpy as np import pandas as pd @@ -548,7 +550,6 @@ def merge_from_tarfiles(df_fname, path, pattern, tarmode='r:xz', tqdm=False, return df, storecols store.close() - return None, None # TODO: make this class more general so you can also just give a list of files @@ -566,10 +567,27 @@ class AppendDataFrames(object): return itereable self.tqdm = tqdm + def _open(self, fname, tarmode='r:xz'): + """Open text file directly or from a tar archive. 
+        """
+
+        if fname.find('.tar') > -1:
+            with tarfile.open(fname, mode=tarmode) as tar:
+                for tarinfo in tar.getmembers():
+                    linesb = tar.extractfile(tarinfo).readlines()
+                    # convert from bytes to strings
+                    lines = [line.decode() for line in linesb]
+                    yield lines, tarinfo.name
+        else:
+            with open(fname, 'r') as f:
+                lines = f.readlines()
+            yield lines, os.path.basename(fname)
+
     def df2store(self, store, path, tarmode='r:xz', min_itemsize={},
                  colnames=None, header='infer', columns=None, sep=';',
                  index2col=None, ignore_index=True, fname_col=False):
-        """
+        """This is very slow, use txt2txt instead.
         """
 
         # TODO: it seems that with threading you could parallelize this kind
@@ -614,40 +632,69 @@ class AppendDataFrames(object):
     # FIXME: when merging log file analysis (files with header), we are still
     # skipping over one case
     def txt2txt(self, fjoined, path, tarmode='r:xz', header=None, sep=';',
-                fname_col=False):
+                fname_col=False, header_fjoined=None, recursive=False):
         """Read as strings, write to another file as strings.
+
+        Parameters
+        ----------
+
+        fjoined : str
+            File name of the joined output file.
+
+        path : str
+            Glob path pattern that selects the files to be joined.
+
+        tarmode : str, default='r:xz'
+            Mode with which tar archives are opened.
+
+        header : int, default=None
+            Indicate if the data files contain a header and on which line it
+            is located. Set to None if the data files do not contain a
+            header, and in that case the joined file will not contain a
+            header either. All lines above the header are ignored.
+
+        sep : str, default=';'
+            Column separator.
+
+        fname_col : str, default=False
+            If a string, add an extra column under this name holding the
+            case id (the source file name without its extension).
+
+        header_fjoined : str, default=None
+            If the data files do not contain a header, write out
+            header_fjoined as the header of the joined file.
+
+        recursive : boolean, default=False
+            Passed on to glob.glob (the pattern '**' will then match any
+            number of subdirectories).
+
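+        Examples
+        --------
+        A sketch of joining per case statistics files into one csv table
+        (the path pattern and column name are illustrative)::
+
+            mdf = AppendDataFrames(tqdm=False)
+            mdf.txt2txt('all_stats.csv', 'res/*/*.csv', header=0, sep=',',
+                        fname_col='[case_id]', recursive=True)
+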
         """
-        if header is not None:
+        if isinstance(header, int):
             write_header = True
             icut = header + 1
         else:
             # when header is None, there is no header
             icut = 0
             write_header = False
-        with open(fjoined, 'w') as f:
-            for fname in self.tqdm(glob.glob(path)):
-                with tarfile.open(fname, mode=tarmode) as tar:
-                    for tarinfo in tar.getmembers():
-                        linesb = tar.extractfile(tarinfo).readlines()
-                        # convert from bytes to strings
-                        lines = [line.decode() for line in linesb]
-                        # only include the header at the first round
-                        if write_header:
-                            line = lines[header]
-                            # add extra column with the file name if applicable
-                            if fname_col:
-                                rpl = sep + fname_col + '\n'
-                                line = line.replace('\n', rpl)
-                            f.write(line)
-                            write_header = False
-                        # but cut out the header on all other occurances
-                        for line in lines[icut:]:
-                            if fname_col:
-                                case_id = os.path.basename(tarinfo.name)
-                                case_id = '.'.join(case_id.split('.')[:-1])
-                                line = line.replace('\n', sep + case_id + '\n')
-                            f.write(line)
-                        f.flush()
+        if isinstance(header_fjoined, str):
+            write_header = True
+
+        with tempfile.NamedTemporaryFile(mode='w', delete=False) as ft:
+            ftname = ft.name
+            for fname in self.tqdm(glob.glob(path, recursive=recursive)):
+                for lines, case_id in self._open(fname, tarmode=tarmode):
+                    # only include the header at the first round
+                    if write_header:
+                        if header_fjoined is None:
+                            header_fjoined = lines[header]
+                        # add extra column with the file name if applicable
+                        if fname_col:
+                            rpl = sep + fname_col + '\n'
+                            header_fjoined = header_fjoined.replace('\n', rpl)
+                        ft.write(header_fjoined)
+                        write_header = False
+                    # but cut out the header on all other occurrences
+                    case_id = '.'.join(case_id.split('.')[:-1])
+                    for line in lines[icut:]:
+                        if fname_col:
+                            line = line.replace('\n', sep + case_id + '\n')
+                        ft.write(line)
+                    ft.flush()
+
+        # and move from temp dir to fjoined
+        shutil.move(ftname, fjoined)
 
     def csv2df_chunks(self, store, fcsv, chunksize=100000,
                       min_itemsize={}, colnames=None, dtypes={}, header='infer',
                       sep=';'):
diff --git a/wetb/prepost/windIO.py b/wetb/prepost/windIO.py
index c0ae4ba4b0a385ece0c452febc02838e70b91b79..2e4c71b7e4b3713eccfefd567f03f8900008bb9b 100755
--- a/wetb/prepost/windIO.py
+++ b/wetb/prepost/windIO.py
@@ -380,11 +380,11 @@ class LogFile(object):
             contents = contents + '\n'
         return contents
 
-    def csv2df(self, fname):
+    def csv2df(self, fname, header=0):
         """Read a csv log file analysis and convert to a pandas.DataFrame
         """
         colnames, min_itemsize, dtypes = self.headers4df()
-        df = pd.read_csv(fname, header=0, names=colnames, sep=';', )
+        df = pd.read_csv(fname, header=header, names=colnames, sep=';')
         for col, dtype in dtypes.items():
             df[col] = df[col].astype(dtype)
         # replace nan with empty for str columns