# Reconstructed from a whitespace-collapsed git patch:
#   commit  a4e4707047921f1d124fef72255bba93b6ad3847
#   From:   dave <dave@dtu.dk>
#   Date:   Tue, 9 Aug 2016 18:53:25 +0200
#   Subject: [PATCH] prepost.windIO: add method to LogFile to convert csv
#            format to df
#   Target: wetb/prepost/windIO.py (1 file changed, 73 insertions)
#
# The two functions below are the methods the patch adds to
# ``class LogFile(object)``; indent them into the class body when applying.
# The same patch also adds the ``itertools.chain`` import below to the module
# imports and adds ``self.header = None`` directly after
# ``self.sim_cols = len(self.err_sim)`` in LogFile.
from itertools import chain


def csv2df(self, fname):
    """Read a csv log file analysis and convert to a pandas.DataFrame

    The file is read with ';' as separator. Its first row is treated as a
    header and replaced by the column names from ``self.headers4df()``.
    Every column listed in the ``dtypes`` mapping of ``headers4df`` is then
    cast to that data type.

    Parameters
    ----------

    fname : str
        Handed to pandas.read_csv as the csv file to read.

    Returns
    -------

    df : pandas.DataFrame
        One row per row of the csv file. Missing values in the str columns
        are returned as empty strings, in the float columns as NaN.
    """
    colnames, _, dtypes = self.headers4df()
    df = pd.read_csv(fname, header=0, names=colnames, sep=';')
    for col, dtype in dtypes.items():
        if dtype is str:
            # Blank out only the cells that were really missing. A substring
            # replace of 'nan' on the casted column would also cut "nan" out
            # of genuine messages.
            column = df[col]
            df[col] = column.astype(str).mask(column.isnull(), '')
        else:
            df[col] = df[col].astype(dtype)
    return df


def headers4df(self):
    """Create header and a minimum itemsize for string columns when
    converting a Log check analysis to a pandas.DataFrame

    Returns
    -------

    header : list
        List of column names: 'file_name', the 'nr_%i'/'msg_%i' pairs for
        i = 0..30, the 'first_tstep_%i'/'last_step_%i'/'nr_%i'/'msg_%i'
        groups for i = 100..104, 'nr_extra', 'msg_extra' and the simulation
        statistics columns. Intended to follow the layout written by
        LogFile._header (not verifiable from this patch alone).

    min_itemsize : dict
        Dictionary with column names as keys, and the minimum string length
        as values. Holds 100 for 'msg_0'..'msg_29'.

    dtypes : dict
        Dictionary with column names as keys, and data types as values
    """
    init_ids = range(31)
    sim_ids = range(100, 105)
    # NOTE(review): column names are generated for i = 0..30, but data types
    # and item sizes only for i = 0..29, so 'nr_30'/'msg_30' are never cast.
    # Kept as is; confirm against the number of messages in LogFile.
    typed_init_ids = range(30)

    colnames = ['file_name']
    colnames.extend(chain.from_iterable(
        ('nr_%i' % i, 'msg_%i' % i) for i in init_ids))

    # NOTE(review): 'last_step_%i' looks like it was meant to mirror
    # 'first_tstep_%i'. It is a runtime column name, so it is left untouched.
    sim_fmts = ('first_tstep_%i', 'last_step_%i', 'nr_%i', 'msg_%i')
    colnames.extend(chain.from_iterable(
        [fmt % i for fmt in sim_fmts] for i in sim_ids))

    colnames.extend(['nr_extra', 'msg_extra'])
    colnames.extend(['elapsted_time',
                     'last_time_step',
                     'simulation_time',
                     'real_sim_time',
                     'sim_output_time',
                     'total_iterations',
                     'dt',
                     'nr_time_steps',
                     'seconds_p_iteration',
                     'mean_iters_p_time_step',
                     'max_iters_p_time_step',
                     'sim_id'])

    # message columns hold text
    msg_cols = ['msg_%i' % i for i in chain(typed_init_ids, sim_ids)]
    # column names holding the number of occurrences of messages, followed
    # by the other float values
    nr_cols = ['nr_%i' % i for i in chain(typed_init_ids, sim_ids)]
    nr_cols.extend(['elapsted_time', 'total_iterations'])

    dtypes = {k: str for k in msg_cols}
    # NaN only exists in float arrays, not integers (NumPy limitation)
    # so use float instead of int
    dtypes.update({k: np.float64 for k in nr_cols})

    # make the message/str columns long enough
    min_itemsize = {'msg_%i' % i: 100 for i in typed_init_ids}

    return colnames, min_itemsize, dtypes