Skip to content
Snippets Groups Projects
Commit a4e47070 authored by David Verelst's avatar David Verelst
Browse files

prepost.windIO: add method to LogFile to convert csv format to df

parent 4ef58a75
No related branches found
No related tags found
No related merge requests found
......@@ -27,6 +27,7 @@ import struct
import math
from time import time
import codecs
from itertools import chain
import scipy.integrate as integrate
import numpy as np
......@@ -136,6 +137,7 @@ class LogFile(object):
self.init_cols = len(self.err_init)
self.sim_cols = len(self.err_sim)
self.header = None
def readlog(self, fname, case=None, save_iter=False):
"""
......@@ -354,6 +356,18 @@ class LogFile(object):
contents = contents + '\n'
return contents
def csv2df(self, fname):
    """Read a csv log file analysis and convert to a pandas.DataFrame

    Parameters
    ----------
    fname : str or file-like
        Semicolon separated csv file, passed on to pandas.read_csv. Its
        first row is treated as a header and replaced by the column names
        returned by headers4df.

    Returns
    -------
    df : pandas.DataFrame
        One column per name returned by headers4df. Columns listed in the
        dtypes dictionary of headers4df are cast accordingly; missing
        values in the string columns are turned into empty strings.
    """
    colnames, _, dtypes = self.headers4df()
    df = pd.read_csv(fname, header=0, names=colnames, sep=';')
    for col, dtype in dtypes.items():
        if dtype is str:
            # Remember which cells are missing before casting: blanking the
            # text 'nan' afterwards would also strip that substring from
            # genuine messages (e.g. "resonance" -> "resoce").
            missing = df[col].isna()
            df[col] = df[col].astype(str).mask(missing, '')
        else:
            df[col] = df[col].astype(dtype)
    return df
def _header(self):
"""Header for log analysis csv file
"""
......@@ -371,6 +385,65 @@ class LogFile(object):
return contents
def headers4df(self):
    """Create header and a minimum itemsize for string columns when
    converting a Log check analysis to a pandas.DataFrame

    Returns
    -------
    header : list
        List of column names as generated by WindIO.LogFile._header
    min_itemsize : dict
        Dictionary with column names as keys, and the minimum string length
        as values.
    dtypes : dict
        Dictionary with column names as keys, and data types as values
    """
    # Index ranges of the two numbered message groups. Every list below is
    # derived from these same ranges so that each generated nr_/msg_ column
    # also receives a dtype (previously index 30 was named but not typed).
    low_ids = range(31)
    high_ids = range(100, 105)

    colnames = ['file_name']
    for i in low_ids:
        colnames.extend(('nr_%i' % i, 'msg_%i' % i))
    # the 100-series groups carry two additional time step columns
    for i in high_ids:
        colnames.extend(('first_tstep_%i' % i, 'last_step_%i' % i,
                         'nr_%i' % i, 'msg_%i' % i))
    colnames.extend(['nr_extra', 'msg_extra'])
    colnames.extend(['elapsted_time',
                     'last_time_step',
                     'simulation_time',
                     'real_sim_time',
                     'sim_output_time',
                     'total_iterations',
                     'dt',
                     'nr_time_steps',
                     'seconds_p_iteration',
                     'mean_iters_p_time_step',
                     'max_iters_p_time_step',
                     'sim_id'])

    # message columns hold text
    msg_cols = ['msg_%i' % i for i in low_ids]
    msg_cols.extend('msg_%i' % i for i in high_ids)
    dtypes = {k: str for k in msg_cols}

    # make the message/str columns long enough
    min_itemsize = {'msg_%i' % i: 100 for i in low_ids}

    # column names holding the number of occurrences of messages
    nr_cols = ['nr_%i' % i for i in low_ids]
    nr_cols.extend('nr_%i' % i for i in high_ids)
    # other float values
    nr_cols.extend(['elapsted_time', 'total_iterations'])
    # NaN only exists in float arrays, not integers (NumPy limitation)
    # so use float instead of int
    dtypes.update({k: np.float64 for k in nr_cols})

    return colnames, min_itemsize, dtypes
class LoadResults(ReadHawc2):
"""Read a HAWC2 result data file
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment