Skip to content
Snippets Groups Projects
Commit a4e47070 authored by David Verelst's avatar David Verelst
Browse files

prepost.windIO: add method to LogFile to convert csv format to df

parent 4ef58a75
No related branches found
No related tags found
No related merge requests found
...@@ -27,6 +27,7 @@ import struct ...@@ -27,6 +27,7 @@ import struct
import math import math
from time import time from time import time
import codecs import codecs
from itertools import chain
import scipy.integrate as integrate import scipy.integrate as integrate
import numpy as np import numpy as np
...@@ -136,6 +137,7 @@ class LogFile(object): ...@@ -136,6 +137,7 @@ class LogFile(object):
self.init_cols = len(self.err_init) self.init_cols = len(self.err_init)
self.sim_cols = len(self.err_sim) self.sim_cols = len(self.err_sim)
self.header = None
def readlog(self, fname, case=None, save_iter=False): def readlog(self, fname, case=None, save_iter=False):
""" """
...@@ -354,6 +356,18 @@ class LogFile(object): ...@@ -354,6 +356,18 @@ class LogFile(object):
contents = contents + '\n' contents = contents + '\n'
return contents return contents
def csv2df(self, fname):
    """Read a csv log file analysis and convert to a pandas.DataFrame

    The file is read as a semicolon separated table. Its first line is
    treated as a header and replaced with the column names returned by
    ``self.headers4df()``. Columns listed in the dtypes dictionary of
    ``self.headers4df()`` are cast to that data type.

    Parameters
    ----------
    fname : str
        File name (path) of the csv log analysis file, passed on to
        ``pandas.read_csv``.

    Returns
    -------
    df : pandas.DataFrame
        The log analysis with one column per entry of the column names
        from ``self.headers4df()``. Missing values in string columns are
        returned as empty strings.
    """
    colnames, min_itemsize, dtypes = self.headers4df()
    df = pd.read_csv(fname, header=0, names=colnames, sep=';')
    for col, dtype in dtypes.items():
        if dtype == str:
            # Blank out the missing cells *before* converting to str.
            # Converting first and then replacing the text 'nan' would also
            # strip 'nan' from inside real messages (e.g. 'nanometer').
            text = df[col].astype(object)
            df[col] = text.where(text.notna(), '').astype(str)
        else:
            df[col] = df[col].astype(dtype)
    return df
def _header(self): def _header(self):
"""Header for log analysis csv file """Header for log analysis csv file
""" """
...@@ -371,6 +385,65 @@ class LogFile(object): ...@@ -371,6 +385,65 @@ class LogFile(object):
return contents return contents
def headers4df(self):
    """Create header and a minimum itemsize for string columns when
    converting a Log check analysis to a pandas.DataFrame

    The message counters (``nr_*``) and message texts (``msg_*``) are
    generated from the same index ranges for the column names, the data
    types and the minimum item sizes, so that every generated ``msg_*``
    column is typed as str and every generated ``nr_*`` column as float.

    Returns
    -------
    header : list
        List of column names as generated by WindIO.LogFile._header

    min_itemsize : dict
        Dictionary with column names as keys, and the minimum string length
        as values.

    dtypes : dict
        Dictionary with column names as keys, and data types as values
    """
    # indices of the numbered message columns: 0..30 and 100..104
    init_ids = range(31)
    sim_ids = range(100, 105)

    colnames = ['file_name']
    for i in init_ids:
        colnames.extend(['nr_%i' % i, 'msg_%i' % i])
    for i in sim_ids:
        colnames.extend(['first_tstep_%i' % i, 'last_step_%i' % i,
                         'nr_%i' % i, 'msg_%i' % i])
    colnames.extend(['nr_extra', 'msg_extra'])
    colnames.extend(['elapsted_time',
                     'last_time_step',
                     'simulation_time',
                     'real_sim_time',
                     'sim_output_time',
                     'total_iterations',
                     'dt',
                     'nr_time_steps',
                     'seconds_p_iteration',
                     'mean_iters_p_time_step',
                     'max_iters_p_time_step',
                     'sim_id'])

    # message columns hold text
    msg_cols = ['msg_%i' % i for i in init_ids]
    msg_cols.extend('msg_%i' % i for i in sim_ids)
    dtypes = {k: str for k in msg_cols}

    # make the message/str columns long enough (only the 0..30 message
    # columns get a minimum item size)
    min_itemsize = {'msg_%i' % i: 100 for i in init_ids}

    # column names holding the number of occurrences of messages
    nr_cols = ['nr_%i' % i for i in init_ids]
    nr_cols.extend('nr_%i' % i for i in sim_ids)
    # other float values
    nr_cols.extend(['elapsted_time', 'total_iterations'])
    # NaN only exists in float arrays, not integers (NumPy limitation)
    # so use float instead of int
    dtypes.update({k: np.float64 for k in nr_cols})

    return colnames, min_itemsize, dtypes
class LoadResults(ReadHawc2): class LoadResults(ReadHawc2):
"""Read a HAWC2 result data file """Read a HAWC2 result data file
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment