From a4e4707047921f1d124fef72255bba93b6ad3847 Mon Sep 17 00:00:00 2001
From: dave <dave@dtu.dk>
Date: Tue, 9 Aug 2016 18:53:25 +0200
Subject: [PATCH] prepost.windIO: add method to LogFile to convert csv format
 to df

---
 wetb/prepost/windIO.py | 73 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/wetb/prepost/windIO.py b/wetb/prepost/windIO.py
index 394d2fa..3500151 100755
--- a/wetb/prepost/windIO.py
+++ b/wetb/prepost/windIO.py
@@ -27,6 +27,7 @@ import struct
 import math
 from time import time
 import codecs
+from itertools import chain
 
 import scipy.integrate as integrate
 import numpy as np
@@ -136,6 +137,7 @@ class LogFile(object):
 
         self.init_cols = len(self.err_init)
         self.sim_cols = len(self.err_sim)
+        self.header = None
 
     def readlog(self, fname, case=None, save_iter=False):
         """
@@ -354,6 +356,18 @@ class LogFile(object):
             contents = contents + '\n'
         return contents
 
+    def csv2df(self, fname):
+        """Read the ';' separated csv log analysis fname into a pandas.DataFrame"""
+        colnames, min_itemsize, dtypes = self.headers4df()
+        df = pd.read_csv(fname, header=0, names=colnames, sep=';', )
+        for col, dtype in dtypes.items():
+            # empty message cells are read as NaN: blank them before the str
+            # cast, since replacing 'nan' afterwards also hits message text
+            if dtype == str:
+                df[col] = df[col].fillna('')
+            df[col] = df[col].astype(dtype)
+        return df
+
     def _header(self):
         """Header for log analysis csv file
         """
@@ -371,6 +385,65 @@ class LogFile(object):
 
         return contents
 
+    def headers4df(self):
+        """Create header and a minimum itemsize for string columns when
+        converting a Log check analysis to a pandas.DataFrame
+
+        Returns
+        -------
+
+        colnames : list
+            Column names, intended to match the LogFile._header csv columns
+
+        min_itemsize : dict
+            Dictionary with msg_0..msg_29 as keys, and the minimum string
+            length (100) as values.
+
+        dtypes : dict
+            Dictionary with column names as keys, and data types as values
+        """
+        chain_iter = chain.from_iterable
+
+        colnames = ['file_name']
+        colnames.extend(list(chain_iter(('nr_%i' % i, 'msg_%i' % i)
+                      for i in range(31))) )
+
+        gr = ('first_tstep_%i', 'last_step_%i', 'nr_%i', 'msg_%i')
+        colnames.extend(list(chain_iter( (k % i for k in gr)
+                           for i in range(100,105,1))) )
+        colnames.extend(['nr_extra', 'msg_extra'])
+        colnames.extend(['elapsted_time',
+                       'last_time_step',
+                       'simulation_time',
+                       'real_sim_time',
+                       'sim_output_time',
+                       'total_iterations',
+                       'dt',
+                       'nr_time_steps',
+                       'seconds_p_iteration',
+                       'mean_iters_p_time_step',
+                       'max_iters_p_time_step',
+                       'sim_id'])
+        dtypes = {}
+
+        # message columns are str. NOTE(review): msg_30/nr_30 get no dtype
+        msg_cols = ['msg_%i' % i for i in range(30)]
+        msg_cols.extend(['msg_%i' % i for i in range(100,105,1)])
+        dtypes.update({k:str for k in msg_cols})
+        # minimum string length of 100, set for msg_0..msg_29 only
+        min_itemsize = {'msg_%i' % i : 100 for i in range(30)}
+
+        # column names holding the number of occurrences of messages
+        nr_cols = ['nr_%i' % i for i in range(30)]
+        nr_cols.extend(['nr_%i' % i for i in range(100,105,1)])
+        # other float values ('elapsted_time' is the key as used in colnames)
+        nr_cols.extend(['elapsted_time', 'total_iterations'])
+        # NaN only exists in float arrays, not integers (NumPy limitation)
+        # so use float instead of int
+        dtypes.update({k:np.float64 for k in nr_cols})
+
+        return colnames, min_itemsize, dtypes
+
 
 class LoadResults(ReadHawc2):
     """Read a HAWC2 result data file
-- 
GitLab