From 9c3139db3527af556ebef39e2d30bc86892ab07e Mon Sep 17 00:00:00 2001
From: David Robert Verelst <dave@dtu.dk>
Date: Sun, 18 Feb 2018 14:53:09 +0100
Subject: [PATCH] prepost.dlctemplate: larger chunks for zipchunk merge

---
 wetb/prepost/dlctemplate.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py
index 4d7d939..2d67935 100644
--- a/wetb/prepost/dlctemplate.py
+++ b/wetb/prepost/dlctemplate.py
@@ -518,7 +518,9 @@ def postpro_node_merge(tqdm=False, zipchunks=False):
     dtypes = {col:np.float64 for col in colnames}
     dtypes['channel'] = str
     dtypes['[case_id]'] = str
-    mdf.csv2df_chunks(store, fcsv, chunksize=300000, min_itemsize={}, sep=',',
+    # when using min_itemsize the column names should be valid variable names
+    # mitemsize = {'channel':60, '[case_id]':60}
+    mdf.csv2df_chunks(store, fcsv, chunksize=1000000, min_itemsize={}, sep=',',
                       colnames=colnames, dtypes=dtypes, header=0)
     store.close()
     # -------------------------------------------------------------------------
@@ -547,7 +549,8 @@ def postpro_node_merge(tqdm=False, zipchunks=False):
         s_df = set(df['[case_id]'].unique())
         s_stats = set(df_stats['[case_id]'].unique())
         print('nr of channels:', len(df['channel'].unique()))
-        print((len(df)-len(df_stats))/len(df['channel'].unique()))
+        msg = 'nr of case_ids lost:'
+        print(msg, (len(df)-len(df_stats))/len(df['channel'].unique()))
         print('following case_ids have mysteriously disappeared:')
         print(s_df-s_stats)
         return
-- 
GitLab