From 94ae3471591a0d9baa314b9cd17595b754288a09 Mon Sep 17 00:00:00 2001
From: David Robert Verelst <dave@dtu.dk>
Date: Thu, 7 Mar 2019 16:51:19 +0100
Subject: [PATCH 1/5] prepost.dlcdefs: correctly count total number of cases
 from spreadsheets

---
 wetb/prepost/dlcdefs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wetb/prepost/dlcdefs.py b/wetb/prepost/dlcdefs.py
index 1f4e754d..a287d2a3 100644
--- a/wetb/prepost/dlcdefs.py
+++ b/wetb/prepost/dlcdefs.py
@@ -391,7 +391,7 @@ def excel_stabcon(proot, fext='xlsx', pignore=None, pinclude=None, sheet=0,
 
     if not silent:
         k = 0
-        for df in dict_dfs:
+        for dlc, df in viewitems(dict_dfs):
             k += len(df)
         print('in which a total of %i cases are defined.' % k)
 
-- 
GitLab


From 2163b33a958237f2b8fd7ed3cfb06d46f9d94c53 Mon Sep 17 00:00:00 2001
From: David Robert Verelst <dave@dtu.dk>
Date: Tue, 19 Mar 2019 13:07:58 +0100
Subject: [PATCH 2/5] prepost.misc: do not fail on empty DataFrames

---
 wetb/prepost/misc.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/wetb/prepost/misc.py b/wetb/prepost/misc.py
index b6129907..caa424d0 100644
--- a/wetb/prepost/misc.py
+++ b/wetb/prepost/misc.py
@@ -1113,8 +1113,10 @@ def df_dict_check_datatypes(df_dict):
     # we can not pop/delete items from a dict while iterating over it
     df_dict2 = {}
     for colkey, col in df_dict.items():
+        if len(col)==0:
+            pass
         # if we have a list, convert to string
-        if type(col[0]).__name__ == 'list':
+        elif type(col[0]).__name__ == 'list':
             for ii, item in enumerate(col):
                 col[ii] = '**'.join(item)
         # if we already have an array (statistics) or a list of numbers
-- 
GitLab


From 62240676dce7113b472d08ab9d6ea5c39a3b1b7a Mon Sep 17 00:00:00 2001
From: David Robert Verelst <dave@dtu.dk>
Date: Tue, 19 Mar 2019 13:14:28 +0100
Subject: [PATCH 3/5] prepost.dlctemplate: do not remove failed cases prior to
 finding them

---
 wetb/prepost/dlctemplate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py
index 8308b175..6ec0a4a2 100644
--- a/wetb/prepost/dlctemplate.py
+++ b/wetb/prepost/dlctemplate.py
@@ -626,7 +626,7 @@ def postpro_node_merge(tqdm=False, zipchunks=False, m=[3,4,6,8,9,10,12]):
 def prepare_failed(compress=False, wine_arch='win32', wine_prefix='~/.wine32',
                    prelude='', zipchunks=False):
 
-    cc = sim.Cases(POST_DIR, sim_id)
+    cc = sim.Cases(POST_DIR, sim_id, rem_failed=False)
     df_tags = cc.cases2df()
 
     # -------------------------------------------------------------------------
-- 
GitLab


From 732fa11eb122ab5cf96611f75e37774960ba896f Mon Sep 17 00:00:00 2001
From: David Robert Verelst <dave@dtu.dk>
Date: Tue, 19 Mar 2019 13:33:57 +0100
Subject: [PATCH 4/5] prepost.simchunks: separate ppn used for pbs and actual
 usage on node

---
 wetb/prepost/dlctemplate.py | 2 +-
 wetb/prepost/simchunks.py   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py
index 6ec0a4a2..3339bce3 100644
--- a/wetb/prepost/dlctemplate.py
+++ b/wetb/prepost/dlctemplate.py
@@ -374,7 +374,7 @@ def launch_dlcs_excel(sim_id, silent=False, verbose=False, pbs_turb=False,
                               ppn=20, nr_procs_series=3, walltime='20:00:00',
                               chunks_dir='zip-chunks-jess', compress=compress,
                               wine_arch=wine_arch, wine_prefix=wine_prefix,
-                              prelude=prelude)
+                              prelude=prelude, ppn_pbs=20)
 #        create_chunks_htc_pbs(cases, sort_by_values=sorts_on, queue='workq',
 #                              ppn=12, nr_procs_series=3, walltime='20:00:00',
 #                              chunks_dir='zip-chunks-gorm', compress=compress,
diff --git a/wetb/prepost/simchunks.py b/wetb/prepost/simchunks.py
index a6498c63..f514aebb 100644
--- a/wetb/prepost/simchunks.py
+++ b/wetb/prepost/simchunks.py
@@ -39,7 +39,7 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20, i0=0,
                           walltime='24:00:00', chunks_dir='zip-chunks-jess',
                           wine_arch='win32', wine_prefix='~/.wine32',
                           pyenv_cmd='source /home/python/miniconda3/bin/activate',
-                          pyenv='wetb_py3', prelude=''):
+                          pyenv='wetb_py3', prelude='', ppn_pbs=20):
     """Group a large number of simulations htc and pbs launch scripts into
     different zip files so we can run them with find+xargs on various nodes.
     """
@@ -145,7 +145,7 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20, i0=0,
     pbs_tmplate += "#PBS -W umask=[umask]\n"
     pbs_tmplate += "### Maximum wallclock time format HOURS:MINUTES:SECONDS\n"
     pbs_tmplate += "#PBS -l walltime=[walltime]\n"
-    pbs_tmplate += "#PBS -l nodes=[nodes]:ppn=[ppn]\n"
+    pbs_tmplate += "#PBS -l nodes=[nodes]:ppn=[ppn_pbs]\n"
     pbs_tmplate += "### Queue name\n"
     pbs_tmplate += "#PBS -q [queue]\n"
     pbs_tmplate += "\n"
@@ -210,7 +210,7 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20, i0=0,
         pbs = pbs.replace('[umask]', '0003')
         pbs = pbs.replace('[walltime]', walltime)
         pbs = pbs.replace('[nodes]', str(nodes))
-        pbs = pbs.replace('[ppn]', str(ppn))
+        pbs = pbs.replace('[ppn_pbs]', str(ppn_pbs))
         pbs = pbs.replace('[queue]', queue)
         pbs += '\necho "%s"\n' % ('-'*70)
 
-- 
GitLab


From 23bed626b876d02e36104ae53df90997322011b8 Mon Sep 17 00:00:00 2001
From: David Robert Verelst <dave@dtu.dk>
Date: Tue, 19 Mar 2019 13:53:02 +0100
Subject: [PATCH 5/5] prepost.dlctemplate: do not crash out of merging when
 keys are misaligned

---
 wetb/prepost/dlctemplate.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py
index 3339bce3..cd0b651c 100644
--- a/wetb/prepost/dlctemplate.py
+++ b/wetb/prepost/dlctemplate.py
@@ -598,8 +598,8 @@ def postpro_node_merge(tqdm=False, zipchunks=False, m=[3,4,6,8,9,10,12]):
     cc = sim.Cases(POST_DIR, sim_id)
     df_tags = cc.cases2df()
     df_stats = pd.merge(df, df_tags[required], on=['[case_id]'])
-    # if the merge didn't work due to other misaligned case_id tags, do not
-    # overwrite our otherwise ok tables!
+    # find out if we have some misalignment between generated cases and results
+    # this could happen when we added new cases and removed others
     if len(df_stats) != len(df):
         print('failed to merge required tags, something is wrong!')
         # find out which cases we lost and why
@@ -610,8 +610,12 @@ def postpro_node_merge(tqdm=False, zipchunks=False, m=[3,4,6,8,9,10,12]):
         msg = 'nr of case_ids lost:'
         print(msg, (len(df)-len(df_stats))/len(df['channel'].unique()))
         print('following case_ids have mysteriously disappeared:')
-        print(s_df-s_stats)
-        return
+        missing = s_df-s_stats
+        print(missing)
+        # save misaligned cases
+        fname = os.path.join(POST_DIR, '%s_misallgined_cases.tsv' % sim_id)
+        pd.DataFrame(missing).to_csv(fname, sep='\t')
+
     df_stats.to_hdf(fdf, 'table', mode='w')
     df_stats.to_csv(fdf.replace('.h5', '.csv'))
 
-- 
GitLab