From 94ae3471591a0d9baa314b9cd17595b754288a09 Mon Sep 17 00:00:00 2001 From: David Robert Verelst <dave@dtu.dk> Date: Thu, 7 Mar 2019 16:51:19 +0100 Subject: [PATCH 1/5] prepost.dlcdefs: correctly count total number of cases from spreadsheets --- wetb/prepost/dlcdefs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wetb/prepost/dlcdefs.py b/wetb/prepost/dlcdefs.py index 1f4e754d..a287d2a3 100644 --- a/wetb/prepost/dlcdefs.py +++ b/wetb/prepost/dlcdefs.py @@ -391,7 +391,7 @@ def excel_stabcon(proot, fext='xlsx', pignore=None, pinclude=None, sheet=0, if not silent: k = 0 - for df in dict_dfs: + for dlc, df in viewitems(dict_dfs): k += len(df) print('in which a total of %i cases are defined.' % k) -- GitLab From 2163b33a958237f2b8fd7ed3cfb06d46f9d94c53 Mon Sep 17 00:00:00 2001 From: David Robert Verelst <dave@dtu.dk> Date: Tue, 19 Mar 2019 13:07:58 +0100 Subject: [PATCH 2/5] prepost.misc: do not fail on empty DataFrames --- wetb/prepost/misc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wetb/prepost/misc.py b/wetb/prepost/misc.py index b6129907..caa424d0 100644 --- a/wetb/prepost/misc.py +++ b/wetb/prepost/misc.py @@ -1113,8 +1113,10 @@ def df_dict_check_datatypes(df_dict): # we can not pop/delete items from a dict while iterating over it df_dict2 = {} for colkey, col in df_dict.items(): + if len(col)==0: + pass # if we have a list, convert to string - if type(col[0]).__name__ == 'list': + elif type(col[0]).__name__ == 'list': for ii, item in enumerate(col): col[ii] = '**'.join(item) # if we already have an array (statistics) or a list of numbers -- GitLab From 62240676dce7113b472d08ab9d6ea5c39a3b1b7a Mon Sep 17 00:00:00 2001 From: David Robert Verelst <dave@dtu.dk> Date: Tue, 19 Mar 2019 13:14:28 +0100 Subject: [PATCH 3/5] prepost.dlctemplate: do not remove failed cases prior to finding them --- wetb/prepost/dlctemplate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wetb/prepost/dlctemplate.py 
b/wetb/prepost/dlctemplate.py index 8308b175..6ec0a4a2 100644 --- a/wetb/prepost/dlctemplate.py +++ b/wetb/prepost/dlctemplate.py @@ -626,7 +626,7 @@ def postpro_node_merge(tqdm=False, zipchunks=False, m=[3,4,6,8,9,10,12]): def prepare_failed(compress=False, wine_arch='win32', wine_prefix='~/.wine32', prelude='', zipchunks=False): - cc = sim.Cases(POST_DIR, sim_id) + cc = sim.Cases(POST_DIR, sim_id, rem_failed=False) df_tags = cc.cases2df() # ------------------------------------------------------------------------- -- GitLab From 732fa11eb122ab5cf96611f75e37774960ba896f Mon Sep 17 00:00:00 2001 From: David Robert Verelst <dave@dtu.dk> Date: Tue, 19 Mar 2019 13:33:57 +0100 Subject: [PATCH 4/5] prepost.simchunks: separate ppn used for pbs and actual usage on node --- wetb/prepost/dlctemplate.py | 2 +- wetb/prepost/simchunks.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py index 6ec0a4a2..3339bce3 100644 --- a/wetb/prepost/dlctemplate.py +++ b/wetb/prepost/dlctemplate.py @@ -374,7 +374,7 @@ def launch_dlcs_excel(sim_id, silent=False, verbose=False, pbs_turb=False, ppn=20, nr_procs_series=3, walltime='20:00:00', chunks_dir='zip-chunks-jess', compress=compress, wine_arch=wine_arch, wine_prefix=wine_prefix, - prelude=prelude) + prelude=prelude, ppn_pbs=20) # create_chunks_htc_pbs(cases, sort_by_values=sorts_on, queue='workq', # ppn=12, nr_procs_series=3, walltime='20:00:00', # chunks_dir='zip-chunks-gorm', compress=compress, diff --git a/wetb/prepost/simchunks.py b/wetb/prepost/simchunks.py index a6498c63..f514aebb 100644 --- a/wetb/prepost/simchunks.py +++ b/wetb/prepost/simchunks.py @@ -39,7 +39,7 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20, i0=0, walltime='24:00:00', chunks_dir='zip-chunks-jess', wine_arch='win32', wine_prefix='~/.wine32', pyenv_cmd='source /home/python/miniconda3/bin/activate', - pyenv='wetb_py3', prelude=''): + pyenv='wetb_py3', 
prelude='', ppn_pbs=20): """Group a large number of simulations htc and pbs launch scripts into different zip files so we can run them with find+xargs on various nodes. """ @@ -145,7 +145,7 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20, i0=0, pbs_tmplate += "#PBS -W umask=[umask]\n" pbs_tmplate += "### Maximum wallclock time format HOURS:MINUTES:SECONDS\n" pbs_tmplate += "#PBS -l walltime=[walltime]\n" - pbs_tmplate += "#PBS -l nodes=[nodes]:ppn=[ppn]\n" + pbs_tmplate += "#PBS -l nodes=[nodes]:ppn=[ppn_pbs]\n" pbs_tmplate += "### Queue name\n" pbs_tmplate += "#PBS -q [queue]\n" pbs_tmplate += "\n" @@ -210,7 +210,7 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20, i0=0, pbs = pbs.replace('[umask]', '0003') pbs = pbs.replace('[walltime]', walltime) pbs = pbs.replace('[nodes]', str(nodes)) - pbs = pbs.replace('[ppn]', str(ppn)) + pbs = pbs.replace('[ppn_pbs]', str(ppn_pbs)) pbs = pbs.replace('[queue]', queue) pbs += '\necho "%s"\n' % ('-'*70) -- GitLab From 23bed626b876d02e36104ae53df90997322011b8 Mon Sep 17 00:00:00 2001 From: David Robert Verelst <dave@dtu.dk> Date: Tue, 19 Mar 2019 13:53:02 +0100 Subject: [PATCH 5/5] prepost.dlctemplate: do not crash out of merging when keys are misalligned --- wetb/prepost/dlctemplate.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/wetb/prepost/dlctemplate.py b/wetb/prepost/dlctemplate.py index 3339bce3..cd0b651c 100644 --- a/wetb/prepost/dlctemplate.py +++ b/wetb/prepost/dlctemplate.py @@ -598,8 +598,8 @@ def postpro_node_merge(tqdm=False, zipchunks=False, m=[3,4,6,8,9,10,12]): cc = sim.Cases(POST_DIR, sim_id) df_tags = cc.cases2df() df_stats = pd.merge(df, df_tags[required], on=['[case_id]']) - # if the merge didn't work due to other misaligned case_id tags, do not - # overwrite our otherwise ok tables! 
+ # find out if we have some misalignment between generated cases and results + # this could happen when we added new cases and removed others if len(df_stats) != len(df): print('failed to merge required tags, something is wrong!') # find out which cases we lost and why @@ -610,8 +610,12 @@ def postpro_node_merge(tqdm=False, zipchunks=False, m=[3,4,6,8,9,10,12]): msg = 'nr of case_ids lost:' print(msg, (len(df)-len(df_stats))/len(df['channel'].unique())) print('following case_ids have mysteriously disappeared:') - print(s_df-s_stats) - return + missing = s_df-s_stats + print(missing) + # save misaligned cases + fname = os.path.join(POST_DIR, '%s_misallgined_cases.tsv' % sim_id) + pd.DataFrame(missing).to_csv(fname, sep='\t') + df_stats.to_hdf(fdf, 'table', mode='w') df_stats.to_csv(fdf.replace('.h5', '.csv')) -- GitLab