From c6468079a8494d4e633fae045c83ab6720c383f7 Mon Sep 17 00:00:00 2001
From: dave <dave@dtu.dk>
Date: Wed, 10 Aug 2016 11:14:09 +0200
Subject: [PATCH] [WIP] prepost.simchunks: first attempt to re-organize tar
 files

---
 wetb/prepost/simchunks.py | 47 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/wetb/prepost/simchunks.py b/wetb/prepost/simchunks.py
index b510cd3..876b07d 100644
--- a/wetb/prepost/simchunks.py
+++ b/wetb/prepost/simchunks.py
@@ -394,6 +394,53 @@ def create_chunks_htc_pbs(cases, sort_by_values=['[Windspeed]'], ppn=20,
     df_ind.to_csv(fname+'.csv')
 
 
+def regroup_tarfiles(cc):
+    """Re-group all chunks again per [Case folder] compressed file. First all
+    chunks are copied to the node scratch disc, then start working on them.
+    This only works on a node with PBS stuff.
+
+    Make sure to maintain the same location as defined by the tags!
+
+    [res_dir] and [Case folder] could be multiple directories deep, bu the
+    final archive will only contain the files (no directory structure), and
+    the name of the archive is that of the last directory:
+        /[res_dir]/[Case folder]/[Case folder].tar.xz
+        /res/dir/case/folder/dlcname/dlcname.tar.xz
+
+    Parameters
+    ----------
+
+    path_pattern : str
+        /path/to/files/*.tar.xz
+
+    """
+
+    USER = os.getenv('USER')
+    PBS_JOBID = os.getenv('PBS_JOBID')
+    scratch = os.path.join('/scratch', USER, PBS_JOBID)
+    src = os.getenv('PBS_O_WORKDIR')
+
+    path_pattern = '/home/dave/SimResults/NREL5MW/D0022/prepost-data/*.xz'
+
+    for ffname in tqdm(glob.glob(path_pattern)):
+        appendix = os.path.basename(ffname).split('_')[0]
+        with tarfile.open(ffname, mode='r:xz') as tar:
+            # create new tar files if necessary for each [Case folder]
+            for tarinfo in tar.getmembers():
+                t2_name = os.path.basename(os.path.dirname(tarinfo.name))
+                t2_dir = os.path.join(os.path.dirname(path_pattern), t2_name)
+                if not os.path.isdir(t2_dir):
+                    os.makedirs(t2_dir)
+                t2_path = os.path.join(t2_dir, t2_name + '_%s.tar' % appendix)
+                fileobj = tar.extractfile(tarinfo)
+                # change the location of the file in the new archive:
+                # the location of the archive is according to the folder
+                # structure as defined in the tags, remove any subfolders
+                tarinfo.name = os.basename(tarinfo.name)
+                with tarfile.open(t2_path, mode='a') as t2:
+                    t2.addfile(tarinfo, fileobj)
+
+
 def merge_from_tarfiles(df_fname, path, pattern, tarmode='r:xz', tqdm=False,
                         header='infer', names=None, sep=',', min_itemsize={},
                         verbose=False, dtypes={}):
-- 
GitLab