                #sett = True
                # first character is the #, the rest is the number
                set_nr = int(line_list[0][1:])
                st_dict['%03i-000-0' % set_nr] = line
                # and reset subset nr to zero now
                subset_nr = 0
                subset_nr_track = 0
                # and comments only format, back to one string
                st_comments['%03i-000-0' % set_nr] = ' '.join(line_list[1:])

            # marks the start of a subset
            elif line[0] == '$':
                subset_nr_track += 1
                subset = True
                subset_nr = int(line_list[0][1:])
                # and comments only format, back to one string
                setid = '%03i-%03i-b' % (set_nr, subset_nr)
                st_comments[setid] = ' '.join(line_list[2:])

                # check if the number read corresponds to tracking
                if subset_nr != subset_nr_track:
                    msg = 'subset_nr and subset_nr_track do not match'
                    raise UserWarning(msg)

                nr_points = int(line_list[1])
                st_dict[setid] = line
                # prepare read data points
                sub_set_arr = np.zeros((nr_points, 19), dtype=np.float64)
                # keep track of where we are on the data array, initialize
                # to 0 for starters
                point = 0
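
            # For reference, a sketch of the st-file layout assumed by this
            # parser (values are illustrative only):
            #   #1 blade              <- set marker: '#' + set_nr, comments
            #   $1 2 some comment     <- subset marker: '$' + subset_nr,
            #                            nr of data points, comments
            #   0.0 1.2 ...           <- nr_points rows of 19 data columns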

            # in case we are not in subset mode, we only have comments left
            elif not subset:
                # FIXME: how are we dealing with set comments now?
                # subset comments come before the actual subset, so we
                # attribute them to the next subset
                #if subset_nr > 0 :
                key = '%03i-%03i-a' % (set_nr, subset_nr+1)
                # in case it is not the first comment line
                if key in st_dict: st_dict[key] += line
                else: st_dict[key]  = line
                ## otherwise we have the set comments
                #else:
                    #key = '%03i-%03i-a' % (set_nr, subset_nr)
                    ## in case it is not the first comment line
                    #if st_dict.has_key(key): st_dict[key] += line
                    #else: st_dict[key]  = line

            # in case we have the data points, make sure all 19 data columns
            # are present; lines that do not match are silently ignored
            elif len(line_list)==19 and subset:
                # we can store it in the array
                sub_set_arr[point,:] = line_list
                # on the last entry:
                if point == nr_points-1:
                    # save to the dict:
                    st_dict['%03i-%03i-d' % (set_nr, subset_nr)]= sub_set_arr
                    # and indicate we're done subsetting, next we can have
                    # either set or subset comments
                    subset = False
                point += 1

            #else:
            #    msg = "error in st format: don't know where to put current line"
            #    raise UserWarning(msg)

        self.st_dict = st_dict
        self.st_comments = st_comments

    def _format_nr(self, number):
        """
        Automatically format the number

        Fixed-point notation (self.prec_float) is used when the absolute
        value lies between self.float_lo and self.float_hi, and exponential
        notation (self.prec_exp) otherwise. A ValueError is raised when more
        than self.prec_loss percent of precision would be lost.
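
        Examples
        --------
        A minimal sketch; the format specifiers and bounds are illustrative
        assumptions, not values fixed by this class::

            md.prec_float = ' 8.06f'
            md.prec_exp = ' 8.04e'
            md.float_lo, md.float_hi = 1e-3, 1e4
            md.prec_loss = 0.01
            md._format_nr(12.3456789)  # fixed-point: within the bounds
            md._format_nr(1.23e8)      # exponential: outside the bounds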

        """

        # the formatting of the number
        numabs = abs(number)
        # just a float precision defined in self.prec_float
        if self.float_lo < numabs < self.float_hi:
            numfor = format(number, self.prec_float)
        # if it is zero, just simply print as 0.0
        elif number == 0.0:
            numfor = format(number, ' 1.1f')
        # exponentional, precision defined in self.prec_exp
        else:
            numfor = format(number, self.prec_exp)

        try:
            loss = 100.0*abs(1 - (float(numfor)/number))
        except ZeroDivisionError:
            if abs(float(numfor)) > 0.00000001:
                msg = 'precision loss, from %1.10f to %s' \
                            % (number, numfor.strip())
                raise ValueError(msg)
            else:
                loss = 0
        if loss > self.prec_loss:
            msg = 'precision loss, from %1.10f to %s (%f pc)' \
                        % (number, numfor.strip(), loss)
            raise ValueError(msg)

        return numfor

    def write_st(self, file_path, file_name, print_header=False):
        """
        prec_loss : float, default=0.01
            acceptible precision loss expressed in %
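
        Examples
        --------
        A minimal sketch; paths and file names are illustrative, and
        self.st_dict is assumed to be loaded already::

            md.write_st('/some/path/', 'blade_out.st', print_header=True)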
        """
        # TODO: implement all the tests when writing on nset, number of data
        # points, subsetnumber sequence etc

        content = ''

        # sort the key list
        keysort = list(self.st_dict.keys())
        keysort.sort()

        for key in keysort:

            # in case we are just printing what was recorded before
            if not key.endswith('d'):
                content += self.st_dict[key]
            # else we have an array
            else:
                # cycle through data points and print them orderly: control
                # precision depending on the number, keep spacing constant
                # so it is easy to read the textfile
                for m in range(self.st_dict[key].shape[0]):
                    for n in range(self.st_dict[key].shape[1]):
                        # TODO: check what we lose here
                        # we are coming from a np.float64, as set in the array,
                        # but then it will not work with format()
                        number = float(self.st_dict[key][m,n])
                        numfor = self._format_nr(number)
                        content += numfor.rjust(self.col_width)
                    content += '\n'

                if print_header:
                    content += self.column_header_line

        # and write file to disk again
        with open(file_path + file_name, 'w') as f:
            f.write(content)
        if not self.silent:
            print('st file written:', file_path + file_name)

    def write_latex(self, fpath, selection=None):
        """
        Write a table in LaTeX format based on the data in the st file.

        selection : list, default=None
            [ [setnr, subsetnr, table caption], [setnr, subsetnr, caption],...]
            When not specified, all subsets are written.
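
        Examples
        --------
        A minimal usage sketch; the path, set/subset numbers and caption
        are illustrative only::

            md.write_latex('/some/path/', selection=[[1, 1, 'tower']])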

        """

        cols_p1 = ['r [m]', 'm [kg/m]', 'm(ri{_x})^2 [kgNm^2]',
                   'm(ri{_y})^2 [kgNm^2]', 'EI_x [Nm^2]', 'EI_y [Nm^2]',
                   'EA [N]', 'GJ [\\frac{Nm^2}{rad}]']

        cols_p2 = ['r [m]', 'x_cg [m]', 'y_cg [m]', 'x_sh [m]', 'y_sh [m]',
                'x_e [m]', 'y_e [m]', 'k_x [-]', 'k_y [-]', 'pitch [deg]']

        if selection is None or len(selection) < 1:
            selection = []
            for key in self.st_dict:
                # but now only take the ones that hold data; use an empty
                # caption so the default one is generated later
                if key[-1] == 'd':
                    selection.append([int(key[:3]), int(key[4:7]), ''])

        for i,j, caption in selection:
            # get the data
            try:
                # set comment should be the name of the body
                set_comment = self.st_comments['%03i-000-0' % (i)]
#                subset_comment = self.st_comments['%03i-%03i-b' % (i,j)]
                st_arr = self.st_dict['%03i-%03i-d' % (i,j)]
            except AttributeError:
                msg = 'ModelData object md is not loaded properly'
                raise AttributeError(msg)

            # build the latex table header
#            textable = u"\\begin{table}[b!]\n"
#            textable += u"\\begin{center}\n"
            textable_p1 = "\\centering\n"
            textable_p1 += "\\begin{tabular}"
            # configure the column properties
            tmp = ['C{2.0 cm}' for k in cols_p1]
            tmp = "|".join(tmp)
            textable_p1 += '{|' + tmp + '|}'
            textable_p1 += '\\hline\n'
            # add formula mode for the headers
            tmp = []
            for k in cols_p1:
                k1, k2 = k.split(' ')
                tmp.append('$%s$ $%s$' % (k1,k2) )
#            tmp = [u'$%s$' % k for k in cols_p1]
            textable_p1 += ' & '.join(tmp)
            textable_p1 += '\\\\ \n'
            textable_p1 += '\\hline\n'
            textable_p2 = "\\centering\n"
            textable_p2 += "\\begin{tabular}"
            # configure the column properties
            tmp = ['C{1.5 cm}' for k in cols_p2]
            tmp = "|".join(tmp)
            textable_p2 += '{|' + tmp + '|}'
            textable_p2 += '\\hline\n'
            # add formula mode for the headers
            tmp = []
            for k in cols_p2:
                k1, k2 = k.split(' ')
                tmp.append('$%s$ $%s$' % (k1,k2) )
#            tmp = [u'$%s$ $%s$' % (k1, k2) for k in cols_p2]
            # hack: spread the last element over two lines
#            tmp[-1] = '$pitch$ $[deg]$'
            textable_p2 += ' & '.join(tmp)
            textable_p2 += '\\\\ \n'
            textable_p2 += '\\hline\n'
            for row in range(st_arr.shape[0]):
                r     = st_arr[row, self.st_headers.r]
                m     = st_arr[row, self.st_headers.m]
                x_cg  = st_arr[row, self.st_headers.x_cg]
                y_cg  = st_arr[row, self.st_headers.y_cg]
                ri_x  = st_arr[row, self.st_headers.ri_x]
                ri_y  = st_arr[row, self.st_headers.ri_y]
                x_sh  = st_arr[row, self.st_headers.x_sh]
                y_sh  = st_arr[row, self.st_headers.y_sh]
                E     = st_arr[row, self.st_headers.E]
                G     = st_arr[row, self.st_headers.G]
                Ixx   = st_arr[row, self.st_headers.Ixx]
                Iyy   = st_arr[row, self.st_headers.Iyy]
                I_p   = st_arr[row, self.st_headers.I_p]
                k_x   = st_arr[row, self.st_headers.k_x]
                k_y   = st_arr[row, self.st_headers.k_y]
                A     = st_arr[row, self.st_headers.A]
                pitch = st_arr[row, self.st_headers.pitch]
                x_e   = st_arr[row, self.st_headers.x_e]
                y_e   = st_arr[row, self.st_headers.y_e]
                # WARNING: same order as the labels defined in cols_p1 and
                # cols_p2!
                p1 = [r, m, m*ri_x*ri_x, m*ri_y*ri_y, E*Ixx, E*Iyy, E*A, I_p*G]
                p2 = [r, x_cg, y_cg, x_sh, y_sh, x_e, y_e, k_x, k_y, pitch]

                textable_p1 += " & ".join([self._format_nr(k) for k in p1])
                textable_p1 += '\\\\ \n'
                textable_p2 += " & ".join([self._format_nr(k) for k in p2])
                textable_p2 += '\\\\ \n'

            # default caption
            if caption == '':
                caption = 'HAWC2 cross sectional parameters for body: %s' % set_comment

            textable_p1 += "\hline\n"
            textable_p1 += "\end{tabular}\n"
            textable_p1 += "\caption{%s}\n" % caption
#            textable += u"\end{center}\n"
#            textable += u"\end{table}\n"

            fname = '%s-%s-%03i-%03i_p1' % (self.st_file, set_comment, i, j)
            fname = fname.replace('.', '') + '.tex'
            with open(fpath + fname, 'w') as f:
                f.write(textable_p1)

            textable_p2 += "\hline\n"
            textable_p2 += "\end{tabular}\n"
            textable_p2 += "\caption{%s}\n" % caption
#            textable += u"\end{center}\n"
#            textable += u"\end{table}\n"

            fname = '%s-%s-%03i-%03i_p2' % (self.st_file, set_comment, i, j)
            fname = fname.replace('.', '') + '.tex'
            with open(fpath + fname, 'w') as f:
                f.write(textable_p2)


class WeibullParameters(object):

    def __init__(self):
        self.Vin = 4.
        self.Vr = 12.
        self.Vout = 26.
        self.Vref = 50.
        self.Vstep = 2.
        self.shape_k = 2.


# FIXME: Cases has a memory leak somewhere, this whole thing needs to be
# reconsidered and rely on a DataFrame instead of a dict!
class Cases(object):
    """
    Class for the old htc_dict
    ==========================

    Formerly known as htc_dict: a dictionary with on the key a case identifier
    (case name) and the value is a dictionary holding all the different tags
    and value pairs which define the case

    TODO:

    define a public API so that plugins can be exposed in a standardized way
    using predefined variables:

    * pandas DataFrame backend instead of a dictionary

    * generic, so not bound to HAWC2. Goal: manage a lot of simulations
      and their corresponding inputs/outputs

    * integration with OpenMDAO?

    * case id (hash)

    * case name (which is typically created with variable_tag_name method)

    * results

    * inputs

    * outputs

    variable tags that have a dictionary mirror for database-like searching

    launch, post_launch, prepare_(re)launch should be methods of this class or
    inherit from Cases

    Create a method to add and remove cases from the pool so you can perform
    some analysis on them. Maybe make a GUI that presents a list of the
    current cases in the pool and then checkboxes to remove them.

    Move the HAWC2 specific parts to a HAWC2 plugin. The HAWC2 plugin will
    inherit from Cases. Proposed class names: HAWC2Cases, XFOILCases

    Rename cases to pool? A pool contains several cases, mixing several
    sim_id's?

    create a unique case ID based on the hash value of all the tag+values?
    """

    # TODO: add a method that can reload a certain case_dict, you change
    # some parameters for each case (or some) and then launch again

    #def __init__(self, post_dir, sim_id, resdir=False):
    def __init__(self, *args, **kwargs):
        """
        Either load the cases dictionary when post_dir and sim_id are given,
        or take a cases dictionary as the single input argument.

        Parameters
        ----------

        cases : dict
            The cases dictionary in case there is only one argument

        post_dir : str
            When using two arguments

        sim_id : str or list
            When using two arguments

        resdir : str, default=False

        loadstats : boolean, default=False

        rem_failed : boolean, default=True
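
        Examples
        --------
        A minimal sketch of both call signatures; the paths and sim_id's
        are illustrative only::

            # wrap an existing cases dictionary
            cc = Cases(cases)
            # or load the pickled cases of one or more sim_id's
            cc = Cases('/some/post_dir', 'sim_a')
            cc = Cases('/some/post_dir', ['sim_a', 'sim_b'], loadstats=True)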

        """

        resdir = kwargs.get('resdir', False)
        self.loadstats = kwargs.get('loadstats', False)
        self.rem_failed = kwargs.get('rem_failed', True)
        self.config = kwargs.get('config', {})
        print(self.config)
        # determine the input argument scenario
        if len(args) == 1:
            if type(args[0]).__name__ == 'dict':
                self.cases = args[0]
                sim_id = False
            else:
                raise ValueError('One argument input should be a cases dict')
        elif len(args) == 2:
            self.post_dir = args[0]
            sim_id = args[1]
        else:
            raise ValueError('Only one or two arguments are allowed.')

        # if sim_id is a list, then merge all sim_id's of that list
        if type(sim_id).__name__ == 'list':
            # stats, dynprop and fail are empty dictionaries if they do not
            # exist
            self.merge_sim_ids(sim_id)
            # and define a new sim_id based on all items from the list
            self.sim_id = '_'.join(sim_id)
        # in case we still need to load the cases dict
        elif type(sim_id).__name__ == 'str':
            self.sim_id = sim_id
            self._get_cases_dict(self.post_dir, sim_id)
            # load the statistics if applicable
            if self.loadstats:
                self.stats_df, self.Leq_df, self.AEP_df = self.load_stats()

        # change the results directory if applicable
        if resdir:
            self.change_results_dir(resdir)

#        # try to load failed cases and remove them
#        try:
#            self.load_failed(sim_id)
#            self.remove_failed()
#        except IOError:
#            pass

        #return self.cases

    def select(self, search_keyval=False, search_key=False):
        """
        Select only a subset of the cases

        Select either search_keyval or search_key. Using both is not supported
        yet. Run select twice to achieve the same effect. If both are False,
        cases will be emptied!

        Parameters
        ----------

        search_keyval : dictionary, default=False
            Keys are the column names. If the values match the ones in the
            database, the respective row gets selected. Each tag is hence
            a unique row identifier

        search_key : dict, default=False
            The key is the string that should either be inclusive (value TRUE)
            or exclusive (value FALSE) in the case key
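
        Examples
        --------
        A minimal sketch; the tag names and values are illustrative only::

            cc.select(search_keyval={'[windspeed]': 10})
            # or keep only cases whose name contains 'wsp10'
            cc.select(search_key={'wsp10': True})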
        """

        db = misc.DictDB(self.cases)
        if search_keyval:
            db.search(search_keyval)
        elif search_key:
            db.search_key(search_key)
        else:
            db.dict_sel = {}
        # and remove all keys that are not in the list
        remove = set(self.cases) - set(db.dict_sel)
        for k in remove:
            self.cases.pop(k)

    def launch(self, runmethod='local', verbose=False, copyback_turb=True,
           silent=False, check_log=True):
        """
        Launch all cases
        """

        launch(self.cases, runmethod=runmethod, verbose=verbose, silent=silent,
               check_log=check_log, copyback_turb=copyback_turb)

    def post_launch(self, save_iter=False):
        """
        Post Launching Maintenance

        check the log files and make sure result files are present and
        accounted for.
        """
        # TODO: integrate global post_launch in here
        self.cases_fail = post_launch(self.cases, save_iter=save_iter)

        if self.rem_failed:
            self.remove_failed()

    def load_case(self, case):
        try:
            iterations = self.load_iterations(case)
        except IOError:
            iterations = None
        res = self.load_result_file(case)
        return res, iterations

    def load_iterations(self, case):

        fp = os.path.join(case['[run_dir]'], case['[iter_dir]'],
                          case['[case_id]'])
        return np.loadtxt(fp + '.iter')

    # TODO: HAWC2 result file reading should be moved to Simulations
    # and we should also switch to faster HAWC2 reading!
    def load_result_file(self, case, _slice=False):
        """
        Load the HAWC2 result file for the given case

        Parameters
        ----------

        case : dict
            a case dictionary holding all the tags set for this specific
            HAWC2 simulation

        Returns
        -------

        res : object
            A HawcPy LoadResults instance with attributes such as sig, ch_dict,
            and much much more
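
        Examples
        --------
        A minimal sketch, assuming the cases dictionary has been loaded::

            cname = list(cc.cases.keys())[0]
            res = cc.load_result_file(cc.cases[cname])
            time, sig = cc.time, cc.sig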

        """

        respath = os.path.join(case['[run_dir]'], case['[res_dir]'])
        resfile = case['[case_id]']
        self.res = windIO.LoadResults(respath, resfile)
        if not _slice:
            _slice = np.r_[0:len(self.res.sig)]
        self.time = self.res.sig[_slice,0]
        self.sig = self.res.sig[_slice,:]
        self.case = case

        return self.res

    def load_struct_results(self, case, max_modes=500, nrmodes=1000):
        """
        Load the structural analysis result files
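
        A minimal sketch; beam, body and struct_inertia are placeholders in
        the current implementation and are returned as None::

            out = cc.load_struct_results(cc.cases[cname])
            beam, body, eigen_body, eigen_struct, struct_inertia = out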
        """
        fpath = os.path.join(case['[run_dir]'], case['[eigenfreq_dir]'])

        # BEAM OUTPUT (reading not yet implemented)
        fname = '%s_beam_output.txt' % case['[case_id]']
        beam = None

        # BODY OUTPUT (reading not yet implemented)
        fname = '%s_body_output.txt' % case['[case_id]']
        body = None

        # EIGEN BODY
        fname = '%s_eigen_body.txt' % case['[case_id]']
        try:
            eigen_body, rs2 = windIO.ReadEigenBody(fpath, fname, debug=False,
                                              nrmodes=nrmodes)
        except Exception as e:
            eigen_body = None
            print('failed to load eigen_body')
            print(e)

        # EIGEN STRUCT
        fname = '%s_eigen_struct.txt' % case['[case_id]']
        try:
            eigen_struct = windIO.ReadEigenStructure(fpath, fname, debug=False,
                                                     max_modes=max_modes)
        except Exception as e:
            eigen_struct = None
            print('failed to load eigen_struct')
            print(e)

        # STRUCT INERTIA (reading not yet implemented)
        fname = '%s_struct_inertia.txt' % case['[case_id]']
        struct_inertia = None

        return beam, body, eigen_body, eigen_struct, struct_inertia

    def change_results_dir(self, forcedir, post_dir=False):
        """
        If the post processing concerns simulations run on thyra/gorm and
        downloaded locally, change the path to the results accordingly.

        """
        for case in self.cases:
            self.cases[case]['[run_dir]'] = forcedir
            if post_dir:
                self.cases[case]['[post_dir]'] = post_dir

        #return cases

    def force_lower_case_id(self):
        tmp_cases = {}
        for cname, case in self.cases.items():
            tmp_cases[cname.lower()] = case.copy()
        self.cases = tmp_cases

    def _get_cases_dict(self, post_dir, sim_id):
        """
        Load the pickled dictionary containing all the cases and their
        respective tags.

        Sets self.cases to the loaded cases dictionary. When rem_failed is
        True, failed cases are removed and kept in self.cases_fail.

        """
        self.cases = load_pickled_file(os.path.join(post_dir, sim_id + '.pkl'))
        self.cases_fail = {}

        self.force_lower_case_id()

        if self.rem_failed:
            try:
                self.load_failed(sim_id)
                # ditch all the failed cases out of the htc_dict otherwise
                #  we will have fails when reading the results data files
                self.remove_failed()
            except IOError:
                print("couldn't find pickled failed dictionary")

        return

    def cases2df(self):
        """Convert the cases dict to a DataFrame and save as excel sheet"""

        tag_set = []

        # maybe some cases have tags that others don't, create a set with
        # all the tags that occur
        for cname, tags in self.cases.items():
            tag_set.extend(list(tags.keys()))
        # also add cname as a tag
        tag_set.append('cname')
        # only unique tags
        tag_set = set(tag_set)
        # and build the df_dict with all the tags
        df_dict = {tag:[] for tag in tag_set}

        for cname, tags in self.cases.items():
            current_tags = set(tags.keys())
            for tag, value in tags.items():
                df_dict[tag].append(value)
            # and the missing ones
            for tag in (tag_set - current_tags):
                df_dict[tag].append('')

        df_dict2 = misc.df_dict_check_datatypes(df_dict)

        return pd.DataFrame(df_dict2)

    def merge_sim_ids(self, sim_id_list, silent=False):
        """
        Load and merge for a list of sim_id's cases, fail, dynprop and stats
        ====================================================================

        For all sim_id's in the sim_id_list the cases, stats, fail and dynprop
        dictionaries are loaded. If one of them doesn't exist, an empty
        dictionary is returned.

        Currently, there is no warning given when a certain case will be
        overwritten upon merging.
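
        A usage sketch; the sim_id's are illustrative only::

            cc.merge_sim_ids(['sim_a', 'sim_b'])
            # cc.cases now holds the union of both case dictionaries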

        """

        cases_merged = {}
        cases_fail_merged = {}

        for ii, sim_id in enumerate(sim_id_list):

            # TODO: give a warning if we have double entries or not?
            self.sim_id = sim_id
            self._get_cases_dict(self.post_dir, sim_id)
            cases_fail_merged.update(self.cases_fail)

            # and copy to htc_dict_merged. Note that non unique keys will be
            # overwritten: each case has to have a unique name!
            cases_merged.update(self.cases)

            # merge the statistics if applicable
            # self.stats_dict[channels] = df
            if self.loadstats:
                if ii == 0:
                    self.stats_df, self.Leq_df, self.AEP_df = self.load_stats()
                else:
                    tmp1, tmp2, tmp3 = self.load_stats()
                    self.stats_df = pd.concat([self.stats_df, tmp1])
                    self.Leq_df = pd.concat([self.Leq_df, tmp2])
                    self.AEP_df = pd.concat([self.AEP_df, tmp3])

        self.cases = cases_merged
        self.cases_fail = cases_fail_merged

    def printall(self, scenario, figpath=''):
        """
        For all the cases, get the average value of a certain channel
        """
        self.figpath = figpath

        # plot for each case the dashboard
        for k in self.cases:

            if scenario == 'blade_deflection':
                self.blade_deflection(self.cases[k], self.figpath)

    def diff(self, refcase_dict, cases):
        """
        See which tags change over the given cases of the simulation object
        """

        # there is only one case allowed in refcase dict
        if len(refcase_dict) != 1:
            raise ValueError('Only one case allowed in refcase dict')

        # take an arbitrary case as baseline for comparison
        refcase = refcase_dict[list(refcase_dict.keys())[0]]
        #reftags = sim_dict[refcase]

        diffdict = dict()
        adddict = dict()
        remdict = dict()
        print()
        print('*'*80)
        print('comparing %i cases' % len(cases))
        print('*'*80)
        print()
        # compare each case with the refcase and see if there are any diffs
        for case in sorted(cases.keys()):
            dd = misc.DictDiff(refcase, cases[case])
            diffdict[case] = dd.changed()
            adddict[case] = dd.added()
            remdict[case] = dd.removed()
            print('')
            print('='*80)
            print(case)
            print('='*80)
            for tag in sorted(diffdict[case]):
                print(tag.rjust(20),':', cases[case][tag])

        return diffdict, adddict, remdict

    def blade_deflection(self, case, **kwargs):
        """
        """

        # read the HAWC2 result file
        self.load_result_file(case)

        # select all the y deflection channels
        db = misc.DictDB(self.res.ch_dict)

        db.search({'sensortype' : 'state pos', 'component' : 'z'})
        # sort the keys and save the mean values to an array/list
        chiz, zvals = [], []
        for key in sorted(db.dict_sel.keys()):
            zvals.append(-self.sig[:,db.dict_sel[key]['chi']].mean())
            chiz.append(db.dict_sel[key]['chi'])

        db.search({'sensortype' : 'state pos', 'component' : 'y'})
        # sort the keys and save the mean values to an array/list
        chiy, yvals = [], []
        for key in sorted(db.dict_sel.keys()):
            yvals.append(self.sig[:,db.dict_sel[key]['chi']].mean())
            chiy.append(db.dict_sel[key]['chi'])

        return np.array(zvals), np.array(yvals)

    def remove_failed(self):

        # don't do anything if there is nothing defined
        if self.cases_fail is None:
            print('no failed cases to remove')
            return

        # ditch all the failed cases out of the htc_dict
        # otherwise we will have fails when reading the results data files
        for k in self.cases_fail:
            try:
                self.cases_fail[k] = copy.copy(self.cases[k])
                del self.cases[k]
                print('removed from htc_dict due to error: ' + k)
            except KeyError:
                print('WARNING: failed case does not occur in cases')
                print('   ', k)

    def load_failed(self, sim_id):

        fname = os.path.join(self.post_dir, sim_id + '_fail.pkl')
        with open(fname, 'rb') as f:
            self.cases_fail = pickle.load(f)

    def load_stats(self, **kwargs):
        """
        Load an existing statistics file

        Parameters
        ----------

        post_dir : str, default=self.post_dir

        sim_id : str, default=self.sim_id

        fpath : str, default=sim_id

        leq : bool, default=False

        columns : list, default=None
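
        Examples
        --------
        A minimal sketch; the statistics are read from
        post_dir/sim_id + '_statistics.h5'::

            stats_df, Leq_df, AEP_df = cc.load_stats(leq=True)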
        """
        post_dir = kwargs.get('post_dir', self.post_dir)
        sim_id = kwargs.get('sim_id', self.sim_id)
        fpath = os.path.join(post_dir, sim_id)
        Leq_df = kwargs.get('leq', False)
        columns = kwargs.get('columns', None)

        try:
            stats_df = pd.read_hdf(fpath + '_statistics.h5', 'table',
                                   columns=columns)
#            FILE = open(post_dir + sim_id + '_statistics.pkl', 'rb')
#            stats_dict = pickle.load(FILE)
#            FILE.close()
        except IOError:
            stats_df = None
            print('NO STATS FOUND FOR', sim_id)

        try:
            AEP_df = pd.read_hdf(fpath + '_AEP.h5', 'table')
        except IOError:
            AEP_df = None
            print('NO AEP FOUND FOR', sim_id)

        if Leq_df:
            try:
                Leq_df = pd.read_hdf(fpath + '_Leq.h5', 'table')
            except IOError:
                Leq_df = None
                print('NO Leq FOUND FOR', sim_id)

        return stats_df, Leq_df, AEP_df

    def statistics(self, new_sim_id=False, silent=False, ch_sel=None,
                   tags=['[turb_seed]','[windspeed]'], calc_mech_power=False,
                   save=True, m=[3, 4, 6, 8, 10, 12], neq=None, no_bins=46,
                   ch_fatigue={}, update=False, add_sensor=None,
                   chs_resultant=[], i0=0, i1=-1, saveinterval=1000,
                   csv=True, suffix=None, fatigue_cycles=False, A=None,
                   ch_wind=None, save_new_sigs=False, xlsx=False):
        """
        Calculate statistics and save them in a pandas dataframe. The
        statistics file is also saved every saveinterval cases.

        Parameters
        ----------

        ch_sel : list, default=None
            If defined, only add defined channels to the output data frame.
            The list should contain valid channel names as defined in ch_dict.

        tags : list, default=['[turb_seed]','[windspeed]']
            Select which tag values from cases should be included in the
            dataframes. This will help in selecting and identifying the
            different cases.

        ch_fatigue : list, default={}
            Valid ch_dict channel names for which the equivalent fatigue load
            needs to be calculated. When set to None, ch_fatigue = ch_sel,
            and hence all channels will have a fatigue analysis.

        fatigue_cycles : Boolean, default=False
            If True, the cycle matrix, or sum( n_i*S_i^m ), is calculated. If
            set to False, the 1Hz equivalent load is calculated.

        chs_resultant

        add_sensor : dict, default=None
            Add a new channel derived from two existing ones. Expected keys:
            'ch1_name', 'ch2_name', 'ch_name_add', 'factor' and 'operator'.

        calc_mech_power

        saveinterval : int, default=1000
            When processing a large number of cases, the statistics file
            will be saved every saveinterval-ed case

        update : boolean, default=False
            Update an existing DataFrame instead of overwriting one. When
            the number of cases is larger then saveinterval, the statistics
            file will be updated every saveinterval-ed case

        suffix : boolean or str, default=None
            When True, the statistics data file will be appended with a suffix
            that corresponds to the index of the last case added (up to but
            excluding, much like range()). When a string, that suffix will be
            added to the file name. Set to True when a large number of cases
            is being considered in order to avoid excessively large DataFrames.

        csv : boolean, default=True
            In addition to a h5 file, save the statistics also in csv format.

        xlsx : boolean, default=False
            In addition to a h5 file, save the statistics also in MS Excel xlsx
            format.

        Returns
        -------

        dfs : dict
            Dictionary of dataframes, where the key is the channel name of
            the output (that was optionally defined in ch_sel), and the value
            is the dataframe containing the statistical values for all the
            different selected cases.
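
        Examples
        --------
        A minimal sketch; the tag names are illustrative only::

            dfs = cc.statistics(tags=['[windspeed]', '[turb_seed]'],
                                m=[3, 10], csv=True)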

        """

        def add_df_row(df_dict, **kwargs):
            """
            add a new channel to the df_dict format of ch_df
            """
            for col, value in kwargs.items():
                df_dict[col].append(value)
            for col in (self.res.cols - set(kwargs.keys())):
                df_dict[col].append('')
            return df_dict

        # in case the output changes, remember the original ch_sel
        if ch_sel is not None:
            ch_sel_init = ch_sel.copy()
        else:
            ch_sel_init = None

        if ch_fatigue is None:
            ch_fatigue_init = None
        else:
            ch_fatigue_init = ch_fatigue

        # TODO: should the default tags not be all the tags in the cases dict?
        tag_default = ['[case_id]', '[sim_id]']
        tag_chan = 'channel'
        # merge default with other tags
        for tag in tag_default:
            if tag not in tags:
                tags.append(tag)

        # tags have to be unique; when the same tag appears twice
        # it will break the DataFrame creation
        if len(tags) != len(set(tags)):
            raise ValueError('tags can only contain unique entries')

        # get some basic parameters required to calculate statistics
        try:
            case = list(self.cases.keys())[0]
        except IndexError:
            print('no cases to select so no statistics, aborting ...')
            return None

        post_dir = self.cases[case]['[post_dir]']
        if not new_sim_id:
            # select the sim_id from a random case
            sim_id = self.cases[case]['[sim_id]']
        else:
            sim_id = new_sim_id

        if not silent:
            nrcases = len(self.cases)
            print('='*79)
            print('statistics for %s, nr cases: %i' % (sim_id, nrcases))

        df_dict = None
        add_stats = True

        for ii, (cname, case) in enumerate(self.cases.items()):

            # build the basic df_dict if not defined
            if df_dict is None:
                # the dictionary that will be used to create a pandas dataframe
                df_dict = { tag:[] for tag in tags }
                df_dict[tag_chan] = []
                # add more columns that will help with IDing the channel
                df_dict['channel_name'] = []
                df_dict['channel_units'] = []
                df_dict['channel_nr'] = []
                df_dict['channel_desc'] = []
                add_stats = True

            if not silent:
                pc = '%6.2f' % (float(ii)*100.0/float(nrcases))
                pc += ' %'
                print('stats progress: %4i/%i %s' % (ii, nrcases, pc))

            # make sure the selected tags exist
            if not set(tags).issubset(case):
                raise KeyError('    not all selected tags exist in cases')

            self.load_result_file(case)
            ch_dict_new = {}
            # this is really messy, now we are also in parallel using the
            # channel DataFrame structure
            ch_df_new = {col:[] for col in self.res.cols}
            ch_df_new['ch_name'] = []
            # calculate the statistics values
#            stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)
            i_new_chans = self.sig.shape[1] # self.Nch
            sig_size = self.res.N  # len(self.sig[i0:i1,0])
            new_sigs = np.ndarray((sig_size, 0))

            if add_sensor is not None:
                chi1 = self.res.ch_dict[add_sensor['ch1_name']]['chi']
                chi2 = self.res.ch_dict[add_sensor['ch2_name']]['chi']
                name = add_sensor['ch_name_add']
                factor = add_sensor['factor']
                operator = add_sensor['operator']

                p1 = self.sig[:,chi1]
                p2 = self.sig[:,chi2]
                sig_add = np.ndarray((len(p1), 1))
                if operator == '*':
                    sig_add[:,0] = p1*p2*factor