                elif operator == '/':
                    sig_add[:,0] = factor*p1/p2
                else:
                    raise ValueError('Operator needs to be either * or /')
#                add_stats = self.res.calc_stats(sig_add)
#                add_stats_i = stats['max'].shape[0]
                # add a new channel description for the combined channel
                ch_dict_new[name] = {}
                ch_dict_new[name]['chi'] = i_new_chans
                ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
                                                   'ch_name':name})
                i_new_chans += 1
                new_sigs = np.append(new_sigs, sig_add, axis=1)
#                # and append to all the statistics types
#                for key, stats_arr in stats.iteritems():
#                    stats[key] = np.append(stats_arr, add_stats[key])

            # calculate the resultants
            sig_resultants = np.ndarray((sig_size, len(chs_resultant)))
            inc = []
            for j, chs in enumerate(chs_resultant):
                sig_res = np.ndarray((sig_size, len(chs)))
                lab = ''
                no_channel = False
                for i, ch in enumerate(chs):
                    # if the channel does not exist, flag it as missing so
                    # the resultant is skipped
                    try:
                        chi = self.res.ch_dict[ch]['chi']
                        sig_res[:,i] = self.sig[:,chi]
                        no_channel = False
                    except KeyError:
                        no_channel = True
                    lab += ch.split('-')[-1]
                name = '-'.join(ch.split('-')[:-1] + [lab])
                # when one of the components does not exist, we can not
                # calculate the resultant!
                if no_channel:
                    rpl = (name, cname)
                    print('    missing channel, no resultant for: %s, %s' % rpl)
                    continue
                inc.append(j)
                # the resultant: square root of the sum of the squared
                # components
                sig_resultants[:,j] = np.sqrt((sig_res*sig_res).sum(axis=1))
#                resultant = np.sqrt(sig_resultants[:,j].reshape(self.res.N, 1))
#                add_stats = self.res.calc_stats(resultant)
#                add_stats_i = stats['max'].shape[0]
                # add a new channel description for this resultant
                ch_dict_new[name] = {}
                ch_dict_new[name]['chi'] = i_new_chans
                ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
                                                   'ch_name':name})
                i_new_chans += 1
                # and append to all the statistics types
#                for key, stats_arr in stats.iteritems():
#                    stats[key] = np.append(stats_arr, add_stats[key])
            if len(chs_resultant) > 0:
                # but only take the channels that were not missing
                new_sigs = np.append(new_sigs, sig_resultants[:,inc], axis=1)

            # calculate mechanical power first before deriving statistics
            # from it
            if calc_mech_power:
                name = 'stats-shaft-power'
                sig_pmech = np.ndarray((sig_size, 1))
                sig_pmech[:,0] = self.shaft_power()
#                P_mech_stats = self.res.calc_stats(sig_pmech)
#                mech_stats_i = stats['max'].shape[0]
                # add a new channel description for the mechanical power
                ch_dict_new[name] = {}
                ch_dict_new[name]['chi'] = i_new_chans
                ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
                                                   'ch_name':name})
                i_new_chans += 1
                new_sigs = np.append(new_sigs, sig_pmech, axis=1)

                # and C_p_mech
                if A is not None:
                    name = 'stats-cp-mech'
                    if ch_wind is None:
                        chiwind = self.res.ch_dict[self.find_windchan_hub()]['chi']
                    else:
                        chiwind = self.res.ch_dict[ch_wind]['chi']
                    wind = self.res.sig[:,chiwind]
                    cp = np.ndarray((sig_size, 1))
                    cp[:,0] = self.cp(-sig_pmech[:,0], wind, A)
                    # add a new channel description for the power coefficient
                    ch_dict_new[name] = {}
                    ch_dict_new[name]['chi'] = i_new_chans
                    ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
                                                       'ch_name':name})
                    i_new_chans += 1
                    new_sigs = np.append(new_sigs, cp, axis=1)

                    try:
                        try:
                            nn_shaft = self.config['nn_shaft']
                        except (KeyError, AttributeError):
                            nn_shaft = 4

                        chan_t = 'shaft_nonrotate-shaft-node-%3.3i-forcevec-z'%nn_shaft
                        i = self.res.ch_dict[chan_t]['chi']
                        thrust = self.res.sig[:,i]
                        name = 'stats-ct'
                        ct = np.ndarray((sig_size, 1))
                        ct[:,0] = self.ct(thrust, wind, A)
                        ch_dict_new[name] = {}
                        ch_dict_new[name]['chi'] = i_new_chans
                        ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
                                                           'ch_name':name})
                        i_new_chans += 1
                        new_sigs = np.append(new_sigs, ct, axis=1)
                    except KeyError:
                        print('    can not calculate CT')

                # and append to all the statistics types
#                for key, stats_arr in stats.iteritems():
#                    stats[key] = np.append(stats_arr, P_mech_stats[key])

            if save_new_sigs and new_sigs.shape[1] > 0:
                chis, keys = [], []
                for key, value in ch_dict_new.items():
                    chis.append(value['chi'])
                    keys.append(key)
                # sort on channel number, so it agrees with the new_sigs array
                isort = np.array(chis).argsort()
                keys = np.array(keys)[isort].tolist()
                df_new_sigs = pd.DataFrame(new_sigs, columns=keys)
                respath = os.path.join(case['[run_dir]'], case['[res_dir]'])
                resfile = case['[case_id]']
                fname = os.path.join(respath, resfile + '_postres.h5')
                print('    saving post-processed res: %s...' % fname, end='')
                df_new_sigs.to_hdf(fname, 'table', mode='w', format='table',
                                   complevel=9, complib='blosc')
                print('done!')
                del df_new_sigs
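                # the saved table can be read back later with, for example:
                # df = pd.read_hdf(fname, 'table')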

            ch_dict = self.res.ch_dict.copy()
            ch_dict.update(ch_dict_new)

#            ch_df = pd.concat([self.res.ch_df, pd.DataFrame(ch_df_new)])

            # put all the extra channels into the results if we want to also
            # be able to calculate the fatigue loads on them.
            self.sig = np.append(self.sig, new_sigs, axis=1)

            # calculate the statistics values
            stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)

            # Because each channel is a new row, it doesn't matter how many
            # data channels each case has, and this approach does not break
            # when different cases have a different number of output channels.
            # By default, just take all channels in the result file.
            if ch_sel_init is None:
                ch_sel = list(ch_dict.keys())
#                ch_sel = ch_df.ch_name.tolist()
#                ch_sel = [str(k) for k in ch_sel]
                print('    selecting all channels for statistics')

            # calculate the fatigue properties from selected channels
            fatigue, tags_fatigue = {}, []
            if ch_fatigue_init is None:
                ch_fatigue = ch_sel
                print('    selecting all channels for fatigue')
            else:
                ch_fatigue = ch_fatigue_init

            for ch_id in ch_fatigue:
                chi = ch_dict[ch_id]['chi']
                signal = self.sig[:,chi]
                if neq is None:
                    neq = float(case['[duration]'])
                if not fatigue_cycles:
                    eq = self.res.calc_fatigue(signal, no_bins=no_bins,
                                               neq=neq, m=m)
                else:
                    eq = self.res.cycle_matrix(signal, no_bins=no_bins, m=m)
                fatigue[ch_id] = {}
                # when calc_fatigue succeeds, we should have as many items
                # as in m
                if len(eq) == len(m):
                    for eq_, m_ in zip(eq, m):
                        fatigue[ch_id]['m=%2.01f' % m_] = eq_
                # when it fails, we get an empty list back
                else:
                    for m_ in m:
                        fatigue[ch_id]['m=%2.01f' % m_] = np.nan
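            # note: for a rainflow cycle count with load ranges S_i and cycle
            # counts n_i, the equivalent load for exponent m is defined as
            # S_eq = (sum_i(n_i * S_i**m) / neq)**(1/m)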

            # build the fatigue tags
            for m_ in m:
                tag = 'm=%2.01f' % m_
                tags_fatigue.append(tag)

            # -----------------------------------------------------------------
            # define the pandas data frame dict on first run
            # -----------------------------------------------------------------
            # Only build the statistics and fatigue columns once. By
            # definition, the statistics, fatigue and htc tags will not change
            if add_stats:
                # statistical parameters
                for statparam in list(stats.keys()):
                    df_dict[statparam] = []
#                # additional tags
#                for tag in tags:
#                    df_dict[tag] = []
                # fatigue data
                for tag in tags_fatigue:
                    df_dict[tag] = []
                add_stats = False

            for ch_id in ch_sel:

                chi = ch_dict[ch_id]['chi']
                # ch_name is not unique anymore, this doesn't work obviously!
                # use the channel index instead, that is unique
#                chi = ch_df[ch_df.ch_name==ch_id].chi.values[0]

                # sig_stat = [(0=value,1=index),statistic parameter, channel]
                # stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max
                # note that min, mean, std, and range are not relevant for index
                # values. Set to zero there.

                # -------------------------------------------------------------
                # Fill in all the values for the current data entry
                # -------------------------------------------------------------

                # the auxiliary columns
                try:
                    name = self.res.ch_details[chi,0]
                    unit = self.res.ch_details[chi,1]
                    desc = self.res.ch_details[chi,2]
                # the new channels from new_sigs are not in here
                except (IndexError, AttributeError):
                    name = ch_id
                    desc = ''
                    unit = ''
                df_dict['channel_name'].append(name)
                df_dict['channel_units'].append(unit)
                df_dict['channel_desc'].append(desc)
                df_dict['channel_nr'].append(chi)

                # each df row is a channel of a case that needs to be identified
                df_dict[tag_chan].append(ch_id)

                # for all the statistics keys, save the values for the
                # current channel
                for statparam in list(stats.keys()):
                    df_dict[statparam].append(stats[statparam][chi])
                # and save the tags from the input htc file in order to
                # label each different case properly
                for tag in tags:
                    df_dict[tag].append(case[tag])
                # append any fatigue channels if applicable, otherwise nan
                if ch_id in fatigue:
                    for m_fatigue, eq_ in fatigue[ch_id].items():
                        df_dict[m_fatigue].append(eq_)
                else:
                    for tag in tags_fatigue:
                        df_dict[tag].append(np.nan)
            # when dealing with a lot of cases, save the stats data at
            # intermediate points to avoid memory issues
            if math.fmod(ii+1, saveinterval) == 0.0:
                df_dict2 = self._df_dict_check_datatypes(df_dict)
                # convert, save/update
                if isinstance(suffix, str):
                    ext = suffix
                elif suffix is True:
                    ext = '_%06i' % (ii+1)
                else:
                    ext = ''
#                dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save,
#                                         update=update, csv=csv, suffix=ext)
                # TODO: test this first
                fname = os.path.join(post_dir, sim_id + '_statistics' + ext)
                dfs = misc.dict2df(df_dict2, fname, save=save, update=update,
                                   csv=csv, xlsx=xlsx, check_datatypes=False)

                df_dict2 = None
                df_dict = None
                add_stats = True

        # only save again when there is actual data in df_dict
        if df_dict is not None:
            # make consistent data types
            df_dict2 = self._df_dict_check_datatypes(df_dict)
            # convert, save/update
            if isinstance(suffix, str):
                ext = suffix
            elif suffix is True:
                ext = '_%06i' % ii
            else:
                ext = ''
#            dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save,
#                                     update=update, csv=csv, suffix=ext)
            # TODO: test this first
            fname = os.path.join(post_dir, sim_id + '_statistics' + ext)
            dfs = misc.dict2df(df_dict2, fname, save=save, update=update,
                               csv=csv, xlsx=xlsx, check_datatypes=False)

        return dfs

    def _add2newsigs(self, ch_dict, name, i_new_chans, new_sigs, addendum):
        """Register a new channel in ch_dict and append it to new_sigs."""
        ch_dict[name] = {}
        ch_dict[name]['chi'] = i_new_chans
        # the incremented channel counter has to be returned as well,
        # otherwise the caller would keep re-using the same channel index
        return ch_dict, i_new_chans + 1, np.append(new_sigs, addendum, axis=1)

    # TODO: use the version in misc instead.
    def _df_dict_save(self, df_dict2, post_dir, sim_id, save=True,
                      update=False, csv=True, suffix=None):
        """
        Convert the df_dict to df and save/update.

        DEPRECATED, use misc.dict2df instead
        """
        if isinstance(suffix, str):
            fpath = os.path.join(post_dir, sim_id + '_statistics' + suffix)
        else:
            fpath = os.path.join(post_dir, sim_id + '_statistics')

        # in case converting to dataframe fails, fall back
        try:
            dfs = pd.DataFrame(df_dict2)
        except Exception as e:
            with open(fpath + '.pkl', 'wb') as f:
                pickle.dump(df_dict2, f, protocol=2)
            # check what went wrong
            misc.check_df_dict(df_dict2)
            print('failed to convert to data frame, saved as dict')
            raise e

#        # apply categoricals to objects
#        for column_name, column_dtype in dfs.dtypes.iteritems():
#            # applying categoricals mostly makes sense for objects
#            # we ignore all others
#            if column_dtype.name == 'object':
#                dfs[column_name] = dfs[column_name].astype('category')

        # and save/update the statistics database
        if save:
            if update:
                print('updating statistics: %s ...' % (post_dir + sim_id), end='')
                try:
                    dfs.to_hdf('%s.h5' % fpath, 'table', mode='r+', append=True,
                               format='table', complevel=9, complib='blosc')
                except IOError:
                    print('Can not update, file does not exist. Saving instead'
                          '...', end='')
                    dfs.to_hdf('%s.h5' % fpath, 'table', mode='w',
                               format='table', complevel=9, complib='blosc')
            else:
                print('saving statistics: %s ...' % (post_dir + sim_id), end='')
                if csv:
                    dfs.to_csv('%s.csv' % fpath)
                dfs.to_hdf('%s.h5' % fpath, 'table', mode='w',
                           format='table', complevel=9, complib='blosc')

            print('DONE!!\n')

        return dfs

    # TODO: use the version in misc instead.
    def _df_dict_check_datatypes(self, df_dict):
        """
        There might be a mix of strings and numbers in a column; check if a
        single data type can be enforced throughout each column.
        Nasty hack: because of the unicode -> string conversion we might not
        overwrite the same key in the dict.

        DEPRECATED, use misc.df_dict_check_datatypes instead
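
        Example (hypothetical input)
        ----------------------------
        >>> df_dict = {'case': ['case_a', 'case_b'], 'max': [1.0, 2.0]}
        >>> df_dict2 = self._df_dict_check_datatypes(df_dict)

        Here the 'case' column is cast to a string array and 'max' to a
        float64 array.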
        """
        # FIXME: this approach will result in twice the memory usage though...
        # we can not pop/delete items from a dict while iterating over it
        df_dict2 = {}
        for colkey, col in df_dict.items():
            # if we have a list, convert to string
            if type(col[0]).__name__ == 'list':
                for ii, item in enumerate(col):
                    col[ii] = '**'.join(item)
            # if we already have an array (statistics) or a list of numbers
            # do not try to cast into another data type, because downcasting
            # in that case will not raise any exception
            elif type(col[0]).__name__[:3] in ['flo', 'int', 'nda']:
                df_dict2[str(colkey)] = np.array(col)
                continue
            # in case we have unicodes instead of strings, we need to convert
            # to strings otherwise the saved .h5 file will have pickled elements
            try:
                df_dict2[str(colkey)] = np.array(col, dtype=np.int32)
            except OverflowError:
                try:
                    df_dict2[str(colkey)] = np.array(col, dtype=np.int64)
                except OverflowError:
                    df_dict2[str(colkey)] = np.array(col, dtype=np.float64)
            except ValueError:
                try:
                    df_dict2[str(colkey)] = np.array(col, dtype=np.float64)
                except ValueError:
                    df_dict2[str(colkey)] = np.array(col, dtype=str)
            except TypeError:
                # in all other cases, fall back to casting everything to
                # strings
                df_dict2[str(colkey)] = np.array(col, dtype=str)
            except Exception as e:
                print('failed to convert column %s to single data type' % colkey)
                raise e
        return df_dict2

    def fatigue_lifetime(self, dfs, neq, res_dir='res/', fh_lst=None, years=20.,
                         dlc_folder="dlc%s_iec61400-1ed3/", extra_cols=[],
                         save=False, update=False, csv=False, new_sim_id=False,
                         xlsx=False):
        """
        Calculate the fatigue over a selection of cases and indicate how many
        hours each case contributes to its life time.

        This approach can only work reliably if the common DLC folder
        structure is followed.

        Parameters
        ----------

        dfs : DataFrame
            Statistics Pandas DataFrame. When extra_cols is not defined, it
            should only hold the results of one standard organized DLC (one
            turbine, one inflow case).

        neq : float
            Reference number of cycles. Usually neq is set to 1e6, 1e7,
            or 1e8.

        res_dir : str, default='res/'
            Base directory of the results. Results would be located in
            res/dlc_folder/*.sel

        dlc_folder : str, default="dlc%s_iec61400-1ed3/"
            String with the DLC subfolder names. One string substitution is
            required (%s), and should represent the DLC number (without comma
            or point). Not relevant when fh_lst is defined.

        extra_cols : list, default=[]
            The included columns are the material constants, and each row is
            a channel. When multiple DLC cases are included in dfs, the user
            has to define additional columns in order to distinguish between
            the DLC cases.

        fh_lst : list, default=None
            Number of hours for each case over its life time. Format:
            [(filename, hours), ...] where filename is the name of the file
            (can be a full path, but only the base path is considered), hours
            is the number of hours over the life time. When fh_lst is set,
            dlc_folder and res_dir are not used.

        years : float, default=20
            Total life time expressed in years.

        Returns
        -------

        df_Leq : DataFrame
            Pandas DataFrame with the life time equivalent load for the given
            neq, all the channels, and a range of material parameters m.
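
        Example
        -------

        A minimal sketch, assuming cc is a Cases instance for which the
        statistics DataFrame dfs has been created earlier with statistics(),
        and with a hypothetical extra column name::

            df_Leq = cc.fatigue_lifetime(dfs, 1e7, years=20.,
                                         extra_cols=['[Case folder]'])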
        """

        print('Calculating life time fatigue load')

        # get some basic parameters required to calculate statistics
        try:
            case = list(self.cases.keys())[0]
        except IndexError:
            print('no cases to select so no statistics, aborting ...')
            return None
        post_dir = self.cases[case]['[post_dir]']
        if not new_sim_id:
            # select the sim_id from a random case
            sim_id = self.cases[case]['[sim_id]']
        else:
            sim_id = new_sim_id

        if fh_lst is None:
            wb = WeibullParameters()
            if 'Weibull' in self.config:
                for key in self.config['Weibull']:
                    setattr(wb, key, self.config['Weibull'][key])

            # we assume the run_dir (root) is the same every where
            run_dir = self.cases[case]['[run_dir]']
            fname = os.path.join(run_dir, 'htc', 'DLCs', 'dlc_config.xlsx')
            dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k)
            # if you need all DLCs, make sure to have %s in the file name
            dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder)
            fh_lst = dlc_cfg.file_hour_lst(years=years)
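            # fh_lst format example (file names are hypothetical):
            # [('dlc12_wsp04_wdir000_s1001.sel', 87.6), ...]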

        # now we have a full path to the result files, but we only need the
        # case_id to identify the corresponding entry from the statistics
        # DataFrame (excluding the .sel extension)
        case_ids = [os.path.basename(k[0].replace('.sel', '')) for k in fh_lst]
        hours = [k[1] for k in fh_lst]

        # ---------------------------------------------------------------------
        # column definitions
        # ---------------------------------------------------------------------
        # available material constants
        ms, cols = [], []
        for key in dfs:
            if key[:2] == 'm=':
                ms.append(key)
        # when multiple DLC cases are included, add extra cols to identify each
        # DLC group.
        extra_cols.append('channel')
        cols = copy.copy(ms)
        cols.extend(extra_cols)
        # ---------------------------------------------------------------------

        # Build the DataFrame; we do not have a unique channel index
        dict_Leq = {col:[] for col in cols}
        # index on case_id on the original DataFrame so we can select accordingly
        dfs = dfs.set_index('[case_id]')
        # select for each channel all the cases
        for grname, gr in dfs.groupby(dfs.channel):
            # if one m has any nan's, assume none of them are good and throw
            # away
#            if np.isnan(gr[ms[0]].values).any():
#                sel_rows.pop(grname)
#                continue
            # select the cases in the same order as the corresponding hours
            try:
                sel_sort = gr.loc[case_ids]
            except KeyError:
                print('    ignore sensor for Leq:', grname)
                continue
            for col in extra_cols:
                # at this stage we should only have one DLC set, so each
                # identifier column should hold a single unique value
                val_unique = sel_sort[col].unique()
                if len(val_unique) > 1:
                    print('found %i sets instead of 1:' % len(val_unique))
                    print(val_unique)
                    raise ValueError('For Leq load, the given DataFrame can '
                                     'only hold one complete DLC set.')
                # values of the identifier columns for each case. We do this
                # in case the original dfs holds multiple DLC cases.
                dict_Leq[col].append(sel_sort[col].unique()[0])
            for m in ms:
                # sel_sort[m] holds the cycle_matrices for each of the DLC
                # cases: e.g. all the different wind speeds for dlc1.2
                R_eq = (sel_sort[m].values*np.array(hours)).sum()
                # the effective Leq for each of the material constants:
                # Leq = (R_eq / neq)**(1/m)
                dict_Leq[m].append(math.pow(R_eq/neq, 1.0/float(m[2:])))
                # the following is twice as slow:
                # [i*j for (i,j) in zip(sel_sort[m].values.tolist(),hours)]

#        collens = misc.check_df_dict(dict_Leq)
        # make consistent data types, and convert to DataFrame
        fname = os.path.join(post_dir, sim_id + '_Leq')
        df_Leq = misc.dict2df(dict_Leq, fname, save=save, update=update,
                              csv=csv, check_datatypes=True, xlsx=xlsx)

        # only keep the ones that do not have nan's (only works with index)
        return df_Leq

    def AEP(self, dfs, fh_lst=None, ch_powe=None, extra_cols=[], update=False,
            res_dir='res/', dlc_folder="dlc%s_iec61400-1ed3/", csv=False,
            new_sim_id=False, save=False, years=20.0, xlsx=False):

        """
        Calculate the Annual Energy Production (AEP) for DLC1.2 cases.

        Parameters
        ----------

        dfs : DataFrame
            Statistics Pandas DataFrame. When extra_cols is not defined, it
            should only hold the results of one standard organized DLC (one
            turbine, one inflow case).

        fh_lst : list, default=None
            Number of hours for each case over its life time. Format:
            [(filename, hours), ...] where filename is the name of the file
            (can be a full path, but only the base path is considered), hours
            is the number of hours over the life time. When fh_lst is set,
            dlc_folder and res_dir are not used.

        ch_powe : string, default=None
            Channel name of the electrical power output channel. When None,
            the default channel of the DTU Wind Energy controller is used
            ('DLL-2-inpvec-2').

        extra_cols : list, default=[]
            The included column is just the AEP, and each row is
            a channel. When multiple DLC cases are included in dfs, the user
            has to define additional columns in order to distinguish between
            the DLC cases.

        res_dir : str, default='res/'
            Base directory of the results. Results would be located in
            res/dlc_folder/*.sel

        dlc_folder : str, default="dlc%s_iec61400-1ed3/"
            String with the DLC subfolder names. One string substitution is
            required (%s), and should represent the DLC number (without comma
            or point). Not relevant when fh_lst is defined.
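
        Returns
        -------

        df_AEP : DataFrame
            Pandas DataFrame with the AEP column and the columns listed in
            extra_cols.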
        """

        # get some basic parameters required to calculate statistics
        try:
            case = list(self.cases.keys())[0]
        except IndexError:
            print('no cases to select so no statistics, aborting ...')
            return None
        post_dir = self.cases[case]['[post_dir]']
        if not new_sim_id:
            # select the sim_id from a random case
            sim_id = self.cases[case]['[sim_id]']
        else:
            sim_id = new_sim_id

        if fh_lst is None:
            wb = WeibullParameters()
            if 'Weibull' in self.config:
                for key in self.config['Weibull']:
                    setattr(wb, key, self.config['Weibull'][key])

            # we assume the run_dir (root) is the same every where
            run_dir = self.cases[list(self.cases.keys())[0]]['[run_dir]']
            fname = os.path.join(run_dir, 'htc', 'DLCs', 'dlc_config.xlsx')
            dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k)
            # if you need all DLCs, make sure to have %s in the file name
            dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder)
            fh_lst = dlc_cfg.file_hour_lst(years=1.0)

        # now we have a full path to the result files, but we only need the
        # case_id to identify the corresponding entry from the statistics
        # DataFrame (excluding the .sel extension)
        def basename(k):
            return os.path.basename(k[0].replace('.sel', ''))
        fh_lst_basename = [(basename(k), k[1]) for k in fh_lst]
        # only take dlc12 for power production
        case_ids = [k[0] for k in fh_lst_basename if k[0][:5]=='dlc12']
        hours = [k[1] for k in fh_lst_basename if k[0][:5]=='dlc12']

        # the default electrical power channel name from DTU Wind controller
        if ch_powe is None:
            ch_powe = 'DLL-2-inpvec-2'
        # and select only the power channels
        dfs_powe = dfs[dfs.channel==ch_powe]

        # by default we have AEP as a column
        cols = ['AEP']
        cols.extend(extra_cols)
        # Build the DataFrame; we do not have a unique channel index
        dict_AEP = {col:[] for col in cols}
        # index on case_id on the original DataFrame so we can select accordingly
        dfs_powe = dfs_powe.set_index('[case_id]')

        # select the cases in the same order as the corresponding hours
        sel_sort = dfs_powe.loc[case_ids]
        for col in extra_cols:
            # at this stage we should only have one DLC set, so each
            # identifier column should hold a single unique value
            val_unique = sel_sort[col].unique()
            if len(val_unique) > 1:
                print('found %i sets instead of 1:' % len(val_unique))
                print(val_unique)
                raise ValueError('For AEP, the given DataFrame can only hold '
                                 'one complete DLC set. Make sure to define '
                                 'the proper extra_cols to distinguish the '
                                 'different DLC sets.')
            # values of the identifier columns for each case. We do this
            # in case the original dfs holds multiple DLC cases.
            dict_AEP[col].append(sel_sort[col].unique()[0])

        # the AEP: sum over all cases of the mean power times the number of
        # hours per year that case occurs
#        duration = sel_sort['[duration]'].values
#        power_mean = sel_sort['mean'].values
        AEP = (sel_sort['mean'].values * np.array(hours)).sum()
        dict_AEP['AEP'].append(AEP)

        # make consistent data types, and convert to DataFrame
        fname = os.path.join(post_dir, sim_id + '_AEP')
        df_AEP = misc.dict2df(dict_AEP, fname, update=update, csv=csv,
                              save=save, check_datatypes=True, xlsx=xlsx)

        return df_AEP

    def stats2dataframe(self, ch_sel=None, tags=['[turb_seed]','[windspeed]']):
        """
        Convert the archaic statistics dictionary of a group of cases to
        a more convenient pandas dataframe format.

        DEPRECATED, use statistics instead!!

        Parameters
        ----------

        ch_sel : dict, default=None
            Map short names to the channel id's defined in ch_dict in order to
            have more human readable column names in the pandas dataframe. By
            default, if ch_sel is None, a dataframe for each channel in the
            ch_dict (so in the HAWC2 output) will be created. When ch_sel is
            defined, only those channels are considered.
            ch_sel[short name] = full ch_dict identifier

        tags : list, default=['[turb_seed]','[windspeed]']
            Select which tag values from cases should be included in the
            dataframes. This will help in selecting and identifying the
            different cases.

        Returns
        -------

        dfs : dict
            Dictionary of dataframes, where the key is the channel name of
            the output (that was optionally defined in ch_sel), and the value
            is the dataframe containing the statistical values for all the
            different selected cases.
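
        Example
        -------

        A minimal sketch (the short name to channel mapping is hypothetical)::

            ch_sel = {'power': 'DLL-2-inpvec-2'}
            dfs = cc.stats2dataframe(ch_sel=ch_sel, tags=['[windspeed]'])
            dfs['power'][['mean', 'std', '[windspeed]']]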
        """

        df_dict = {}

        for cname, case in self.cases.items():

            # make sure the selected tags exist
            if len(tags) != len(set(case) & set(tags)):
                raise KeyError('not all selected tags exist in cases')

            sig_stats = self.stats_dict[cname]['sig_stats']
            ch_dict = self.stats_dict[cname]['ch_dict']

            if ch_sel is None:
                ch_sel = {i: i for i in ch_dict}
            for ch_short, ch_name in ch_sel.items():

                chi = ch_dict[ch_name]['chi']
                # sig_stat = [(0=value,1=index),statistic parameter, channel]
                # stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max
                # note that min, mean, std, and range are not relevant for index
                # values. Set to zero there.
                try:
                    df_dict[ch_short]['case name'].append(cname)
                    df_dict[ch_short]['max'].append(   sig_stats[0,0,chi])
                    df_dict[ch_short]['min'].append(   sig_stats[0,1,chi])
                    df_dict[ch_short]['mean'].append(  sig_stats[0,2,chi])
                    df_dict[ch_short]['std'].append(   sig_stats[0,3,chi])
                    df_dict[ch_short]['range'].append( sig_stats[0,4,chi])
                    df_dict[ch_short]['absmax'].append(sig_stats[0,5,chi])
                    for tag in tags:
                        df_dict[ch_short][tag].append(case[tag])
                except KeyError:
                    df_dict[ch_short] = {'case name' : [cname]}
                    df_dict[ch_short]['max']    = [sig_stats[0,0,chi]]
                    df_dict[ch_short]['min']    = [sig_stats[0,1,chi]]
                    df_dict[ch_short]['mean']   = [sig_stats[0,2,chi]]
                    df_dict[ch_short]['std']    = [sig_stats[0,3,chi]]
                    df_dict[ch_short]['range']  = [sig_stats[0,4,chi]]
                    df_dict[ch_short]['absmax'] = [sig_stats[0,5,chi]]
                    for tag in tags:
                        df_dict[ch_short][tag] = [ case[tag] ]

        # and create for each channel a dataframe
        dfs = {}
        for ch_short, df_values in df_dict.items():
            dfs[ch_short] = pd.DataFrame(df_values)

        return dfs

    def load_azimuth(self, azi, load, sectors=360):
        """
        Establish load dependency on rotor azimuth angle
        """

        # sort on azimuth angle
        isort = np.argsort(azi)
        azi = azi[isort]
        load = load[isort]

        azi_sel = np.linspace(0, 360, num=sectors)
        load_sel = np.interp(azi_sel, azi, load)

        return azi_sel, load_sel

    def find_windchan_hub(self):
        """
        Find the global Vy wind speed channel at horizontal position
        (0.00, 0.00). When several heights match, the last one in sorted
        order is returned.
        """
        chan_found = None
        # if we sort we'll get the largest absolute coordinate last
        for ch in sorted(self.res.ch_dict.keys()):
            if ch[:29] == 'windspeed-global-Vy-0.00-0.00':
                chan_found = ch
        return chan_found

    def ct(self, thrust, wind, A, rho=1.225):
        """Thrust coefficient: C_T = T / (0.5 * rho * A * V**2)"""
        return thrust / (0.5 * rho * A * wind * wind)

    def cp(self, power, wind, A, rho=1.225):
        """Power coefficient: C_P = P / (0.5 * rho * A * V**3)"""
        return power / (0.5 * rho * A * wind * wind * wind)
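
    # A minimal usage sketch for cp above; the rotor swept area A = pi*R**2
    # is an assumption here (R being the rotor radius):
    #   chi = self.res.ch_dict[self.find_windchan_hub()]['chi']
    #   wind = self.res.sig[:, chi]
    #   cp = self.cp(self.shaft_power(), wind, np.pi*R**2)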

    def shaft_power(self):
        """
        Return the mechanical shaft power based on the shaft torsional loading
        """
        try:
            i = self.res.ch_dict['bearing-shaft_rot-angle_speed-rpm']['chi']
            rads = self.res.sig[:,i]*np.pi/30.0
        except KeyError:
            try:
                i = self.res.ch_dict['bearing-shaft_rot-angle_speed-rads']['chi']
                rads = self.res.sig[:,i]
            except KeyError:
                i = self.res.ch_dict['Omega']['chi']
                rads = self.res.sig[:,i]
        try:
            nn_shaft = self.config['nn_shaft']
        except (KeyError, AttributeError):
            nn_shaft = 4
        itorque = self.res.ch_dict['shaft-shaft-node-%3.3i-momentvec-z'%nn_shaft]['chi']
        torque = self.res.sig[:,itorque]
        # negative means power is being extracted, which is exactly what a wind
        # turbine is about, we call that positive
        return -1.0*torque*rads

    def calc_torque_const(self, save=False, name='ojf'):
        """
        If we have constant RPM over the simulation, calculate the torque
        constant. The current loaded HAWC2 case is considered. Consequently,
        first load a result file with load_result_file

        Parameters
        ----------

        save : boolean, default=False

        name : str, default='ojf'
            File name of the torque constant result. Defaults to using the
            ojf case name. If set to hawc2, it will use the case_id. In both
            cases the file name will be extended with '.kgen'.

        Returns
        -------

        [windspeed, rpm, K] : list

        """
        # make sure the results have been loaded previously
        try:
            # get the relevant index to the wanted channels
            # tag: coord-bodyname-pos-sensortype-component
            tag = 'bearing-shaft_nacelle-angle_speed-rpm'
            irpm = self.res.ch_dict[tag]['chi']
            chi_rads = self.res.ch_dict['Omega']['chi']
            tag = 'shaft-shaft-node-001-momentvec-z'
            chi_q = self.res.ch_dict[tag]['chi']
        except AttributeError:
            msg = 'load results first with Cases.load_result_file()'
            raise ValueError(msg)

#        if not self.case['[fix_rpm]']:
#            print
#            return

        windspeed = self.case['[windspeed]']
        rpm = self.res.sig[:,irpm].mean()
        # and get the average torque over rotor speed ratio required to
        # maintain constant rotor speed: K = -mean(Q/omega), with the shaft
        # torque Q converted from kNm to Nm
        K = -np.mean(self.res.sig[:,chi_q]*1000./self.res.sig[:,chi_rads])

        result = np.array([windspeed, rpm, K])

        # optionally, save the values and give the case name as file name
        if save:
            fpath = self.case['[post_dir]'] + 'torque_constant/'
            if name == 'hawc2':
                fname = self.case['[case_id]'] + '.kgen'
            elif name == 'ojf':
                fname = self.case['[ojf_case]'] + '.kgen'
            else:
                raise ValueError('name should be either ojf or hawc2')
            # create the torque_constant dir if it doesn't exist
            try:
                os.mkdir(fpath)
            except OSError:
                pass

#            print('gen K saving at:', fpath+fname
            np.savetxt(fpath+fname, result, header='windspeed, rpm, K')

        return result
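
    # A minimal usage sketch for calc_torque_const, assuming cases is a
    # Cases instance and case is one of its case dictionaries:
    #   cases.load_result_file(case)
    #   windspeed, rpm, K = cases.calc_torque_const()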

    def compute_envelope(self, sig, ch_list):
        """
        Compute load envelopes: for each channel group in ch_list the closed
        convex hull contour of the first two channels is determined, and any
        additional channels in the group are evaluated at the hull vertices.
        """
        envelope = {}
        for ch in ch_list:
            chi0 = self.res.ch_dict[ch[0]]['chi']
            chi1 = self.res.ch_dict[ch[1]]['chi']
            s0 = np.array(sig[:, chi0]).reshape(-1, 1)
            s1 = np.array(sig[:, chi1]).reshape(-1, 1)
            cloud = np.append(s0, s1, axis=1)
            hull = scipy.spatial.ConvexHull(cloud)
            closed_contour = np.append(cloud[hull.vertices,:],
                                       cloud[hull.vertices[0],:].reshape(1,2),
                                       axis=0)
            for ich in range(2, len(ch)):
                chix = self.res.ch_dict[ch[ich]]['chi']
                s0 = np.array(sig[hull.vertices, chix]).reshape(-1, 1)
                s1 = np.array(sig[hull.vertices[0], chix]).reshape(-1, 1)
                s0 = np.append(s0, s1, axis=0)
                closed_contour = np.append(closed_contour, s0, axis=1)
            envelope[ch[0]] = closed_contour
        return envelope
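
    # A minimal sketch for compute_envelope (channel names are hypothetical):
    #   ch_list = [['blade1-blade1-node-001-momentvec-x',
    #               'blade1-blade1-node-001-momentvec-y']]
    #   envelope = self.compute_envelope(self.sig, ch_list)
    #   # envelope[ch_list[0][0]] holds the closed Mx-My contour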

    def envelope(self, silent=False, ch_list=[], append=''):
        """
        Calculate load envelopes for all cases and save them in a table.

        Parameters
        ----------

        silent : boolean, default=False
            When True, suppress the progress messages printed to stdout.

        ch_list : list, default=[]
            List of channel groups, see compute_envelope. For each group the
            envelope of the first two channels is calculated.

        append : str, default=''
            String appended to the file name of the saved envelope table.
        """
        # get some basic parameters required to calculate statistics
        try:
            case = list(self.cases.keys())[0]
        except IndexError:
            print('no cases to select so no statistics, aborting ...')
            return None

        post_dir = self.cases[case]['[post_dir]']
        sim_id = self.cases[case]['[sim_id]']

        if not silent:
            nrcases = len(self.cases)
            print('='*79)
            print('envelope for %s, nr cases: %i' % (sim_id, nrcases))

        fname = os.path.join(post_dir, sim_id + '_envelope' + append + '.h5')
        h5f = tbl.open_file(fname, mode="w", title=str(sim_id),
                            filters=tbl.Filters(complevel=9))

        # Create a new group under "/" (root)
        for ii, (cname, case) in enumerate(self.cases.items()):

            groupname = str(cname[:-4])
            groupname = groupname.replace('-', '_')
            h5f.create_group("/", groupname)
            ctab = getattr(h5f.root, groupname)

            if not silent:
                pc = '%6.2f' % (float(ii)*100.0/float(nrcases))
                pc += ' %'
                print('envelope progress: %4i/%i %s' % (ii, nrcases, pc))

            self.load_result_file(case)

            envelope = self.compute_envelope(self.sig, ch_list)

            for ch_id in ch_list:
                h5f.create_table(ctab, str(ch_id[0].replace('-', '_')),
                                 EnvelopeClass.section,
                                 title=str(ch_id[0].replace('-', '_')))
                csv_table = getattr(ctab, str(ch_id[0].replace('-', '_')))
                tablerow = csv_table.row
                for row in envelope[ch_id[0]]:
                    tablerow['Mx'] = float(row[0])
                    tablerow['My'] = float(row[1])
                    if len(row)>2:
                        tablerow['Mz'] = float(row[2])
                        if len(row)>3:
                            tablerow['Fx'] = float(row[3])
                            tablerow['Fy'] = float(row[4])
                            tablerow['Fz'] = float(row[5])
                        else:
                            tablerow['Fx'] = 0.0
                            tablerow['Fy'] = 0.0
                            tablerow['Fz'] = 0.0
                    else:
                        tablerow['Mz'] = 0.0
                        tablerow['Fx'] = 0.0
                        tablerow['Fy'] = 0.0
                        tablerow['Fz'] = 0.0
                    tablerow.append()
                csv_table.flush()
        h5f.close()


class EnvelopeClass(object):
    """
    Class with the definition of the table for the envelope results
    """
    class section(tbl.IsDescription):

        Mx = tbl.Float32Col()
        My = tbl.Float32Col()
        Mz = tbl.Float32Col()
        Fx = tbl.Float32Col()
        Fy = tbl.Float32Col()
        Fz = tbl.Float32Col()


# TODO: implement this