            elif not subset:
                # FIXME: how are we dealing with set comments now?
                # subset comments are coming before the actual subset
                # so we account them to one set later than we are now
                #if subset_nr > 0 :
                key = '%03i-%03i-a' % (set_nr, subset_nr+1)
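                # key scheme: '%03i-%03i-x' % (set_nr, subset_nr), where the
                # suffix x tags the content: 'a' for subset comment lines and
                # 'd' for the data array, so e.g. '003-002-d' holds the data
                # points of set 3, subset 2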
                # in case it is not the first comment line
                if key in st_dict: st_dict[key] += line
                else: st_dict[key] = line
                ## otherwise we have the set comments
                #else:
                    #key = '%03i-%03i-a' % (set_nr, subset_nr)
                    ## in case it is not the first comment line
                    #if st_dict.has_key(key): st_dict[key] += line
                    #else: st_dict[key]  = line

            # in case we have the data points, make sure there are enough
            # data points present; raise an error if there are not
            elif len(line_list)==19 and subset:
                # we can store it in the array
                sub_set_arr[point,:] = line_list
                # on the last entry:
                if point == nr_points-1:
                    # save to the dict:
                    st_dict['%03i-%03i-d' % (set_nr, subset_nr)] = sub_set_arr
                    # and indicate we're done subsetting, next we can have
                    # either set or subset comments
                    subset = False
                point += 1

            #else:
                #msg = "error in st format: don't know where to put current line"
                #raise UserWarning(msg)

        self.st_dict = st_dict
        self.st_comments = st_comments

    def _format_nr(self, number):
        """
        Automatically format the number: fixed-point notation within the
        self.float_lo/self.float_hi range, exponential notation otherwise.
        Raises a ValueError when the precision loss of the formatted number
        exceeds self.prec_loss (acceptable precision loss expressed in %).

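        Example (illustrative; the exact output depends on the instance
        attributes prec_float, prec_exp, float_lo and float_hi)::

            numfor = md._format_nr(1234.5678)  # fixed-point notation
            numfor = md._format_nr(1.2e-12)    # exponential notation
            numfor = md._format_nr(0.0)        # formatted as ' 0.0'
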
        """

        # the formatting of the number
        numabs = abs(number)
        # just a float precision defined in self.prec_float
        if self.float_lo < numabs < self.float_hi:
            numfor = format(number, self.prec_float)
        # if it is zero, just simply print as 0.0
        elif number == 0.0:
            numfor = format(number, ' 1.1f')
        # exponential, precision defined in self.prec_exp
        else:
            numfor = format(number, self.prec_exp)

        try:
            loss = 100.0*abs(1 - (float(numfor)/number))
        except ZeroDivisionError:
            if abs(float(numfor)) > 0.00000001:
                msg = 'precision loss, from %1.10f to %s' \
                            % (number, numfor.strip())
                raise ValueError(msg)
            else:
                loss = 0
        if loss > self.prec_loss:
            msg = 'precision loss, from %1.10f to %s (%f pc)' \
                        % (number, numfor.strip(), loss)
            raise ValueError(msg)

        return numfor

    def write_st(self, file_path, file_name, print_header=False):
        """
        Write the content of self.st_dict back to a HAWC2 st file.

        print_header : boolean, default=False
            if True, the column header line is repeated after each subset
        """
        # TODO: implement all the tests when writing on nset, number of data
        # points, subsetnumber sequence etc

        content = ''

        # sort the key list
        keysort = self.st_dict.keys()
        keysort.sort()

        for key in keysort:

            # in case we are just printing what was recorded before
            if not key.endswith('d'):
                content += self.st_dict[key]
            # else we have an array
            else:
                # cycle through data points and print them orderly: control
                # precision depending on the number, keep spacing constant
                # so it is easy to read the textfile
                for m in range(self.st_dict[key].shape[0]):
                    for n in range(self.st_dict[key].shape[1]):
                        # TODO: check what do we lose here?
                        # we are coming from a np.float64, as set in the array
                        # but then it will not work with the format()
                        number = float(self.st_dict[key][m,n])
                        numfor = self._format_nr(number)
                        content += numfor.rjust(self.col_width)
                    content += '\n'

                if print_header:
                    content += self.column_header_line

        # and write file to disk again
        with open(file_path + file_name, 'w') as f:
            f.write(content)
        if not self.silent:
            print('st file written:', file_path + file_name)

    def write_latex(self, fpath, selection=None):
        """
        Write a table in Latex format based on the data in the st file.

        selection : list, default=None
            [ [setnr, subsetnr, table caption], [setnr, subsetnr, caption],...]
            if not specified, tables for all data subsets will be written

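        Example (illustrative set/subset numbers and captions)::

            md.write_latex('tex/', selection=[[1, 1, 'tower'],
                                              [1, 2, 'blade']])
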
        """

        cols_p1 = ['r [m]', 'm [kg/m]', 'm(ri{_x})^2 [kgNm^2]',
                   'm(ri{_y})^2 [kgNm^2]', 'EI_x [Nm^2]', 'EI_y [Nm^2]',
                   'EA [N]', 'GJ [\\frac{Nm^2}{rad}]']

        cols_p2 = ['r [m]', 'x_cg [m]', 'y_cg [m]', 'x_sh [m]', 'y_sh [m]',
                'x_e [m]', 'y_e [m]', 'k_x [-]', 'k_y [-]', 'pitch [deg]']

        if not selection:
            selection = []
            for key in self.st_dict:
                # but now only take the ones that hold data
                if key[-1] == 'd':
                    selection.append([int(key[:3]), int(key[4:7]), ''])

        for i,j, caption in selection:
            # get the data
            try:
                # set comment should be the name of the body
                set_comment = self.st_comments['%03i-000-0' % (i)]
#                subset_comment = self.st_comments['%03i-%03i-b' % (i,j)]
                st_arr = self.st_dict['%03i-%03i-d' % (i,j)]
            except AttributeError:
                msg = 'ModelData object md is not loaded properly'
                raise AttributeError(msg)

            # build the latex table header
#            textable = u"\\begin{table}[b!]\n"
#            textable += u"\\begin{center}\n"
            textable_p1 = u"\\centering\n"
            textable_p1 += u"\\begin{tabular}"
            # configure the column properties
            tmp = [u'C{2.0 cm}' for k in cols_p1]
            tmp = u"|".join(tmp)
            textable_p1 += u'{|' + tmp + u'|}'
            textable_p1 += u'\\hline\n'
            # add formula mode for the headers
            tmp = []
            for k in cols_p1:
                k1, k2 = k.split(' ')
                tmp.append(u'$%s$ $%s$' % (k1,k2) )
#            tmp = [u'$%s$' % k for k in cols_p1]
            textable_p1 += u' & '.join(tmp)
            textable_p1 += u'\\\\ \n'
            textable_p1 += u'\\hline\n'

            textable_p2 = u"\\centering\n"
            textable_p2 += u"\\begin{tabular}"
            # configure the column properties
            tmp = [u'C{1.5 cm}' for k in cols_p2]
            tmp = u"|".join(tmp)
            textable_p2 += u'{|' + tmp + u'|}'
            textable_p2 += u'\\hline\n'
            # add formula mode for the headers
            tmp = []
            for k in cols_p2:
                k1, k2 = k.split(' ')
                tmp.append(u'$%s$ $%s$' % (k1,k2) )
#            tmp = [u'$%s$ $%s$' % (k1, k2) for k in cols_p2]
            # hack: spread the last element over two lines
#            tmp[-1] = '$pitch$ $[deg]$'
            textable_p2 += u' & '.join(tmp)
            textable_p2 += u'\\\\ \n'
            textable_p2 += u'\\hline\n'

            for row in range(st_arr.shape[0]):
                r    = st_arr[row, self.st_headers.r]
                m    = st_arr[row,self.st_headers.m]
                x_cg = st_arr[row,self.st_headers.x_cg]
                y_cg = st_arr[row,self.st_headers.y_cg]
                ri_x = st_arr[row,self.st_headers.ri_x]
                ri_y = st_arr[row,self.st_headers.ri_y]
                x_sh = st_arr[row,self.st_headers.x_sh]
                y_sh = st_arr[row,self.st_headers.y_sh]
                E    = st_arr[row,self.st_headers.E]
                G    = st_arr[row,self.st_headers.G]
                Ixx  = st_arr[row,self.st_headers.Ixx]
                Iyy  = st_arr[row,self.st_headers.Iyy]
                I_p  = st_arr[row,self.st_headers.I_p]
                k_x  = st_arr[row,self.st_headers.k_x]
                k_y  = st_arr[row,self.st_headers.k_y]
                A    = st_arr[row,self.st_headers.A]
                pitch = st_arr[row,self.st_headers.pitch]
                x_e   = st_arr[row,self.st_headers.x_e]
                y_e   = st_arr[row,self.st_headers.y_e]
                # WARNING: same order as the labels defined in variable "cols"!
                p1 = [r, m, m*ri_x*ri_x, m*ri_y*ri_y, E*Ixx, E*Iyy, E*A,I_p*G]
                p2 = [r, x_cg, y_cg, x_sh, y_sh, x_e, y_e, k_x, k_y, pitch]

                textable_p1 += u" & ".join([self._format_nr(k) for k in p1])
                textable_p1 += u'\\\\ \n'

                textable_p2 += u" & ".join([self._format_nr(k) for k in p2])
                textable_p2 += u'\\\\ \n'

            # default caption
            if caption == '':
                caption = 'HAWC2 cross sectional parameters for body: %s' % set_comment

            textable_p1 += u"\hline\n"
            textable_p1 += u"\end{tabular}\n"
            textable_p1 += u"\caption{%s}\n" % caption
#            textable += u"\end{center}\n"
#            textable += u"\end{table}\n"

            fname = '%s-%s-%03i-%03i_p1' % (self.st_file, set_comment, i, j)
            fname = fname.replace('.', '') + '.tex'
            with open(fpath + fname, 'w') as f:
                f.write(textable_p1)

            textable_p2 += u"\hline\n"
            textable_p2 += u"\end{tabular}\n"
            textable_p2 += u"\caption{%s}\n" % caption
#            textable += u"\end{center}\n"
#            textable += u"\end{table}\n"

            fname = '%s-%s-%03i-%03i_p2' % (self.st_file, set_comment, i, j)
            fname = fname.replace('.', '') + '.tex'
            with open(fpath + fname, 'w') as f:
                f.write(textable_p2)


class WeibullParameters(object):

    def __init__(self):
        self.Vin = 4.
        self.Vr = 12.
        self.Vout = 26.
        self.Vref = 50.
        self.Vstep = 2.
        self.shape_k = 2.
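

# Illustrative sketch, not part of the original API: the parameters above are
# typically combined with a Weibull CDF to weight wind speed bins, e.g. when
# computing AEP. The scale factor is derived here from the IEC 61400-1
# relation Vave = 0.2*Vref; the rest is the standard Weibull distribution.
# Usage: p = weibull_bin_probability(WeibullParameters(), 8.0, 10.0)
def weibull_bin_probability(wp, v1, v2):
    """Probability that the wind speed falls within [v1, v2] for a Weibull
    distribution with shape wp.shape_k and a scale derived from wp.Vref.
    """
    import math
    # the scale factor c follows from the mean: Vave = c*gamma(1 + 1/k)
    v_ave = 0.2*wp.Vref
    c = v_ave / math.gamma(1.0 + 1.0/wp.shape_k)
    cdf = lambda v: 1.0 - math.exp(-(v/c)**wp.shape_k)
    return cdf(v2) - cdf(v1)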


# FIXME: Cases has a memory leak somewhere; this whole thing needs to be
# reconsidered and rely on a DataFrame instead of a dict!
class Cases:
    """
    Class for the old htc_dict
    ==========================

    Formerly known as htc_dict: a dictionary with a case identifier (case
    name) as key, and as value a dictionary holding all the different tag
    and value pairs that define the case

    TODO:

    define a public API so that plugins can be exposed in a standardized way
    using predefined variables:

    * pandas DataFrame backend instead of a dictionary

    * generic, so not bound to HAWC2. Goal: manage a lot of simulations
      and their corresponding inputs/outputs

    * integration with OpenMDAO?

    * case id (hash)

    * case name (which is typically created with variable_tag_name method)

    * results

    * inputs

    * outputs

    variable tags that have a dictionary mirror for database-like searching

    launch, post_launch, prepare_(re)launch should be methods of this class
    or inherit from Cases

    Create a method to add and remove cases from the pool so you can perform
    some analysis on them. Maybe make a GUI that presents a list of the
    current cases in the pool, with checkboxes to remove them.

    Remove the HAWC2 specific parts to a HAWC2 plugin. The HAWC2 plugin will
    inherit from Cases. Proposed class names: HAWC2Cases, XFOILCases

    Rename cases to pool? A pool contains several cases, mixing several
    sim_id's?

    create a unique case ID based on the hash value of all the tag+values?
    """

    # TODO: add a method that can reload a certain case_dict, you change
    # some parameters for each case (or some) and then launch again

    #def __init__(self, post_dir, sim_id, resdir=False):
    def __init__(self, *args, **kwargs):
        """
        Either load the cases dictionary if post_dir and sim_id are given,
        otherwise the input is a cases dictionary.

        Parameters
        ----------

        cases : dict
            The cases dictionary in case there is only one argument

        post_dir : str
            When using two arguments

        sim_id : str or list
            When using two arguments

        resdir : str, default=False

        loadstats : boolean, default=False

        rem_failed : boolean, default=True

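        Examples (illustrative paths and sim_id's)::

            # scenario 1: wrap an existing cases dictionary
            cc = Cases(cases)
            # scenario 2: load the pickled cases of one or more sim_id's
            cc = Cases('path/to/post_dir', 'sim_a')
            cc = Cases('path/to/post_dir', ['sim_a', 'sim_b'])
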
        """

        resdir = kwargs.get('resdir', False)
        self.loadstats = kwargs.get('loadstats', False)
        self.rem_failed = kwargs.get('rem_failed', True)
        self.config = kwargs.get('config', {})
        print(self.config)
        # determine the input argument scenario
        if len(args) == 1:
            if isinstance(args[0], dict):
                self.cases = args[0]
                sim_id = False
            else:
                raise ValueError('One argument input should be a cases dict')
        elif len(args) == 2:
            self.post_dir = args[0]
            sim_id = args[1]
        else:
            raise ValueError('Only one or two arguments are allowed.')

        # if sim_id is a list, then merge all sim_id's of that list
        if isinstance(sim_id, list):
            # stats, dynprop and fail are empty dictionaries if they do not
            # exist
            self.merge_sim_ids(sim_id)
            # and define a new sim_id based on all items from the list
            self.sim_id = '_'.join(sim_id)
        # in case we still need to load the cases dict
        elif isinstance(sim_id, str):
            self.sim_id = sim_id
            self._get_cases_dict(self.post_dir, sim_id)
            # load the statistics if applicable
            if self.loadstats:
                self.stats_df, self.Leq_df, self.AEP_df = self.load_stats()

        # change the results directory if applicable
        if resdir:
            self.change_results_dir(resdir)

#        # try to load failed cases and remove them
#        try:
#            self.load_failed(sim_id)
#            self.remove_failed()
#        except IOError:
#            pass

        #return self.cases

    def select(self, search_keyval=False, search_key=False):
        """
        Select only a subset of the cases

        Select either search_keyval or search_key. Using both is not supported
        yet. Run select twice to achieve the same effect. If both are False,
        cases will be emptied!

        Parameters
        ----------

        search_keyval : dictionary, default=False
            Keys are the column names. If the values match the ones in the
            database, the respective row gets selected. Each tag is hence
            a unique row identifier

        search_key : dict, default=False
            The key is the string that should either be inclusive (value TRUE)
            or exclusive (value FALSE) in the case key
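
        Example (illustrative tag and search strings)::

            # keep only the cases that have [windspeed] set to 10
            cc.select(search_keyval={'[windspeed]': 10})
            # keep only the cases whose case name contains 'wsp10'
            cc.select(search_key={'wsp10': True})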
        """

        db = misc.DictDB(self.cases)
        if search_keyval:
            db.search(search_keyval)
        elif search_key:
            db.search_key(search_key)
        else:
            db.dict_sel = {}
        # and remove all keys that are not in the list
        remove = set(self.cases) - set(db.dict_sel)
        for k in remove:
            self.cases.pop(k)

    def launch(self, runmethod='local', verbose=False, copyback_turb=True,
           silent=False, check_log=True):
        """
        Launch all cases
        """

        launch(self.cases, runmethod=runmethod, verbose=verbose, silent=silent,
               check_log=check_log, copyback_turb=copyback_turb)

    def post_launch(self, save_iter=False):
        """
        Post Launching Maintenance

        check the logs files and make sure result files are present and
        accounted for.
        """
        # TODO: integrate global post_launch in here
        self.cases_fail = post_launch(self.cases, save_iter=save_iter)

        if self.rem_failed:
            self.remove_failed()

    def load_case(self, case):
        try:
            iterations = self.load_iterations(case)
        except IOError:
            iterations = None
        res = self.load_result_file(case)
        return res, iterations

    def load_iterations(self, case):

        fp = os.path.join(case['[run_dir]'], case['[iter_dir]'],
                          case['[case_id]'])
        return np.loadtxt(fp + '.iter')

    # TODO: HAWC2 result file reading should be moved to Simulations
    # and we should also switch to faster HAWC2 reading!
    def load_result_file(self, case, _slice=False):
        """
        Load a HAWC2 result file and set the time and signal attributes

        Parameters
        ----------

        case : dict
            a case dictionary holding all the tags set for this specific
            HAWC2 simulation

        Returns
        -------

        res : object
            A HawcPy LoadResults instance with attributes such as sig, ch_dict,
            and much much more

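        Example (illustrative)::

            res = cc.load_result_file(cc.cases[cname])
            time, sig = cc.time, cc.sig
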
        """

        respath = os.path.join(case['[run_dir]'], case['[res_dir]'])
        resfile = case['[case_id]']
        self.res = windIO.LoadResults(respath, resfile)
        if not _slice:
            _slice = np.r_[0:len(self.res.sig)]
        self.time = self.res.sig[_slice,0]
        self.sig = self.res.sig[_slice,:]
        self.case = case

        return self.res

    def load_struct_results(self, case, max_modes=500, nrmodes=1000):
        """
        Load the structural analysis result files
        """
        fpath = os.path.join(case['[run_dir]'], case['[eigenfreq_dir]'])

        # BEAM OUTPUT
        fname = '%s_beam_output.txt' % case['[case_id]']
        beam = None

        # BODY OUTPUT
        fname = '%s_body_output.txt' % case['[case_id]']
        body = None

        # EIGEN BODY
        fname = '%s_eigen_body.txt' % case['[case_id]']
        try:
            eigen_body, rs2 = windIO.ReadEigenBody(fpath, fname, debug=False,
                                              nrmodes=nrmodes)
        except Exception as e:
            eigen_body = None
            print('failed to load eigen_body')
            print(e)

        # EIGEN STRUCT
        fname = '%s_eigen_struct.txt' % case['[case_id]']
        try:
            eigen_struct = windIO.ReadEigenStructure(fpath, fname, debug=False,
                                                     max_modes=max_modes)
        except Exception as e:
            eigen_struct = None
            print('failed to load eigen_struct')
            print(e)

        # STRUCT INERTIA
        fname = '%s_struct_inertia.txt' % case['[case_id]']
        struct_inertia = None

        return beam, body, eigen_body, eigen_struct, struct_inertia

    def change_results_dir(self, forcedir, post_dir=False):
        """
        If the post-processing concerns simulations run remotely (e.g. on
        thyra/gorm) whose results have been downloaded locally, change the
        path to the results accordingly.

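        Example (hypothetical local path)::

            cc.change_results_dir('/mnt/local_copy/results/')
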
        """
        for case in self.cases:
            self.cases[case]['[run_dir]'] = forcedir
            if post_dir:
                self.cases[case]['[post_dir]'] = post_dir

        #return cases

    def force_lower_case_id(self):
        tmp_cases = {}
        for cname, case in self.cases.items():
            tmp_cases[cname.lower()] = case.copy()
        self.cases = tmp_cases

    def _get_cases_dict(self, post_dir, sim_id):
        """
        Load the pickled dictionary containing all the cases and their
        respective tags.

        Notes
        -----

        Sets self.cases (with failed cases removed when rem_failed is True)
        and self.cases_fail (the failed cases).

        """
        self.cases = load_pickled_file(os.path.join(post_dir, sim_id + '.pkl'))
        self.cases_fail = {}

        self.force_lower_case_id()

        if self.rem_failed:
            try:
                self.load_failed(sim_id)
                # ditch all the failed cases out of the htc_dict otherwise
                #  we will have fails when reading the results data files
                self.remove_failed()
            except IOError:
                print("couldn't find pickled failed dictionary")

        return

    def cases2df(self):
        """Convert the cases dict to a DataFrame and save as excel sheet"""

        tag_set = []

        # maybe some cases have tags that others don't, create a set with
        # all the tags that occur
        for cname, tags in self.cases.items():
            tag_set.extend(tags.keys())
        # also add cname as a tag
        tag_set.append('cname')
        # only unique tags
        tag_set = set(tag_set)
        # and build the df_dict with all the tags
        df_dict = {tag:[] for tag in tag_set}

        for cname, tags in self.cases.items():
            current_tags = set(tags.keys())
            for tag, value in tags.items():
                df_dict[tag].append(value)
            # and the missing ones
            for tag in (tag_set - current_tags):
                df_dict[tag].append('')

        df_dict2 = misc.df_dict_check_datatypes(df_dict)

        return pd.DataFrame(df_dict2)

    def merge_sim_ids(self, sim_id_list, silent=False):
        """
        Load and merge cases, fail, dynprop and stats for a list of sim_id's
        =====================================================================

        For all sim_id's in the sim_id_list the cases, stats, fail and dynprop
        dictionaries are loaded. If one of them doesn't exist, an empty
        dictionary is returned.

        Currently, there is no warning given when a certain case will be
        overwritten upon merging.

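        Example (illustrative sim_id's; normally invoked via the Cases
        constructor)::

            cc = Cases('path/to/post_dir', ['sim_a', 'sim_b'])
            # cc.cases now holds the union of both case dictionaries
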
        """

        cases_merged = {}
        cases_fail_merged = {}

        for ii, sim_id in enumerate(sim_id_list):

            # TODO: give a warning if we have double entries or not?
            self.sim_id = sim_id
            self._get_cases_dict(self.post_dir, sim_id)
            cases_fail_merged.update(self.cases_fail)

            # and copy to htc_dict_merged. Note that non unique keys will be
            # overwritten: each case has to have a unique name!
            cases_merged.update(self.cases)

            # merge the statistics if applicable
            # self.stats_dict[channels] = df
            if self.loadstats:
                if ii == 0:
                    self.stats_df, self.Leq_df, self.AEP_df = self.load_stats()
                else:
                    tmp1, tmp2, tmp3 = self.load_stats()
                    self.stats_df = pd.concat([self.stats_df, tmp1])
                    self.Leq_df = pd.concat([self.Leq_df, tmp2])
                    self.AEP_df = pd.concat([self.AEP_df, tmp3])

        self.cases = cases_merged
        self.cases_fail = cases_fail_merged

    def printall(self, scenario, figpath=''):
        """
        For all the cases, get the average value of a certain channel
        """
        self.figpath = figpath

        # plot for each case the dashboard
        for k in self.cases:

            if scenario == 'blade_deflection':
                self.blade_deflection(self.cases[k], self.figpath)

    def diff(self, refcase_dict, cases):
        """
        See which tags change over the given cases of the simulation object
        """

        # there is only one case allowed in refcase dict
        if not len(refcase_dict) == 1:
            raise ValueError('Only one case allowed in refcase dict')

        # take an arbitrary case as baseline for comparison
        refcase = list(refcase_dict.values())[0]
        #reftags = sim_dict[refcase]

        diffdict = dict()
        adddict = dict()
        remdict = dict()
        print()
        print('*'*80)
        print('comparing %i cases' % len(cases))
        print('*'*80)
        print()
        # compare each case with the refcase and see if there are any diffs
        for case in sorted(cases.keys()):
            dd = misc.DictDiff(refcase, cases[case])
            diffdict[case] = dd.changed()
            adddict[case] = dd.added()
            remdict[case] = dd.removed()
            print('')
            print('='*80)
            print(case)
            print('='*80)
            for tag in sorted(diffdict[case]):
                print(tag.rjust(20),':', cases[case][tag])

        return diffdict, adddict, remdict

    def blade_deflection(self, case, **kwargs):
        """
        """

        # read the HAWC2 result file
        self.load_result_file(case)

        # select all the y deflection channels
        db = misc.DictDB(self.res.ch_dict)

        db.search({'sensortype' : 'state pos', 'component' : 'z'})
        # sort the keys and save the mean values to an array/list
        chiz, zvals = [], []
        for key in sorted(db.dict_sel.keys()):
            zvals.append(-self.sig[:,db.dict_sel[key]['chi']].mean())
            chiz.append(db.dict_sel[key]['chi'])

        db.search({'sensortype' : 'state pos', 'component' : 'y'})
        # sort the keys and save the mean values to an array/list
        chiy, yvals = [], []
        for key in sorted(db.dict_sel.keys()):
            yvals.append(self.sig[:,db.dict_sel[key]['chi']].mean())
            chiy.append(db.dict_sel[key]['chi'])

        return np.array(zvals), np.array(yvals)

    def remove_failed(self):

        # don't do anything if there is nothing defined
        if self.cases_fail is None:
            print('no failed cases to remove')
            return

        # ditch all the failed cases out of the htc_dict
        # otherwise we will have fails when reading the results data files
        for k in self.cases_fail:
            try:
                self.cases_fail[k] = copy.copy(self.cases[k])
                del self.cases[k]
                print('removed from htc_dict due to error: ' + k)
            except KeyError:
                print('WARNING: failed case does not occur in cases')
                print('   ', k)

    def load_failed(self, sim_id):

        fname = os.path.join(self.post_dir, sim_id + '_fail.pkl')
        with open(fname, 'rb') as f:
            self.cases_fail = pickle.load(f)

    def load_stats(self, **kwargs):
        """
        Load an existing statistics file

        Parameters
        ----------

        post_dir : str, default=self.post_dir

        sim_id : str, default=self.sim_id

        fpath : str, default=sim_id

        leq : bool, default=False

        columns : list, default=None
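
        Example (illustrative)::

            stats_df, Leq_df, AEP_df = cc.load_stats(leq=True)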
        """
        post_dir = kwargs.get('post_dir', self.post_dir)
        sim_id = kwargs.get('sim_id', self.sim_id)
        fpath = os.path.join(post_dir, sim_id)
        Leq_df = kwargs.get('leq', False)
        columns = kwargs.get('columns', None)

        try:
            stats_df = pd.read_hdf(fpath + '_statistics.h5', 'table',
                                   columns=columns)
#            FILE = open(post_dir + sim_id + '_statistics.pkl', 'rb')
#            stats_dict = pickle.load(FILE)
#            FILE.close()
        except IOError:
            stats_df = None
            print('NO STATS FOUND FOR', sim_id)

        try:
            AEP_df = pd.read_hdf(fpath + '_AEP.h5', 'table')
        except IOError:
            AEP_df = None
            print('NO AEP FOUND FOR', sim_id)

        if Leq_df:
            try:
                Leq_df = pd.read_hdf(fpath + '_Leq.h5', 'table')
            except IOError:
                Leq_df = None
                print('NO Leq FOUND FOR', sim_id)

        return stats_df, Leq_df, AEP_df

    def statistics(self, new_sim_id=False, silent=False, ch_sel=None,
                   tags=['[turb_seed]','[windspeed]'], calc_mech_power=False,
                   save=True, m=[3, 4, 6, 8, 10, 12], neq=None, no_bins=46,
                   ch_fatigue={}, update=False, add_sensor=None,
                   chs_resultant=[], i0=0, i1=-1, saveinterval=1000,
                   csv=True, suffix=None, fatigue_cycles=False, A=None,
                   ch_wind=None, save_new_sigs=False):
        """
        Calculate statistics and save them in a pandas dataframe. The
        statistics file is also saved every saveinterval cases.

        Parameters
        ----------

        ch_sel : list, default=None
            If defined, only add defined channels to the output data frame.
            The list should contain valid channel names as defined in ch_dict.

        tags : list, default=['[turb_seed]','[windspeed]']
            Select which tag values from cases should be included in the
            dataframes. This will help in selecting and identifying the
            different cases.

        ch_fatigue : list, default=[]
            Valid ch_dict channel names for which the equivalent fatigue load
            needs to be calculated. When set to None, ch_fatigue = ch_sel,
            and hence all channels will have a fatigue analysis.

        fatigue_cycles : Boolean, default=False
            If True, the cycle matrix, or sum( n_i*S_i^m ), is calculated. If
            set to False, the 1Hz equivalent load is calculated.

        chs_resultant

        add_sensor

        calc_mech_power

        saveinterval : int, default=1000
            When processing a large number of cases, the statistics file
            will be saved every saveinterval cases

        update : boolean, default=False
            Update an existing DataFrame instead of overwriting one. When
            the number of cases is larger than saveinterval, the statistics
            file will be updated every saveinterval cases

        suffix : boolean or str, default=None
            When True, the statistics data file name will be appended with a
            suffix that corresponds to the index of the last case added. When
            a string, that suffix is added to the file name. Set to True when
            a large number of cases is being considered in order to avoid
            excessively large DataFrames.

        csv : boolean, default=True
            In addition to the h5 file, also save the statistics in csv
            format.

        Returns
        -------

        dfs : dict
            Dictionary of dataframes, where the key is the channel name of
            the output (that was optionally defined in ch_sel), and the value
            is the dataframe containing the statistical values for all the
            different selected cases.

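        Example (illustrative channel and tag names)::

            # add a derived channel that is the product of two existing ones
            add_sensor = {'ch1_name': 'windspeed-global-Vy-0.00-0.00-0.00',
                          'ch2_name': 'Ae rot. power',
                          'ch_name_add': 'wsp-times-power',
                          'factor': 1.0, 'operator': '*'}
            dfs = cc.statistics(tags=['[turb_seed]', '[windspeed]'],
                                add_sensor=add_sensor)
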
        """

        def add_df_row(df_dict, **kwargs):
            """
            add a new channel to the df_dict format of ch_df
            """
            for col, value in kwargs.items():
                df_dict[col].append(value)
            for col in (self.res.cols - set(kwargs.keys())):
                df_dict[col].append('')
            return df_dict

        # in case the output changes, remember the original ch_sel
        if ch_sel is not None:
            ch_sel_init = ch_sel.copy()
        else:
            ch_sel_init = None

        if ch_fatigue is None:
            ch_fatigue_init = None
        else:
            ch_fatigue_init = ch_fatigue

        # TODO: should the default tags not be all the tags in the cases dict?
        tag_default = ['[case_id]', '[sim_id]']
        tag_chan = 'channel'
        # merge default with other tags; copy first so we do not mutate the
        # caller's list (or the shared default argument)
        tags = list(tags)
        for tag in tag_default:
            if tag not in tags:
                tags.append(tag)

        # tags must be unique; if the same tag appears twice it will break
        # the DataFrame creation
        if len(tags) != len(set(tags)):
            raise ValueError('tags can only contain unique entries')

        # get some basic parameters required to calculate statistics
        try:
            case = list(self.cases.keys())[0]
        except IndexError:
            print('no cases to select so no statistics, aborting ...')
            return None

        post_dir = self.cases[case]['[post_dir]']
        if not new_sim_id:
            # select the sim_id from a random case
            sim_id = self.cases[case]['[sim_id]']
        else:
            sim_id = new_sim_id

        if not silent:
            nrcases = len(self.cases)
            print('='*79)
            print('statistics for %s, nr cases: %i' % (sim_id, nrcases))

        df_dict = None
        add_stats = True

        for ii, (cname, case) in enumerate(self.cases.items()):

            # build the basic df_dict if not defined
            if df_dict is None:
                # the dictionary that will be used to create a pandas dataframe
                df_dict = { tag:[] for tag in tags }
                df_dict[tag_chan] = []
                # add more columns that will help with IDing the channel
                df_dict['channel_name'] = []
                df_dict['channel_units'] = []
                df_dict['channel_nr'] = []
                df_dict['channel_desc'] = []
                add_stats = True

            if not silent:
                pc = '%6.2f' % (float(ii)*100.0/float(nrcases))
                pc += ' %'
                print('stats progress: %4i/%i %s' % (ii, nrcases, pc))

            # make sure all the selected tags exist in the current case
            if len(tags) != len(set(case) & set(tags)):
                raise KeyError('    not all selected tags exist in cases')

            self.load_result_file(case)
            ch_dict_new = {}
            # this is really messy: we are now also, in parallel, using the
            # channel DataFrame structure
            ch_df_new = {col:[] for col in self.res.cols}
            ch_df_new['ch_name'] = []
            # calculate the statistics values
#            stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)
            i_new_chans = self.sig.shape[1] # self.Nch
            sig_size = self.res.N  # len(self.sig[i0:i1,0])
            new_sigs = np.ndarray((sig_size, 0))

            if add_sensor is not None:
                chi1 = self.res.ch_dict[add_sensor['ch1_name']]['chi']
                chi2 = self.res.ch_dict[add_sensor['ch2_name']]['chi']
                name = add_sensor['ch_name_add']
                factor = add_sensor['factor']
                operator = add_sensor['operator']

                p1 = self.sig[:,chi1]
                p2 = self.sig[:,chi2]
                sig_add = np.ndarray((len(p1), 1))
                if operator == '*':
                    sig_add[:,0] = p1*p2*factor
                elif operator == '/':
                    sig_add[:,0] = factor*p1/p2
                else:
                    raise ValueError('Operator needs to be either * or /')
#                add_stats = self.res.calc_stats(sig_add)
#                add_stats_i = stats['max'].shape[0]
                # add a new channel description for the mechanical power
                ch_dict_new[name] = {}
                ch_dict_new[name]['chi'] = i_new_chans
                ch_df_new = add_df_row(ch_df_new, {'chi':i_new_chans,
                                                   'ch_name':name})
                i_new_chans += 1
                new_sigs = np.append(new_sigs, sig_add, axis=1)
#                # and append to all the statistics types
#                for key, stats_arr in stats.iteritems():
#                    stats[key] = np.append(stats_arr, add_stats[key])

            # calculate the resultants
            sig_resultants = np.ndarray((sig_size, len(chs_resultant)))
            inc = []
            for j, chs in enumerate(chs_resultant):
                sig_res = np.ndarray((sig_size, len(chs)))
                lab = ''
                no_channel = False
                for i, ch in enumerate(chs):
                    # if the channel does not exist, set it to zero
                    try:
                        chi = self.res.ch_dict[ch]['chi']
                        sig_res[:,i] = self.sig[:,chi]
                        no_channel = False
                    except KeyError:
                        no_channel = True
                    lab += ch.split('-')[-1]
                name = '-'.join(ch.split('-')[:-1] + [lab])
                # when one of the components does not exist, we cannot
                # calculate the resultant!
                if no_channel: