def remove_failed(self):
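"""Remove failed cases from the cases dict (they are kept in cases_fail)."""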
# don't do anything if there is nothing defined
if self.cases_fail is None:
print('no failed cases to remove')
return
# ditch all the failed cases out of the htc_dict
# otherwise we will have fails when reading the results data files
for k in self.cases_fail:
try:
self.cases_fail[k] = copy.copy(self.cases[k])
del self.cases[k]
print('removed from htc_dict due to error: ' + k)
except KeyError:
print('WARNING: failed case does not occur in cases')
print(' ', k)
def load_failed(self, sim_id):
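"""Load the pickled dict of failed cases (<sim_id>_fail.pkl) from post_dir."""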
fname = os.path.join(self.post_dir, sim_id + '_fail.pkl')
FILE = open(fname, 'rb')
self.cases_fail = pickle.load(FILE)
FILE.close()
def load_stats(self, **kwargs):
"""
Load an existing statistics file
Parameters
----------
post_dir : str, default=self.post_dir
sim_id : str, default=self.sim_id
leq : bool, default=False
When True, also try to load the Leq (life time equivalent load)
DataFrame.
columns : list, default=None
Only load the given columns from the statistics HDF5 table.
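Examples
--------
A minimal sketch, assuming ``cc`` is a Cases instance for which
statistics have been saved earlier:

>>> stats_df, Leq_df, AEP_df = cc.load_stats(leq=True)
>>> stats_df, _, _ = cc.load_stats(columns=['[case_id]', 'channel', 'mean'])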
"""
post_dir = kwargs.get('post_dir', self.post_dir)
sim_id = kwargs.get('sim_id', self.sim_id)
fpath = os.path.join(post_dir, sim_id)
Leq_df = kwargs.get('leq', False)
columns = kwargs.get('columns', None)
try:
stats_df = pd.read_hdf(fpath + '_statistics.h5', 'table',
columns=columns)
# FILE = open(post_dir + sim_id + '_statistics.pkl', 'rb')
# stats_dict = pickle.load(FILE)
# FILE.close()
except IOError:
stats_df = None
print('NO STATS FOUND FOR', sim_id)
try:
AEP_df = pd.read_hdf(fpath + '_AEP.h5', 'table')
except IOError:
AEP_df = None
print('NO AEP FOUND FOR', sim_id)
if Leq_df:
try:
Leq_df = pd.read_hdf(fpath + '_Leq.h5', 'table')
except IOError:
Leq_df = None
print('NO Leq FOUND FOR', sim_id)
return stats_df, Leq_df, AEP_df
def statistics(self, new_sim_id=False, silent=False, ch_sel=None,
tags=['[turb_seed]','[windspeed]'], calc_mech_power=False,
save=True, m=[3, 4, 6, 8, 10, 12], neq=None, no_bins=46,
ch_fatigue={}, update=False, add_sensor=None,
chs_resultant=[], i0=0, i1=-1, saveinterval=1000,
csv=True, suffix=None, A=None,
ch_wind=None, save_new_sigs=False, xlsx=False):
"""
Calculate statistics and save them in a pandas DataFrame. The
statistics file is also saved at intermediate points (every
saveinterval cases).
Parameters
----------
ch_sel : list, default=None
If defined, only add defined channels to the output data frame.
The list should contain valid channel names as defined in ch_dict.
tags : list, default=['[turb_seed]','[windspeed]']
Select which tag values from cases should be included in the
dataframes. This will help in selecting and identifying the
different cases.
ch_fatigue : list, default={}
Valid ch_dict channel names for which the equivalent fatigue load
needs to be calculated. When set to None, ch_fatigue = ch_sel,
and hence all channels will have a fatigue analysis.
chs_resultant : list of lists, default=[]
Each element is a list of component channel names for which the
resultant signal is calculated and added as a new channel.
add_sensor : dict, default=None
Derive a new channel from two existing ones. Expected keys:
'ch1_name', 'ch2_name', 'ch_name_add', 'factor', and 'operator'
(either '*' or '/').
calc_mech_power : boolean, default=False
When True, the mechanical shaft power is calculated and added as
the new channel 'stats-shaft-power'.
saveinterval : int, default=1000
When processing a large number of cases, the statistics file
will be saved every saveinterval cases.
update : boolean, default=False
Update an existing DataFrame instead of overwriting one. When
the number of cases is larger than saveinterval, the statistics
file will be updated every saveinterval cases.
suffix : boolean or str, default=None
When True, the statistics data file will be appended with a suffix
that corresponds to the number of cases processed so far (an
exclusive end index, much like range()). When a string, that
string is used as the suffix. Set to True when a large number of
cases is being considered in order to avoid excessively large
DataFrames.
csv : boolean, default=True
In addition to a h5 file, save the statistics also in csv format.
xlsx : boolean, default=False
In addition to a h5 file, save the statistics also in MS Excel xlsx
format.
Returns
-------
dfs : dict
Dictionary of dataframes, where the key is the channel name of
the output (that was optionally defined in ch_sel), and the value
is the dataframe containing the statistical values for all the
different selected cases.
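Examples
--------
A hedged sketch of a typical call; ``cc`` is assumed to be a Cases
instance and the channel names are hypothetical:

>>> add_sensor = {'ch1_name': 'shaft-shaft-node-004-momentvec-z',
...               'ch2_name': 'Omega',
...               'ch_name_add': 'shaft-torque-times-omega',
...               'factor': 1.0, 'operator': '*'}
>>> dfs = cc.statistics(tags=['[windspeed]', '[turb_seed]'],
...                     add_sensor=add_sensor, calc_mech_power=False)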
"""
def add_df_row(df_dict, **kwargs):
"""
add a new channel to the df_dict format of ch_df
"""
for col, value in kwargs.items():
df_dict[col].append(value)
for col in (self.res.cols - set(kwargs.keys())):
df_dict[col].append('')
return df_dict
# in case the output changes, remember the original ch_sel
if ch_sel is not None:
ch_sel_init = ch_sel.copy()
else:
ch_sel_init = None
if ch_fatigue is None:
ch_fatigue_init = None
else:
ch_fatigue_init = ch_fatigue
# TODO: should the default tags not be all the tags in the cases dict?
tag_default = ['[case_id]', '[sim_id]']
tag_chan = 'channel'
# merge default with other tags; work on a copy so the caller's list
# (or the mutable default argument) is not modified in place
tags = list(tags)
for tag in tag_default:
if tag not in tags:
tags.append(tag)
# tags must be unique; when the same tag appears twice it will
# break the DataFrame creation
if len(tags) != len(set(tags)):
raise ValueError('tags can only contain unique entries')
# get some basic parameters required to calculate statistics
try:
case = list(self.cases.keys())[0]
except IndexError:
print('no cases to select so no statistics, aborting ...')
return None
post_dir = self.cases[case]['[post_dir]']
if not new_sim_id:
# select the sim_id from a random case
sim_id = self.cases[case]['[sim_id]']
else:
sim_id = new_sim_id
if not silent:
nrcases = len(self.cases)
print('='*79)
print('statistics for %s, nr cases: %i' % (sim_id, nrcases))
df_dict = None
add_stats = True
for ii, (cname, case) in enumerate(self.cases.items()):
# build the basic df_dict if not defined
if df_dict is None:
# the dictionary that will be used to create a pandas dataframe
df_dict = { tag:[] for tag in tags }
df_dict[tag_chan] = []
# add more columns that will help with IDing the channel
df_dict['channel_name'] = []
df_dict['channel_units'] = []
df_dict['channel_nr'] = []
df_dict['channel_desc'] = []
add_stats = True
if not silent:
pc = '%6.2f' % (float(ii)*100.0/float(nrcases))
pc += ' %'
print('stats progress: %4i/%i %s' % (ii, nrcases, pc))
# make sure all the selected tags exist in the current case
if len(tags) != len(set(case) & set(tags)):
raise KeyError('not all selected tags exist in cases')
self.load_result_file(case)
ch_dict_new = {}
# this is really messy: we are now also using the channel
# DataFrame structure in parallel
ch_df_new = {col:[] for col in self.res.cols}
ch_df_new['ch_name'] = []
# calculate the statistics values
# stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)
i_new_chans = self.sig.shape[1] # self.Nch
sig_size = self.res.N # len(self.sig[i0:i1,0])
new_sigs = np.ndarray((sig_size, 0))
if add_sensor is not None:
chi1 = self.res.ch_dict[add_sensor['ch1_name']]['chi']
chi2 = self.res.ch_dict[add_sensor['ch2_name']]['chi']
name = add_sensor['ch_name_add']
factor = add_sensor['factor']
operator = add_sensor['operator']
p1 = self.sig[:,chi1]
p2 = self.sig[:,chi2]
sig_add = np.ndarray((len(p1), 1))
if operator == '*':
sig_add[:,0] = p1*p2*factor
elif operator == '/':
sig_add[:,0] = factor*p1/p2
else:
raise ValueError('Operator needs to be either * or /')
# add_stats = self.res.calc_stats(sig_add)
# add_stats_i = stats['max'].shape[0]
# add a new channel description for the added sensor
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans
ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, sig_add, axis=1)
# # and append to all the statistics types
# for key, stats_arr in stats.iteritems():
# stats[key] = np.append(stats_arr, add_stats[key])
# calculate the resultants
sig_resultants = np.ndarray((sig_size, len(chs_resultant)))
inc = []
for j, chs in enumerate(chs_resultant):
sig_res = np.ndarray((sig_size, len(chs)))
lab = ''
no_channel = False
for i, ch in enumerate(chs):
# if a component channel does not exist, flag the resultant
# as incomplete so it is skipped below
try:
chi = self.res.ch_dict[ch]['chi']
sig_res[:,i] = self.sig[:,chi]
except KeyError:
no_channel = True
lab += ch.split('-')[-1]
name = '-'.join(ch.split('-')[:-1] + [lab])
# when one of the components does not exist, we cannot calculate
# the resultant!
if no_channel:
rpl = (name, cname)
print(' missing channel, no resultant for: %s, %s' % rpl)
continue
inc.append(j)
# Euclidean norm of the component channels
sig_resultants[:,j] = np.sqrt((sig_res*sig_res).sum(axis=1))
# resultant = np.sqrt(sig_resultants[:,j].reshape(self.res.N, 1))
# add_stats = self.res.calc_stats(resultant)
# add_stats_i = stats['max'].shape[0]
# add a new channel description for this resultant
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans
ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
# and append to all the statistics types
# for key, stats_arr in stats.iteritems():
# stats[key] = np.append(stats_arr, add_stats[key])
if len(chs_resultant) > 0:
# but only take the channels that were not missing
new_sigs = np.append(new_sigs, sig_resultants[:,inc], axis=1)
# calculate mechanical power first before deriving statistics
# from it
if calc_mech_power:
name = 'stats-shaft-power'
sig_pmech = np.ndarray((sig_size, 1))
sig_pmech[:,0] = self.shaft_power()
# P_mech_stats = self.res.calc_stats(sig_pmech)
# mech_stats_i = stats['max'].shape[0]
# add a new channel description for the mechanical power
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans
ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, sig_pmech, axis=1)
# and C_p_mech
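# the power coefficient is derived from the mechanical power, the
# hub wind speed and the rotor area A (conventionally
# Cp = P / (0.5 * rho * A * V**3); the actual formula lives in self.cp)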
if A is not None:
name = 'stats-cp-mech'
if ch_wind is None:
chiwind = self.res.ch_dict[self.find_windchan_hub()]['chi']
else:
chiwind = self.res.ch_dict[ch_wind]['chi']
wind = self.res.sig[:,chiwind]
cp = np.ndarray((sig_size, 1))
cp[:,0] = self.cp(-sig_pmech[:,0], wind, A)
# add a new channel description for the power coefficient
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans
ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, cp, axis=1)
try:
try:
nn_shaft = self.config['nn_shaft']
except (KeyError, TypeError):
nn_shaft = 4
chan_t = 'shaft_nonrotate-shaft-node-%3.3i-forcevec-z'%nn_shaft
i = self.res.ch_dict[chan_t]['chi']
thrust = self.res.sig[:,i]
name = 'stats-ct'
ct = np.ndarray((sig_size, 1))
ct[:,0] = self.ct(thrust, wind, A)
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans
ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, ct, axis=1)
except KeyError:
print(' can not calculate CT')
# and append to all the statistics types
# for key, stats_arr in stats.iteritems():
# stats[key] = np.append(stats_arr, P_mech_stats[key])
if save_new_sigs and new_sigs.shape[1] > 0:
chis, keys = [], []
for key, value in ch_dict_new.items():
chis.append(value['chi'])
keys.append(key)
# sort on channel number, so it agrees with the new_sigs array
isort = np.array(chis).argsort()
keys = np.array(keys)[isort].tolist()
df_new_sigs = pd.DataFrame(new_sigs, columns=keys)
respath = os.path.join(case['[run_dir]'], case['[res_dir]'])
resfile = case['[case_id]']
fname = os.path.join(respath, resfile + '_postres.h5')
print(' saving post-processed res: %s...' % fname, end='')
df_new_sigs.to_hdf(fname, 'table', mode='w', format='table',
complevel=9, complib=self.complib)
print('done!')
del df_new_sigs
ch_dict = self.res.ch_dict.copy()
ch_dict.update(ch_dict_new)
# ch_df = pd.concat([self.res.ch_df, pd.DataFrame(ch_df_new)])
# put all the extra channels into the results if we want to also
# be able to calculate the fatigue loads on them.
self.sig = np.append(self.sig, new_sigs, axis=1)
# calculate the statistics values
stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)
# Because each channel is a new row, it doesn't matter how many
# data channels each case has, and this approach does not break
# when different cases have a different number of output channels
# By default, just take all channels in the result file.
if ch_sel_init is None:
ch_sel = list(ch_dict.keys())
# ch_sel = ch_df.ch_name.tolist()
# ch_sel = [str(k) for k in ch_sel]
print(' selecting all channels for statistics')
# calculate the fatigue properties from selected channels
fatigue, tags_fatigue = {}, []
if ch_fatigue_init is None:
ch_fatigue = ch_sel
print(' selecting all channels for fatigue')
else:
ch_fatigue = ch_fatigue_init
for ch_id in ch_fatigue:
chi = ch_dict[ch_id]['chi']
signal = self.sig[:,chi]
if neq is None:
neq_ = float(case['[duration]'])
else:
neq_ = neq
eq = self.res.calc_fatigue(signal, no_bins=no_bins, neq=neq_,
m=m)
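# eq holds one equivalent load per Woehler exponent in m. For a
# rainflow counted load spectrum (n_i cycles at range S_i) this is
#   S_eq = ( sum_i(n_i * S_i**m) / neq_ )**(1/m)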
# save in the fatigue results
fatigue[ch_id] = {}
fatigue[ch_id]['neq'] = neq_
# when calc_fatigue succeeds, we should have as many items
# as in m
if len(eq) == len(m):
for eq_, m_ in zip(eq, m):
fatigue[ch_id]['m=%2.01f' % m_] = eq_
# when it fails, we get an empty list back
else:
for m_ in m:
fatigue[ch_id]['m=%2.01f' % m_] = np.nan
# build the fatigue tags
for m_ in m:
tag = 'm=%2.01f' % m_
tags_fatigue.append(tag)
tags_fatigue.append('neq')
# -----------------------------------------------------------------
# define the pandas data frame dict on first run
# -----------------------------------------------------------------
# Only build the ch_sel collection once. By definition, the
# statistics, fatigue and htc tags will not change
if add_stats:
# statistical parameters
for statparam in list(stats.keys()):
df_dict[statparam] = []
# # additional tags
# for tag in tags:
# df_dict[tag] = []
# fatigue data
for tag in tags_fatigue:
df_dict[tag] = []
add_stats = False
for ch_id in ch_sel:
chi = ch_dict[ch_id]['chi']
# ch_name is not unique anymore, this doesn't work obviously!
# use the channel index instead, that is unique
# chi = ch_df[ch_df.ch_name==ch_id].chi.values[0]
# sig_stat = [(0=value,1=index),statistic parameter, channel]
# stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max
# note that min, mean, std, and range are not relevant for index
# values. Set to zero there.
# -------------------------------------------------------------
# Fill in all the values for the current data entry
# -------------------------------------------------------------
# the auxiliary columns
try:
name = self.res.ch_details[chi,0]
unit = self.res.ch_details[chi,1]
desc = self.res.ch_details[chi,2]
# the new channels from new_sigs are not in here
except (IndexError, AttributeError):
name = ch_id
desc = ''
unit = ''
df_dict['channel_name'].append(name)
df_dict['channel_units'].append(unit)
df_dict['channel_desc'].append(desc)
df_dict['channel_nr'].append(chi)
# each DataFrame row is one channel of one case and needs to be
# identified as such
df_dict[tag_chan].append(ch_id)
# for all the statistics keys, save the values for the
# current channel
for statparam in list(stats.keys()):
df_dict[statparam].append(stats[statparam][chi])
# and save the tags from the input htc file in order to
# label each different case properly
for tag in tags:
df_dict[tag].append(case[tag])
# append any fatigue channels if applicable, otherwise nan
if ch_id in fatigue:
for m_fatigue, eq_ in fatigue[ch_id].items():
df_dict[m_fatigue].append(eq_)
else:
for tag in tags_fatigue:
# no fatigue results were calculated for this channel
df_dict[tag].append(np.nan)
# when dealing with a lot of cases, save the stats data at
# intermediate points to avoid memory issues
if math.fmod(ii+1, saveinterval) == 0.0:
df_dict2 = self._df_dict_check_datatypes(df_dict)
# convert, save/update
if isinstance(suffix, str):
ext = suffix
elif suffix is True:
ext = '_%06i' % (ii+1)
else:
ext = ''
# dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save,
# update=update, csv=csv, suffix=ext)
# TODO: test this first
fname = os.path.join(post_dir, sim_id + '_statistics' + ext)
dfs = misc.dict2df(df_dict2, fname, save=save, update=update,
csv=csv, xlsx=xlsx, check_datatypes=False,
complib=self.complib)
df_dict2 = None
df_dict = None
add_stats = True
# only save again when there is actual data in df_dict
if df_dict is not None:
# make consistent data types
df_dict2 = self._df_dict_check_datatypes(df_dict)
# convert, save/update
if isinstance(suffix, str):
ext = suffix
elif suffix is True:
ext = '_%06i' % ii
else:
ext = ''
# dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save,
# update=update, csv=csv, suffix=ext)
# TODO: test this first
fname = os.path.join(post_dir, sim_id + '_statistics' + ext)
dfs = misc.dict2df(df_dict2, fname, save=save, update=update,
csv=csv, xlsx=xlsx, check_datatypes=False,
complib=self.complib)
return dfs
def _add2newsigs(self, ch_dict, name, i_new_chans, new_sigs, addendum):
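"""Register a derived channel in ch_dict under name (with channel
index i_new_chans) and append its signal (addendum) to new_sigs."""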
ch_dict[name] = {}
ch_dict[name]['chi'] = i_new_chans
i_new_chans += 1
return ch_dict, np.append(new_sigs, addendum, axis=1)
# TODO: use the version in misc instead.
def _df_dict_save(self, df_dict2, post_dir, sim_id, save=True,
update=False, csv=True, suffix=None):
"""
Convert the df_dict to df and save/update.
DEPRECATED, use misc.dict2df instead
"""
if isinstance(suffix, str):
fpath = os.path.join(post_dir, sim_id + '_statistics' + suffix)
else:
fpath = os.path.join(post_dir, sim_id + '_statistics')
# in case converting to dataframe fails, fall back
try:
dfs = pd.DataFrame(df_dict2)
except Exception as e:
FILE = open(fpath + '.pkl', 'wb')
pickle.dump(df_dict2, FILE, protocol=2)
FILE.close()
# check what went wrong
misc.check_df_dict(df_dict2)
print('failed to convert to data frame, saved as dict')
raise(e)
# # apply categoricals to objects
# for column_name, column_dtype in dfs.dtypes.iteritems():
# # applying categoricals mostly makes sense for objects
# # we ignore all others
# if column_dtype.name == 'object':
# dfs[column_name] = dfs[column_name].astype('category')
# and save/update the statistics database
if save:
if update:
print('updating statistics: %s ...' % (post_dir + sim_id), end='')
try:
dfs.to_hdf('%s.h5' % fpath, 'table', mode='r+', append=True,
format='table', complevel=9, complib=self.complib)
except IOError:
print('Can not update, file does not exist. Saving instead'
'...', end='')
dfs.to_hdf('%s.h5' % fpath, 'table', mode='w',
format='table', complevel=9, complib=self.complib)
else:
print('saving statistics: %s ...' % (post_dir + sim_id), end='')
if csv:
dfs.to_csv('%s.csv' % fpath)
dfs.to_hdf('%s.h5' % fpath, 'table', mode='w',
format='table', complevel=9, complib=self.complib)
print('DONE!!\n')
return dfs
# TODO: use the version in misc instead.
def _df_dict_check_datatypes(self, df_dict):
"""
there might be a mix of strings and numbers now, see if we can have
the same data type throughout a column
nasty hack: because of the unicode -> string conversion we might not
overwrite the same key in the dict.
DEPRECATED, use misc.df_dict_check_datatypes instead
"""
# FIXME: this approach will result in twice the memory usage though...
# we can not pop/delete items from a dict while iterating over it
df_dict2 = {}
for colkey, col in df_dict.items():
# if we have a list, convert to string
if type(col[0]).__name__ == 'list':
for ii, item in enumerate(col):
col[ii] = '**'.join(item)
# if we already have an array (statistics) or a list of numbers
# do not try to cast into another data type, because downcasting
# in that case will not raise any exception
elif type(col[0]).__name__[:3] in ['flo', 'int', 'nda']:
df_dict2[str(colkey)] = np.array(col)
continue
# in case we have unicodes instead of strings, we need to convert
# to strings otherwise the saved .h5 file will have pickled elements
try:
df_dict2[str(colkey)] = np.array(col, dtype=np.int32)
except OverflowError:
try:
df_dict2[str(colkey)] = np.array(col, dtype=np.int64)
except OverflowError:
df_dict2[str(colkey)] = np.array(col, dtype=np.float64)
except ValueError:
try:
df_dict2[str(colkey)] = np.array(col, dtype=np.float64)
except ValueError:
df_dict2[str(colkey)] = np.array(col, dtype=str)
except TypeError:
# in all other cases, make sure we have converted them to
# strings and NOT unicode
df_dict2[str(colkey)] = np.array(col, dtype=str)
except Exception as e:
print('failed to convert column %s to single data type' % colkey)
raise(e)
return df_dict2
def fatigue_lifetime(self, dfs, neq_life, res_dir='res/', fh_lst=None,
dlc_folder="dlc%s_iec61400-1ed3/", extra_cols=[],
save=False, update=False, csv=False, new_sim_id=False,
xlsx=False, years=20.0, silent=False):
"""
Calculate the fatigue over a selection of cases and indicate how many
hours each case contributes to its life time.
This approach can only work reliably if the common DLC folder
structure is followed. This also means that a 'dlc_config.xlsx' Excel
file is required in the HAWC2 root directory (as defined in the
[run_dir] tag).
Parameters
----------
dfs : DataFrame
Statistics Pandas DataFrame. When extra_cols is not defined, it
should only hold the results of one standard organized DLC (one
turbine, one inflow case).
neq_life : float
Reference number of cycles. Usually, neq is either set to 10e6,
10e7 or 10e8.
res_dir : str, default='res/'
Base directory of the results. Results would be located in
res/dlc_folder/*.sel. Only relevant when fh_lst is None.
dlc_folder : str, default="dlc%s_iec61400-1ed3/"
String with the DLC subfolder names. One string substitution is
required (%s), and should represent the DLC number (without comma
or point). Not relevant when fh_lst is defined.
extra_cols : list, default=[]
The included columns are the material constants, and each row is
a channel. When multiple DLC cases are included in dfs, the user
has to define additional columns in order to distinguish between
the DLC cases.
fh_lst : list, default=None
Number of hours for each case over its life time. Format:
[(filename, hours),...] where, filename is the name of the file
(can be a full path, but only the base path is considered), hours
is the number of hours over the life time. When fh_lst is set,
res_dir, dlc_folder and dlc_name are not used.
years : float, default=20
Total life time expressed in years.
Returns
-------
df_Leq : DataFrame
Pandas DataFrame with the life time equivalent load for the given
neq, all the channels, and a range of material parameters m.
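Examples
--------
A sketch under the standard DLC folder layout, with df_stats as
returned by statistics() (names are illustrative):

>>> df_Leq = cc.fatigue_lifetime(df_stats, neq_life=1e7, years=20.0,
...                              save=True, update=False)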
"""
if not silent:
print('Calculating life time fatigue load')
# get some basic parameters required to calculate statistics
try:
case = list(self.cases.keys())[0]
except IndexError:
if not silent:
print('no cases to select so no statistics, aborting ...')
return None
post_dir = self.cases[case]['[post_dir]']
if not new_sim_id:
# select the sim_id from a random case
sim_id = self.cases[case]['[sim_id]']
else:
sim_id = new_sim_id
if fh_lst is None:
wb = WeibullParameters()
if 'Weibull' in self.config:
for key in self.config['Weibull']:
setattr(wb, key, self.config['Weibull'][key])
# we assume the run_dir (root) is the same everywhere
run_dir = self.cases[case]['[run_dir]']
fname = os.path.join(run_dir, 'dlc_config.xlsx')
dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k)
# if you need all DLCs, make sure to have %s in the file name
dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder)
fh_lst = dlc_cfg.file_hour_lst(years=years)
# now we have a full path to the result files, but we only need
# the case_id to identify the corresponding entry from the statistics
# DataFrame (excluding the .sel extension)
case_ids = [os.path.basename(k[0].replace('.sel', '')) for k in fh_lst]
hours = [k[1] for k in fh_lst]
# ---------------------------------------------------------------------
# column definitions
# ---------------------------------------------------------------------
# available material constants
ms, cols = [], []
for key in dfs:
if key[:2] == 'm=':
ms.append(key)
# when multiple DLC cases are included, add extra cols to identify each
# DLC group. Make a copy, because extra_cols does not get re-initiated
# when defined as an optional keyword argument
extra_cols_ = copy.copy(extra_cols + ['channel'])
# the Leq table needs one column per material constant m, plus the
# identifier columns
cols.extend(ms)
cols.extend(extra_cols_)
# ---------------------------------------------------------------------
# Build the DataFrame; we do not have a unique channel index
dict_Leq = {col:[] for col in cols}
# index on case_id on the original DataFrame so we can select accordingly
dfs = dfs.set_index('[case_id]')
# select for each channel all the cases
for grname, gr in dfs.groupby(dfs.channel):
# if one m has any nan's, assume none of them are good and throw
# away
# if np.isnan(gr[ms[0]].values).any():
# sel_rows.pop(grname)
# continue
# select the cases in the same order as the corresponding hours
try:
sel_sort = gr.loc[case_ids]
except KeyError:
if not silent:
print(' ignore sensor for Leq:', grname)
continue
for col in extra_cols_:
# at this stage we already should have one case, so its
# identifiers should also be unique
val_unique = sel_sort[col].unique()
if len(val_unique) > 1:
print('found %i sets instead of 1:' % len(val_unique))
print(val_unique)
raise ValueError('For Leq load, the given DataFrame can '
'only hold one complete DLC set.')
# values of the identifier columns for each case. We do this
# in case the original dfs holds multiple DLC cases.
dict_Leq[col].append(sel_sort[col].unique()[0])
# R_eq is usually expressed as the 1Hz equivalent load
neq_1hz = sel_sort['neq'].values
for m in ms:
# sel_sort[m] holds the equivalent loads for each of the DLC
# cases: such as all the different wind speeds for dlc1.2
m_ = float(m.split('=')[1])
R_eq_mod = np.power(sel_sort[m].values, m_) * neq_1hz
tmp = (R_eq_mod*np.array(hours)).sum()
# the effective Leq for each of the material constants
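#   Leq_m = ( sum_i(neq_i * R_i**m * hours_i) / neq_life )**(1/m)
# where R_i is the 1Hz equivalent load of case i, neq_i its number
# of equivalent cycles, and hours_i its life time contribution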

dict_Leq[m].append(math.pow(tmp/neq_life, 1.0/m_))
# the following is twice as slow:
# [i*j for (i,j) in zip(sel_sort[m].values.tolist(),hours)]
# collens = misc.check_df_dict(dict_Leq)
# make consistent data types, and convert to DataFrame
fname = os.path.join(post_dir, sim_id + '_Leq')
df_Leq = misc.dict2df(dict_Leq, fname, save=save, update=update,
csv=csv, check_datatypes=True, xlsx=xlsx,
complib=self.complib)
# only keep the ones that do not have nan's (only works with index)
return df_Leq
def AEP(self, dfs, fh_lst=None, ch_powe=None, extra_cols=[], update=False,
res_dir='res/', dlc_folder="dlc%s_iec61400-1ed3/", csv=False,
new_sim_id=False, save=False, years=20.0, xlsx=False):
"""
Calculate the Annual Energy Production (AEP) for DLC1.2 cases.
Parameters
----------
dfs : DataFrame
Statistics Pandas DataFrame. When extra_cols is not defined, it
should only hold the results of one standard organized DLC (one
turbine, one inflow case).
fh_lst : list, default=None
Number of hours for each case over its life time. Format:
[(filename, hours),...] where, filename is the name of the file
(can be a full path, but only the base path is considered), hours
is the number of hours over the life time. When fh_lst is set,
dlc_folder and dlc_name are not used.
ch_powe : string, default=None
Channel name of the electrical power output. When None, the
default output channel of the DTU Wind Energy controller
('DLL-2-inpvec-2') is used.
extra_cols : list, default=[]
The included column is just the AEP, and each row is
a channel. When multiple DLC cases are included in dfs, the user
has to define additional columns in order to distinguish between
the DLC cases.
res_dir : str, default='res/'
Base directory of the results. Results would be located in
res/dlc_folder/*.sel
dlc_folder : str, default="dlc%s_iec61400-1ed3/"
String with the DLC subfolder names. One string substitution is
required (%s), and should represent the DLC number (without comma
or point). Not relevant when fh_lst is defined.
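Examples
--------
A sketch, assuming df_stats holds the statistics of a single DLC1.2
set as returned by statistics():

>>> df_AEP = cc.AEP(df_stats, ch_powe='DLL-2-inpvec-2', save=True)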
"""
# get some basic parameters required to calculate statistics
try:
case = list(self.cases.keys())[0]
except IndexError:
print('no cases to select so no statistics, aborting ...')
return None
post_dir = self.cases[case]['[post_dir]']
if not new_sim_id:
# select the sim_id from a random case
sim_id = self.cases[case]['[sim_id]']
else:
sim_id = new_sim_id
if fh_lst is None:
wb = WeibullParameters()
if 'Weibull' in self.config:
for key in self.config['Weibull']:
setattr(wb, key, self.config['Weibull'][key])
# we assume the run_dir (root) is the same everywhere
run_dir = self.cases[list(self.cases.keys())[0]]['[run_dir]']
fname = os.path.join(run_dir, 'dlc_config.xlsx')
dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k)
# if you need all DLCs, make sure to have %s in the file name
dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder)
fh_lst = dlc_cfg.file_hour_lst(years=1.0)
# now we have a full path to the result files, but we only need
# the case_id to identify the corresponding entry from the statistics
# DataFrame (excluding the .sel extension)
def basename(k):
return os.path.basename(k[0].replace('.sel', ''))
fh_lst_basename = [(basename(k), k[1]) for k in fh_lst]
# only take dlc12 for power production
case_ids = [k[0] for k in fh_lst_basename if k[0][:5]=='dlc12']
hours = [k[1] for k in fh_lst_basename if k[0][:5]=='dlc12']
# the default electrical power channel name from DTU Wind controller
if ch_powe is None:
ch_powe = 'DLL-2-inpvec-2'
# and select only the power channels
dfs_powe = dfs[dfs.channel==ch_powe]
# by default we have AEP as a column
cols = ['AEP']
cols.extend(extra_cols)
# Build the DataFrame; we do not have a unique channel index
dict_AEP = {col:[] for col in cols}
# index on case_id on the original DataFrame so we can select accordingly
dfs_powe = dfs_powe.set_index('[case_id]')
# select the cases in the same order as the corresponding hours
sel_sort = dfs_powe.loc[case_ids]
for col in extra_cols:
# at this stage we already should have one case, so its
# identifiers should also be unique
val_unique = sel_sort[col].unique()
if len(val_unique) > 1:
print('found %i sets instead of 1:' % len(val_unique))
print(val_unique)
raise ValueError('For AEP, the given DataFrame can only hold '
'one complete DLC set. Make sure to identify '
'the proper extra_cols to identify the '
'different DLC sets.')
# values of the identifier columns for each case. We do this
# in case the original dfs holds multiple DLC cases.
dict_AEP[col].append(sel_sort[col].unique()[0])
# and the AEP: take the average, multiply with the duration
# duration = sel_sort['[duration]'].values
# power_mean = sel_sort['mean'].values
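# AEP = sum_i(P_mean_i * hours_i): hours holds the Weibull weighted
# number of hours per year for each dlc12 case (file_hour_lst was
# called with years=1.0 above), so the result scales as energy per year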
AEP = (sel_sort['mean'].values * np.array(hours)).sum()
dict_AEP['AEP'].append(AEP)
# make consistent data types, and convert to DataFrame
fname = os.path.join(post_dir, sim_id + '_AEP')
df_AEP = misc.dict2df(dict_AEP, fname, update=update, csv=csv,
save=save, check_datatypes=True, xlsx=xlsx,
complib=self.complib)
return df_AEP
def stats2dataframe(self, ch_sel=None, tags=['[turb_seed]','[windspeed]']):
"""
Convert the archaic statistics dictionary of a group of cases to
a more convenient pandas dataframe format.
DEPRECATED, use statistics instead!!
Parameters
----------
ch_sel : dict, default=None
Map short names to the channel id's defined in ch_dict in order to
have more human readable column names in the pandas dataframe. By
default, if ch_sel is None, a dataframe for each channel in the
ch_dict (so in the HAWC2 output) will be created. When ch_sel is
defined, only those channels are considered.
ch_sel[short name] = full ch_dict identifier
tags : list, default=['[turb_seed]','[windspeed]']
Select which tag values from cases should be included in the
dataframes. This will help in selecting and identifying the
different cases.
Returns
-------
dfs : dict
Dictionary of dataframes, where the key is the channel name of
the output (that was optionally defined in ch_sel), and the value
is the dataframe containing the statistical values for all the
different selected cases.
"""
df_dict = {}
for cname, case in self.cases.items():
# make sure all the selected tags exist in the current case
if len(tags) != len(set(case) & set(tags)):
raise KeyError('not all selected tags exist in cases')
sig_stats = self.stats_dict[cname]['sig_stats']
ch_dict = self.stats_dict[cname]['ch_dict']
if ch_sel is None:
# map each channel to itself (a dict, since ch_sel.items() is used below)
ch_sel = {i: i for i in ch_dict}
for ch_short, ch_name in ch_sel.items():
chi = ch_dict[ch_name]['chi']
# sig_stat = [(0=value,1=index),statistic parameter, channel]
# stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max
# note that min, mean, std, and range are not relevant for index