the output (that was optionally defined in ch_sel), and the value
is the dataframe containing the statistical values for all the
different selected cases.
"""
def add_df_row(df_dict, **kwargs):
"""
add a new channel to the df_dict format of ch_df
"""
for col, value in kwargs.items():
df_dict[col].append(value)
for col in (self.res.cols - set(kwargs.keys())):
df_dict[col].append('')
return df_dict
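# Illustrative usage (channel index and name are hypothetical):
#   ch_df_new = add_df_row(ch_df_new, **{'chi': 12, 'ch_name': 'stats-ct'})
# appends 12 and 'stats-ct' to their respective columns and an empty
# string to all remaining columns in self.res.cols, so every column
# keeps the same length for the later DataFrame creation.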
# in case the output changes, remember the original ch_sel
if ch_sel is not None:
ch_sel_init = ch_sel.copy()
else:
ch_sel_init = None
if ch_fatigue is None:
ch_fatigue_init = None
else:
ch_fatigue_init = ch_fatigue
# TODO: should the default tags not be all the tags in the cases dict?
tag_default = ['[case_id]', '[sim_id]']
tag_chan = 'channel'
# merge default with other tags
for tag in tag_default:
if tag not in tags:
tags.append(tag)
# tags have to be unique; when the same tag appears twice
# it will break the DataFrame creation
if len(tags) != len(set(tags)):
raise ValueError('tags can only contain unique entries')
# get some basic parameters required to calculate statistics
try:
case = list(self.cases.keys())[0]
except IndexError:
print('no cases to select so no statistics, aborting ...')
return None
post_dir = self.cases[case]['[post_dir]']
if not new_sim_id:
# select the sim_id from a random case
sim_id = self.cases[case]['[sim_id]']
else:
sim_id = new_sim_id
if not silent:
nrcases = len(self.cases)
print('='*79)
print('statistics for %s, nr cases: %i' % (sim_id, nrcases))
df_dict = None
add_stats = True

# for finding [] tags
regex = re.compile(r'(\[.*?\])')
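# illustrative example (channel name is hypothetical): the regex
# extracts all bracketed channel tags from an expression, e.g.
#   regex.findall('[blade1-tip-x]**2') returns ['[blade1-tip-x]']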
for ii, (cname, case) in enumerate(self.cases.items()):
# build the basic df_dict if not defined
if df_dict is None:
# the dictionary that will be used to create a pandas dataframe
df_dict = { tag:[] for tag in tags }
df_dict[tag_chan] = []
# add more columns that will help with IDing the channel
df_dict['channel_name'] = []
df_dict['channel_units'] = []
df_dict['channel_nr'] = []
df_dict['channel_desc'] = []
add_stats = True
if not silent:
pc = '%6.2f' % (float(ii)*100.0/float(nrcases))
pc += ' %'
print('stats progress: %4i/%i %s | %s' % (ii, nrcases, pc, cname))
# make sure the selected tags exist
if len(tags) != len(set(case) & set(tags)):
raise KeyError('not all selected tags exist in cases')
self.load_result_file(case)
ch_dict_new = {}
# this is really messy: we are now also using the channel
# DataFrame structure in parallel
ch_df_new = {col:[] for col in self.res.cols}
ch_df_new['ch_name'] = []
# calculate the statistics values
# stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)
i_new_chans = self.sig.shape[1] # self.Nch
sig_size = self.res.N # len(self.sig[i0:i1,0])
new_sigs = np.ndarray((sig_size, 0))

for name, expr in add_sigs.items():
channel_tags = regex.findall(expr)
# replace all sensor names with expressions
template = "self.sig[:,self.res.ch_dict['{}']['chi']]"
for chan in channel_tags:
# first remove the [] from the tag
# FIXME: fails when the same channel occurs more than once

expr = expr.replace(chan, chan[1:-1])
expr = expr.replace(chan[1:-1], template.format(chan[1:-1]))
sig_add = np.ndarray((len(self.sig[:,0]), 1))
sig_add[:,0] = eval(expr)
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans
ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, sig_add, axis=1)
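# Illustrative add_sigs entry (channel names are hypothetical):
#   add_sigs = {'tip-defl-res': '([tip-x]**2 + [tip-y]**2)**0.5'}
# each bracketed tag is stripped of its brackets and substituted with
# the self.sig column lookup from the template above, after which the
# expression is evaluated with eval() to form the new channel.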
if add_sensor is not None:
chi1 = self.res.ch_dict[add_sensor['ch1_name']]['chi']
chi2 = self.res.ch_dict[add_sensor['ch2_name']]['chi']
name = add_sensor['ch_name_add']
factor = add_sensor['factor']
operator = add_sensor['operator']
p1 = self.sig[:,chi1]
p2 = self.sig[:,chi2]
sig_add = np.ndarray((len(p1), 1))
if operator == '*':
sig_add[:,0] = p1*p2*factor
elif operator == '/':
sig_add[:,0] = factor*p1/p2
else:
raise ValueError('Operator needs to be either * or /')
# add_stats = self.res.calc_stats(sig_add)
# add_stats_i = stats['max'].shape[0]
# add a new channel description for the added sensor
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans

ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, sig_add, axis=1)
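# Illustrative add_sensor definition (channel names are hypothetical):
#   add_sensor = {'ch1_name': 'shaft-torque', 'ch2_name': 'shaft-speed',
#                 'ch_name_add': 'shaft-power-calc', 'factor': 1.0,
#                 'operator': '*'}
# with '*' the new channel holds ch1*ch2*factor, with '/' it holds
# factor*ch1/ch2.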
# # and append to all the statistics types
# for key, stats_arr in stats.iteritems():
# stats[key] = np.append(stats_arr, add_stats[key])
# calculate the resultants
sig_resultants = np.ndarray((sig_size, len(chs_resultant)))
inc = []
for j, chs in enumerate(chs_resultant):
sig_res = np.ndarray((sig_size, len(chs)))
lab = ''
no_channel = False
for i, ch in enumerate(chs):
# if the channel does not exist, set to zero
try:
chi = self.res.ch_dict[ch]['chi']
sig_res[:,i] = self.sig[:,chi]
no_channel = False
except KeyError:
no_channel = True
lab += ch.split('-')[-1]
name = '-'.join(ch.split('-')[:-1] + [lab])
# when one of the components does not exist, we can not calculate
# the resultant!
if no_channel:
rpl = (name, cname)
print(' missing channel, no resultant for: %s, %s' % rpl)
continue
inc.append(j)
sig_resultants[:,j] = np.sqrt((sig_res*sig_res).sum(axis=1))
# resultant = np.sqrt(sig_resultants[:,j].reshape(self.res.N, 1))
# add_stats = self.res.calc_stats(resultant)
# add_stats_i = stats['max'].shape[0]
# add a new channel description for this resultant
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans

ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
# and append to all the statistics types
# for key, stats_arr in stats.iteritems():
# stats[key] = np.append(stats_arr, add_stats[key])
if len(chs_resultant) > 0:
# but only take the channels that were not missing
new_sigs = np.append(new_sigs, sig_resultants[:,inc], axis=1)
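# Illustrative chs_resultant entry (channel names are hypothetical):
#   chs_resultant = [['blade1-root-Mx', 'blade1-root-My']]
# creates a new channel 'blade1-root-MxMy' holding the resultant
# (square root of the summed squares) of the two components.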
# calculate mechanical power first before deriving statistics
# from it
if calc_mech_power:
name = 'stats-shaft-power'
sig_pmech = np.ndarray((sig_size, 1))
sig_pmech[:,0] = self.shaft_power()
# P_mech_stats = self.res.calc_stats(sig_pmech)
# mech_stats_i = stats['max'].shape[0]
# add a new channel description for the mechanical power
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans

ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, sig_pmech, axis=1)
# and C_p_mech
if A is not None:
name = 'stats-cp-mech'
if ch_wind is None:
chiwind = self.res.ch_dict[self.find_windchan_hub()]['chi']
else:
chiwind = self.res.ch_dict[ch_wind]['chi']
wind = self.res.sig[:,chiwind]
cp = np.ndarray((sig_size, 1))
cp[:,0] = self.cp(-sig_pmech[:,0], wind, A)
# add a new channel description for the mechanical power coefficient
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans

ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, cp, axis=1)
try:
try:
nn_shaft = self.config['nn_shaft']
except:
nn_shaft = 4
chan_t = 'shaft_nonrotate-shaft-node-%3.3i-forcevec-z'%nn_shaft
i = self.res.ch_dict[chan_t]['chi']
thrust = self.res.sig[:,i]
name = 'stats-ct'
ct = np.ndarray((sig_size, 1))
ct[:,0] = self.ct(thrust, wind, A)
ch_dict_new[name] = {}
ch_dict_new[name]['chi'] = i_new_chans

ch_df_new = add_df_row(ch_df_new, **{'chi':i_new_chans,
'ch_name':name})
i_new_chans += 1
new_sigs = np.append(new_sigs, ct, axis=1)
except KeyError:
print(' can not calculate CT')
# and append to all the statistics types
# for key, stats_arr in stats.iteritems():
# stats[key] = np.append(stats_arr, P_mech_stats[key])
if save_new_sigs and new_sigs.shape[1] > 0:
chis, keys = [], []
for key, value in ch_dict_new.items():
chis.append(value['chi'])
keys.append(key)
# sort on channel number, so it agrees with the new_sigs array
isort = np.array(chis).argsort()
keys = np.array(keys)[isort].tolist()
df_new_sigs = pd.DataFrame(new_sigs, columns=keys)
respath = os.path.join(case['[run_dir]'], case['[res_dir]'])
resfile = case['[case_id]']

fname = os.path.join(respath, resfile + '_postres.csv')
print(' saving post-processed res: %s...' % fname, end='')

df_new_sigs.to_csv(fname, sep='\t')
print('done!')
del df_new_sigs
ch_dict = self.res.ch_dict.copy()
ch_dict.update(ch_dict_new)
# ch_df = pd.concat([self.res.ch_df, pd.DataFrame(ch_df_new)])
# put all the extra channels into the results if we want to also
# be able to calculate the fatigue loads on them.
self.sig = np.append(self.sig, new_sigs, axis=1)
# calculate the statistics values
stats = self.res.calc_stats(self.sig, i0=i0, i1=i1)
# Because each channel is a new row, it doesn't matter how many
# data channels each case has, and this approach does not break
# when different cases have a different number of output channels
# By default, just take all channels in the result file.
if ch_sel_init is None:
ch_sel = list(ch_dict.keys())
# ch_sel = ch_df.unique_ch_name.tolist()
# ch_sel = [str(k) for k in ch_sel]
print(' selecting all channels for statistics')
# calculate the fatigue properties from selected channels
fatigue, tags_fatigue = {}, []
if ch_fatigue_init is None:
ch_fatigue = ch_sel
print(' selecting all channels for fatigue')
else:
ch_fatigue = ch_fatigue_init
for ch_id in ch_fatigue:
chi = ch_dict[ch_id]['chi']
signal = self.sig[:,chi]
if neq is None:

neq_ = float(case['[duration]'])
else:
neq_ = neq
eq = self.res.calc_fatigue(signal, no_bins=no_bins, neq=neq_,
m=m)
# save in the fatigue results

fatigue[ch_id] = {}
fatigue[ch_id]['neq'] = neq_
# when calc_fatigue succeeds, we should have as many items
# as in m
if len(eq) == len(m):
for eq_, m_ in zip(eq, m):
fatigue[ch_id]['m=%2.01f' % m_] = eq_
# when it fails, we get an empty list back
else:
for m_ in m:
fatigue[ch_id]['m=%2.01f' % m_] = np.nan
# build the fatigue tags
for m_ in m:
tag = 'm=%2.01f' % m_
tags_fatigue.append(tag)
tags_fatigue.append('neq')
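# Illustrative result (assuming m=[3, 10]): tags_fatigue becomes
# ['m=3.0', 'm=10.0', 'neq'], and fatigue[ch_id] maps each of these
# tags to the corresponding equivalent load (or NaN) and neq value.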
# -----------------------------------------------------------------
# define the pandas data frame dict on first run
# -----------------------------------------------------------------
# Only build the ch_sel collection once. By definition, the
# statistics, fatigue and htc tags will not change
if add_stats:
# statistical parameters
for statparam in list(stats.keys()):
df_dict[statparam] = []
# # additional tags
# for tag in tags:
# df_dict[tag] = []
# fatigue data
for tag in tags_fatigue:
df_dict[tag] = []
add_stats = False
for ch_id in ch_sel:
chi = ch_dict[ch_id]['chi']
# ch_name is not unique anymore, this doesn't work obviously!
# use the channel index instead, that is unique
# chi = ch_df[ch_df.unique_ch_name==ch_id].chi.values[0]
# sig_stat = [(0=value,1=index),statistic parameter, channel]
# stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max
# note that min, mean, std, and range are not relevant for index
# values. Set to zero there.
# -------------------------------------------------------------
# Fill in all the values for the current data entry
# -------------------------------------------------------------
# the auxiliary columns
try:
name = self.res.ch_details[chi,0]
unit = self.res.ch_details[chi,1]
desc = self.res.ch_details[chi,2]
# the new channels from new_sigs are not in here
except (IndexError, AttributeError) as e:
name = ch_id
desc = ''
unit = ''
df_dict['channel_name'].append(name)
df_dict['channel_units'].append(unit)
df_dict['channel_desc'].append(desc)
df_dict['channel_nr'].append(chi)
# each df row is a channel of a case that needs to be identified
df_dict[tag_chan].append(ch_id)
# for all the statistics keys, save the values for the
# current channel
for statparam in list(stats.keys()):
df_dict[statparam].append(stats[statparam][chi])
# and save the tags from the input htc file in order to
# label each different case properly
for tag in tags:
df_dict[tag].append(case[tag])
# append any fatigue channels if applicable, otherwise nan
if ch_id in fatigue:
for m_fatigue, eq_ in fatigue[ch_id].items():
df_dict[m_fatigue].append(eq_)
else:
for tag in tags_fatigue:
# TODO: or should this be an empty string instead of NaN?
df_dict[tag].append(np.nan)
# when dealing with a lot of cases, save the stats data at
# intermediate points to avoid memory issues
if math.fmod(ii+1, saveinterval) == 0.0:
df_dict2 = self._df_dict_check_datatypes(df_dict)
# convert, save/update
if isinstance(suffix, str):
ext = suffix
elif suffix is True:
ext = '_%06i' % (ii+1)
else:
ext = ''
# dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save,
# update=update, csv=csv, suffix=ext)
# TODO: test this first
fname = os.path.join(post_dir, sim_id + '_statistics' + ext)
dfs = misc.dict2df(df_dict2, fname, save=save, update=update,
csv=csv, xlsx=xlsx, check_datatypes=False,
complib=self.complib)
df_dict2 = None
df_dict = None
add_stats = True
# only save again when there is actual data in df_dict
if df_dict is not None:
# make consistent data types
df_dict2 = self._df_dict_check_datatypes(df_dict)
# convert, save/update
if isinstance(suffix, str):
ext = suffix
elif suffix is True:
ext = '_%06i' % ii
else:
ext = ''
# dfs = self._df_dict_save(df_dict2, post_dir, sim_id, save=save,
# update=update, csv=csv, suffix=ext)
# TODO: test this first
fname = os.path.join(post_dir, sim_id + '_statistics' + ext)
dfs = misc.dict2df(df_dict2, fname, save=save, update=update,
csv=csv, xlsx=xlsx, check_datatypes=False,
complib=self.complib)
return dfs
def _add2newsigs(self, ch_dict, name, i_new_chans, new_sigs, addendum):
ch_dict[name] = {}
ch_dict[name]['chi'] = i_new_chans
i_new_chans += 1
return ch_dict, np.append(new_sigs, addendum, axis=1)
# TODO: use the version in misc instead.
def _df_dict_save(self, df_dict2, post_dir, sim_id, save=True,
update=False, csv=True, suffix=None):
"""
Convert the df_dict to df and save/update.
DEPRECATED, use misc.dict2df instead
"""
if isinstance(suffix, str):
fpath = os.path.join(post_dir, sim_id + '_statistics' + suffix)
else:
fpath = os.path.join(post_dir, sim_id + '_statistics')
# in case converting to dataframe fails, fall back
try:
dfs = pd.DataFrame(df_dict2)
except Exception as e:
with open(fpath + '.pkl', 'wb') as f:
pickle.dump(df_dict2, f, protocol=2)
# check what went wrong
misc.check_df_dict(df_dict2)
print('failed to convert to data frame, saved as dict')
raise e
# # apply categoricals to objects
# for column_name, column_dtype in dfs.dtypes.iteritems():
# # applying categoricals mostly makes sense for objects
# # we ignore all others
# if column_dtype.name == 'object':
# dfs[column_name] = dfs[column_name].astype('category')
# and save/update the statistics database
if save:
if update:
print('updating statistics: %s ...' % (post_dir + sim_id), end='')
try:
dfs.to_hdf('%s.h5' % fpath, 'table', mode='r+', append=True,

format='table', complevel=9, complib=self.complib)
except IOError:
print('Cannot update, file does not exist. Saving instead'
'...', end='')
dfs.to_hdf('%s.h5' % fpath, 'table', mode='w',

format='table', complevel=9, complib=self.complib)
else:
print('saving statistics: %s ...' % (post_dir + sim_id), end='')
if csv:
dfs.to_csv('%s.csv' % fpath)
dfs.to_hdf('%s.h5' % fpath, 'table', mode='w',

format='table', complevel=9, complib=self.complib)
print('DONE!!\n')
return dfs
# TODO: use the version in misc instead.
def _df_dict_check_datatypes(self, df_dict):
"""
there might be a mix of strings and numbers now, see if we can have
the same data type throughout a column
nasty hack: because of the unicode -> string conversion we might not
overwrite the same key in the dict.
DEPRECATED, use misc.df_dict_check_datatypes instead
"""
# FIXME: this approach will result in twice the memory usage though...
# we can not pop/delete items from a dict while iterating over it
df_dict2 = {}
for colkey, col in df_dict.items():
# if we have a list, convert to string
if type(col[0]).__name__ == 'list':
for ii, item in enumerate(col):
col[ii] = '**'.join(item)
# if we already have an array (statistics) or a list of numbers
# do not try to cast into another data type, because downcasting
# in that case will not raise any exception
elif type(col[0]).__name__[:3] in ['flo', 'int', 'nda']:
df_dict2[str(colkey)] = np.array(col)
continue
# in case we have unicodes instead of strings, we need to convert
# to strings otherwise the saved .h5 file will have pickled elements
try:
df_dict2[str(colkey)] = np.array(col, dtype=np.int32)
except OverflowError:
try:
df_dict2[str(colkey)] = np.array(col, dtype=np.int64)
except OverflowError:
df_dict2[str(colkey)] = np.array(col, dtype=np.float64)
except ValueError:
try:
df_dict2[str(colkey)] = np.array(col, dtype=np.float64)
except ValueError:
df_dict2[str(colkey)] = np.array(col, dtype=str)
except TypeError:
# in all other cases, make sure we have converted them to
# strings and NOT unicode
df_dict2[str(colkey)] = np.array(col, dtype=str)
except Exception as e:
print('failed to convert column %s to single data type' % colkey)
raise e
return df_dict2
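# Illustrative behaviour (values are hypothetical): a column
# ['1', '2'] is cast to an int32 array, ['a', 'b'] falls through to
# a string array, and a column of lists like [['x', 'y']] is first
# flattened to the joined string 'x**y'.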
def fatigue_lifetime(self, dfs, neq_life, res_dir='res/', fh_lst=None,
dlc_folder="dlc%s_iec61400-1ed3/", extra_cols=[],
save=False, update=False, csv=False, new_sim_id=False,
xlsx=False, years=20.0, silent=False):
"""
Calculate the fatigue over a selection of cases and indicate how many
hours each case contributes to its life time.
This approach can only work reliably if the common DLC folder
structure is followed. This also means that a 'dlc_config.xlsx' Excel
file is required in the HAWC2 root directory (as defined in the
[run_dir] tag).
Parameters
----------
dfs : DataFrame
Statistics Pandas DataFrame. When extra_cols is not defined, it
should only hold the results of one standard organized DLC (one
turbine, one inflow case).
neq_life : float
Reference number of cycles. Usually, neq is either set to 10e6,
10e7 or 10e8.
10e7 or 10e8.
res_dir : str, default='res/'
Base directory of the results. Results would be located in

res/dlc_folder/*.sel. Only relevant when fh_lst is None.
dlc_folder : str, default="dlc%s_iec61400-1ed3/"
String with the DLC subfolder names. One string substitution is
required (%s), and should represent the DLC number (without comma
or point). Not relevant when fh_lst is defined.
extra_cols : list, default=[]
The included columns are the material constants, and each row is
a channel. When multiple DLC cases are included in dfs, the user
has to define additional columns in order to distinguish between
the DLC cases.
fh_lst : list, default=None
Number of hours for each case over its life time. Format:
[(filename, hours),...] where filename is the name of the file
(can be a full path, but only the base name is considered), hours
is the number of hours over the life time. When fh_lst is set,
years, res_dir, dlc_folder and dlc_name are not used.
years : float, default=20
Total life time expressed in years, only relevant when fh_lst is
None.
Returns
-------
df_Leq : DataFrame
Pandas DataFrame with the life time equivalent load for the given
neq, all the channels, and a range of material parameters m.
"""
if not silent:
print('Calculating life time fatigue load')
if not isinstance(neq_life, float):
neq_type = type(neq_life).__name__
msg = 'neq_life (reference nr of cycles for life time fatigue '
msg += 'load) should be a float instead of %s' % neq_type
raise ValueError(msg)
# get some basic parameters required to calculate statistics
try:
case = list(self.cases.keys())[0]
except IndexError:
if not silent:
print('no cases to select so no statistics, aborting ...')
return None
post_dir = self.cases[case]['[post_dir]']
if not new_sim_id:
# select the sim_id from a random case
sim_id = self.cases[case]['[sim_id]']
else:
sim_id = new_sim_id
# FIXME: for backward compatibility, the column name of the unique
# channel name has been changed in the past....
if 'unique_ch_name' in dfs.columns:
chan_col_name = 'unique_ch_name'
else:
chan_col_name = 'channel'
# FIXME: wb has overlap with dlc_config.xlsx, and shape_k doesn't
# seem to be used by DLCHighLevel
if fh_lst is None:
wb = WeibullParameters()
if 'Weibull' in self.config:
for key in self.config['Weibull']:
setattr(wb, key, self.config['Weibull'][key])
# we assume the run_dir (root) is the same everywhere
run_dir = self.cases[case]['[run_dir]']
fname = os.path.join(run_dir, 'dlc_config.xlsx')

dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k,
fail_on_resfile_not_found=True)
# if you need all DLCs, make sure to have %s in the file name
dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder)
# no need to build list of result files, we already have it from
# the statistics analysis
# TODO: could be faster if working with df directly, but how to
# ensure your res_dir always ends with a path separator?
# only take the values from 1 channel, not all of them!!
# FIXME: breaks when not all channels are present for all cases !
# solution: set channel "Time" as a minimum required channel!
val = dfs[chan_col_name].values[0]
sel = dfs[dfs[chan_col_name]==val]
p1, p2 = sel['[res_dir]'].values, sel['[case_id]'].values
files = [os.path.join(q1, q2) + '.sel' for q1, q2 in zip(p1, p2)]
fh_lst = dlc_cfg.file_hour_lst(years=years, files=files)
# now we have a full path to the result files, but we only need
# the case_id to identify the corresponding entry from the
# statistics DataFrame (excluding the .sel extension)
case_ids = [os.path.basename(k[0].replace('.sel', '')) for k in fh_lst]
hours = [k[1] for k in fh_lst]
# save how many hours each case is active over its life time, for
# debugging and inspection reasons.
# FIXME: this should be somewhere in its own method or something,
# and duplication with what is in AEP should be removed
fname = os.path.join(post_dir, sim_id + '_Leq_hourlist')
dict_Leq_h = {'case_id':case_ids, 'hours':hours}
df_Leq_h = misc.dict2df(dict_Leq_h, fname, update=update, csv=csv,
save=save, check_datatypes=True, xlsx=xlsx,
complib=self.complib)
# ---------------------------------------------------------------------
# column definitions
# ---------------------------------------------------------------------
# available material constants
ms, cols = [], []
for key in dfs:
if key[:2] == 'm=':
ms.append(key)
# when multiple DLC cases are included, add extra cols to identify each
# DLC group. Make a copy, because extra_cols does not get re-initiated
# when defined as an optional keyword argument
extra_cols_ = copy.copy(extra_cols + [chan_col_name])
cols.extend(extra_cols_)
# ---------------------------------------------------------------------
# Build the DataFrame; we do not have a unique channel index
dict_Leq = {col:[] for col in cols}
# index on case_id on the original DataFrame so we can select accordingly
dfs = dfs.set_index('[case_id]')
# which rows to keep: select for each channel all the cases
for grname, gr in dfs.groupby(dfs[chan_col_name]):
# if one m has any nan's, assume none of them are good and throw
# away
# if np.isnan(gr[ms[0]].values).any():
# sel_rows.pop(grname)
# continue
# select the cases in the same order as the corresponding hours
try:
sel_sort = gr.loc[case_ids]
except KeyError:
if not silent:
print(' ignore sensor for Leq:', grname)
continue
for col in extra_cols_:
# at this stage we should have only one case, so its
# identifier values should also be unique.
val_unique = sel_sort[col].unique()
if len(val_unique) > 1:
print('found %i sets instead of 1:' % len(val_unique))
print(val_unique)
raise ValueError('For Leq load, the given DataFrame can '
'only hold one complete DLC set.')
# values of the identifier columns for each case. We do this
# in case the original dfs holds multiple DLC cases.
dict_Leq[col].append(sel_sort[col].unique()[0])
# R_eq is assumed to be expressed as the 1Hz equivalent load
# where neq is set to the simulation length
# neq_1hz = sel_sort['neq'].values
# sel_sort[m] holds the equivalent loads for each of the DLC
# cases: such as all the different wind speeds for dlc1.2

for m in ms:
m_ = float(m.split('=')[1])
# do not multiply out neq_1hz from R_eq
R_eq_mod = np.power(sel_sort[m].values, m_)
# R_eq_mod will have to be scaled from its simulation length
# to 1 hour (hour distribution is in hours...). Since the
# simulation time has not been multiplied out of R_eq_mod yet,
# we can just multiply with 3600 (instead of doing 3600/neq)
tmp = (R_eq_mod * np.array(hours) * 3600).sum()
# the effective Leq for each of the material constants

dict_Leq[m].append(math.pow(tmp/neq_life, 1.0/m_))
# the following is twice as slow:
# [i*j for (i,j) in zip(sel_sort[m].values.tolist(),hours)]
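# in effect, for each material constant m the loop above computes:
#   Leq_m = (sum_i R_eq_i**m * hours_i * 3600 / neq_life)**(1/m)
# with R_eq_i the 1Hz equivalent load of case i and hours_i the
# number of hours case i is active over the life time.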
# collens = misc.check_df_dict(dict_Leq)
# make consistent data types, and convert to DataFrame
fname = os.path.join(post_dir, sim_id + '_Leq')
df_Leq = misc.dict2df(dict_Leq, fname, save=save, update=update,
csv=csv, check_datatypes=True, xlsx=xlsx,
complib=self.complib)
# only keep the ones that do not have nan's (only works with index)
return df_Leq
def AEP(self, dfs, fh_lst=None, ch_powe='DLL-2-inpvec-2', extra_cols=[],
res_dir='res/', dlc_folder="dlc%s_iec61400-1ed3/", csv=False,
new_sim_id=False, save=False, years=20.0, update=False, xlsx=False):
"""
Calculate the Annual Energy Production (AEP) for DLC1.2 cases.
Parameters
----------
dfs : DataFrame
Statistics Pandas DataFrame. When extra_cols is not defined, it
should only hold the results of one standard organized DLC (one
turbine, one inflow case).
fh_lst : list, default=None
Number of hours for each case over its life time. Format:
[(filename, hours),...] where filename is the name of the file
(can be a full path, but only the base name is considered), hours
is the number of hours over the life time. When fh_lst is set,
dlc_folder and dlc_name are not used.
ch_powe : string, default='DLL-2-inpvec-2'
Name of the output channel that holds the power signal used for
the AEP calculation.
extra_cols : list, default=[]
The included column is just the AEP, and each row is
a channel. When multiple DLC cases are included in dfs, the user
has to define additional columns in order to distinguish between
the DLC cases.
res_dir : str, default='res/'
Base directory of the results. Results would be located in
res/dlc_folder/*.sel
dlc_folder : str, default="dlc%s_iec61400-1ed3/"
String with the DLC subfolder names. One string substitution is
required (%s), and should represent the DLC number (without comma
or point). Not relevant when fh_lst is defined.
"""
# get some basic parameters required to calculate statistics
try:
case = list(self.cases.keys())[0]
except IndexError:
print('no cases to select so no statistics, aborting ...')
return None
post_dir = self.cases[case]['[post_dir]']
if not new_sim_id:
# select the sim_id from a random case
sim_id = self.cases[case]['[sim_id]']
else:
sim_id = new_sim_id
# FIXME: for backward compatibility, the column name of the unique
# channel name has been changed in the past....
if 'unique_ch_name' in dfs.columns:
chan_col_name = 'unique_ch_name'
else:
chan_col_name = 'channel'
if fh_lst is None:
wb = WeibullParameters()
if 'Weibull' in self.config:
for key in self.config['Weibull']:
setattr(wb, key, self.config['Weibull'][key])
# we assume the run_dir (root) is the same everywhere
run_dir = self.cases[list(self.cases.keys())[0]]['[run_dir]']
fname = os.path.join(run_dir, 'dlc_config.xlsx')
dlc_cfg = dlc.DLCHighLevel(fname, shape_k=wb.shape_k)
# if you need all DLCs, make sure to have %s in the file name
dlc_cfg.res_folder = os.path.join(run_dir, res_dir, dlc_folder)
# TODO: could be faster if working with df directly, but how to
# ensure your res_dir always ends with a path separator?
# FIXME: breaks when not all channels are present for all cases !
# solution: set channel "Time" as a minimum required channel!
val = dfs[chan_col_name].values[0]
sel = dfs[dfs[chan_col_name]==val]
p1, p2 = sel['[res_dir]'].values, sel['[case_id]'].values
files = [os.path.join(q1, q2) + '.sel' for q1, q2 in zip(p1, p2)]
fh_lst = dlc_cfg.file_hour_lst(years=1.0, files=files)
# now we have a full path to the result files, but we only need
# the case_id to identify the corresponding entry from the
# statistics DataFrame (excluding the .sel extension)
def basename(k):
return os.path.basename(k[0].replace('.sel', ''))
fh_lst_basename = [(basename(k), k[1]) for k in fh_lst]
# only take dlc12 for power production
case_ids = [k[0] for k in fh_lst_basename if k[0][:5]=='dlc12']
hours = [k[1] for k in fh_lst_basename if k[0][:5]=='dlc12']
# save how many hours each case is active for AEP calculations, for
# debugging and inspection reasons.
# FIXME: this should be somewhere in its own method or something,
# and duplication with what is in fatigue_lifetime should be removed
fname = os.path.join(post_dir, sim_id + '_AEP_hourlist')
dict_AEP_h = {'case_id':case_ids, 'hours':hours}
df_AEP_h = misc.dict2df(dict_AEP_h, fname, update=update, csv=csv,
save=save, check_datatypes=True, xlsx=xlsx,
complib=self.complib)
# and select only the power channels
dfs_powe = dfs[dfs[chan_col_name]==ch_powe]
# by default we have AEP as a column
cols = ['AEP']
cols.extend(extra_cols)
# Build the DataFrame; we do not have a unique channel index
dict_AEP = {col:[] for col in cols}
# index on case_id on the original DataFrame so we can select accordingly
dfs_powe = dfs_powe.set_index('[case_id]')
# select the cases in the same order as the corresponding hours
sel_sort = dfs_powe.loc[case_ids]
for col in extra_cols:
# at this stage we should have only one case, so its
# identifier values should also be unique.
val_unique = sel_sort[col].unique()
if len(val_unique) > 1:
print('found %i sets instead of 1:' % len(val_unique))
print(val_unique)
raise ValueError('For AEP, the given DataFrame can only hold '
'one complete DLC set. Make sure to identify '
'the proper extra_cols to identify the '
'different DLC sets.')
# values of the identifier columns for each case. We do this
# in case the original dfs holds multiple DLC cases.
dict_AEP[col].append(sel_sort[col].unique()[0])
# and the AEP: take the mean power, multiply with the number of hours
# duration = sel_sort['[duration]'].values
# power_mean = sel_sort['mean'].values
AEP = (sel_sort['mean'].values * np.array(hours)).sum()
dict_AEP['AEP'].append(AEP)
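# in effect: AEP = sum_i mean_power_i * hours_i, where hours_i is
# the number of hours per year case i is active (file_hour_lst was
# called with years=1.0), so the sum is the yearly energy production.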
# make consistent data types, and convert to DataFrame
fname = os.path.join(post_dir, sim_id + '_AEP')
df_AEP = misc.dict2df(dict_AEP, fname, update=update, csv=csv,
save=save, check_datatypes=True, xlsx=xlsx,
complib=self.complib)
return df_AEP
def stats2dataframe(self, ch_sel=None, tags=['[seed]','[windspeed]']):
"""
Convert the archaic statistics dictionary of a group of cases to
a more convenient pandas dataframe format.
DEPRECATED, use statistics instead!!
Parameters
----------
ch_sel : dict, default=None
Map short names to the channel id's defined in ch_dict in order to
have more human readable column names in the pandas dataframe. By
default, if ch_sel is None, a dataframe for each channel in the
ch_dict (so in the HAWC2 output) will be created. When ch_sel is
defined, only those channels are considered.
ch_sel[short name] = full ch_dict identifier
tags : list, default=['[seed]','[windspeed]']
Select which tag values from cases should be included in the
dataframes. This will help in selecting and identifying the
different cases.
Returns
-------
dfs : dict
Dictionary of dataframes, where the key is the channel name of
the output (that was optionally defined in ch_sel), and the value
is the dataframe containing the statistical values for all the
different selected cases.
"""
df_dict = {}
for cname, case in self.cases.items():
# make sure the selected tags exist
if len(tags) != len(set(case) & set(tags)):
raise KeyError('not all selected tags exist in cases')
sig_stats = self.stats_dict[cname]['sig_stats']
ch_dict = self.stats_dict[cname]['ch_dict']
if ch_sel is None:
ch_sel = {i: i for i in ch_dict}
for ch_short, ch_name in ch_sel.items():
chi = ch_dict[ch_name]['chi']
# sig_stat = [(0=value,1=index),statistic parameter, channel]
# stat params = 0 max, 1 min, 2 mean, 3 std, 4 range, 5 abs max
# note that min, mean, std, and range are not relevant for index
# values. Set to zero there.
try:
df_dict[ch_short]['case name'].append(cname)
df_dict[ch_short]['max'].append( sig_stats[0,0,chi])
df_dict[ch_short]['min'].append( sig_stats[0,1,chi])
df_dict[ch_short]['mean'].append( sig_stats[0,2,chi])
df_dict[ch_short]['std'].append( sig_stats[0,3,chi])
df_dict[ch_short]['range'].append( sig_stats[0,4,chi])
df_dict[ch_short]['absmax'].append(sig_stats[0,5,chi])
for tag in tags:
df_dict[ch_short][tag].append(case[tag])
except KeyError:
df_dict[ch_short] = {'case name' : [cname]}
df_dict[ch_short]['max'] = [sig_stats[0,0,chi]]
df_dict[ch_short]['min'] = [sig_stats[0,1,chi]]
df_dict[ch_short]['mean'] = [sig_stats[0,2,chi]]
df_dict[ch_short]['std'] = [sig_stats[0,3,chi]]
df_dict[ch_short]['range'] = [sig_stats[0,4,chi]]
df_dict[ch_short]['absmax'] = [sig_stats[0,5,chi]]
for tag in tags:
df_dict[ch_short][tag] = [ case[tag] ]
# and create for each channel a dataframe
dfs = {}
for ch_short, df_values in df_dict.items():
dfs[ch_short] = pd.DataFrame(df_values)
return dfs
def load_azimuth(self, azi, load, sectors=360):
"""
Establish load dependency on rotor azimuth angle
"""
# sort on azimuth angle
isort = np.argsort(azi)
azi = azi[isort]
load = load[isort]
azi_sel = np.linspace(0, 360, num=sectors)
load_sel = np.interp(azi_sel, azi, load)
def find_windchan_hub(self):
"""
"""
# if we sort we'll get the largest absolute coordinate last
for ch in sorted(self.res.ch_dict.keys()):
if ch[:29] == 'windspeed-global-Vy-0.00-0.00':
chan_found = ch