Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 1044 additions and 823 deletions
@@ -3,24 +3,19 @@ Created on 16/07/2013
@author: mmpe
'''
from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import absolute_import
from future import standard_library
import sys
standard_library.install_aliases()
import unittest
import numpy as np
from wetb.fatigue_tools.fatigue import eq_load, rainflow_astm, rainflow_windap, \
cycle_matrix
from wetb.fatigue_tools.fatigue import (eq_load, rainflow_astm,
rainflow_windap, cycle_matrix)
from wetb.hawc2 import Hawc2io
import os
testfilepath = os.path.join(os.path.dirname(__file__), 'test_files/') # test file path
class TestFatigueTools(unittest.TestCase):
def test_leq_1hz(self):
@@ -31,59 +26,79 @@ class TestFatigueTools(unittest.TestCase):
m = 1
point_per_deg = 100
# sine signal with 10 periods (20 peaks)
for amplitude in [1, 2, 3]:
peak2peak = amplitude * 2
# sine signal with 10 periods (20 peaks)
nr_periods = 10
time = np.linspace(0, nr_periods * 2 * np.pi, point_per_deg * 180)
neq = time[-1]
# mean value of the signal shouldn't matter
signal = amplitude * np.sin(time) + 5
r_eq_1hz = eq_load(signal, no_bins=1, m=m, neq=neq)[0]
r_eq_1hz_expected = ((2 * nr_periods * amplitude**m) / neq)**(1 / m)
np.testing.assert_allclose(r_eq_1hz, r_eq_1hz_expected)
# sine signal with 20 periods (40 peaks)
nr_periods = 20
time = np.linspace(0, nr_periods * 2 * np.pi, point_per_deg * 180)
neq = time[-1]
# mean value of the signal shouldn't matter
signal = amplitude * np.sin(time) + 9
r_eq_1hz2 = eq_load(signal, no_bins=1, m=m, neq=neq)[0]
r_eq_1hz_expected2 = ((2 * nr_periods * amplitude**m) / neq)**(1 / m)
np.testing.assert_allclose(r_eq_1hz2, r_eq_1hz_expected2)
# 1hz equivalent should be independent of the length of the signal
np.testing.assert_allclose(r_eq_1hz, r_eq_1hz2)
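For reference, the expected values above follow from the Palmgren-Miner equivalent-load definition Leq = (sum(n_i * S_i**m) / neq)**(1/m). A minimal, self-contained sketch of that identity, with m fixed to 1 as in the test:

import numpy as np

amplitude, nr_periods, m = 2, 10, 1
time = np.linspace(0, nr_periods * 2 * np.pi, 100 * 180)
neq = time[-1]  # one equivalent cycle per second of signal
# Rainflow counting of a pure sine with nr_periods periods yields
# nr_periods cycles of range 2 * amplitude, so:
leq = (nr_periods * (2 * amplitude) ** m / neq) ** (1 / m)
# For m == 1 this equals ((2 * nr_periods * amplitude**m) / neq)**(1 / m),
# the expression asserted in test_leq_1hz above.
print(leq)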
def test_rainflow_combi(self):
"""Signal with two frequencies and amplitudes
"""
amplitude = 1
# peak2peak = amplitude * 2
m = 1
point_per_deg = 100
nr_periods = 10
time = np.linspace(0, nr_periods*2*np.pi, point_per_deg*180)
neq = time[-1]
# mean value of the signal shouldn't matter
signal = amplitude * np.sin(time) + 5
r_eq_1hz = eq_load(signal, no_bins=1, m=m, neq=neq)[0]
r_eq_1hz_expected = ((2*nr_periods*amplitude**m)/neq)**(1/m)
np.testing.assert_allclose(r_eq_1hz, r_eq_1hz_expected)
# sine signal with 20 periods (40 peaks)
nr_periods = 20
time = np.linspace(0, nr_periods*2*np.pi, point_per_deg*180)
neq = time[-1]
# mean value of the signal shouldn't matter
signal = amplitude * np.sin(time) + 9
r_eq_1hz2 = eq_load(signal, no_bins=1, m=m, neq=neq)[0]
r_eq_1hz_expected2 = ((2*nr_periods*amplitude**m)/neq)**(1/m)
np.testing.assert_allclose(r_eq_1hz2, r_eq_1hz_expected2)
# 1hz equivalent load should be independent of the length of the signal
np.testing.assert_allclose(r_eq_1hz, r_eq_1hz2)
time = np.linspace(0, nr_periods * 2 * np.pi, point_per_deg * 180)
signal = (amplitude * np.sin(time)) + 5 + (amplitude * 0.2 * np.cos(5 * time))
cycles, ampl_bin_mean, ampl_edges, mean_bin_mean, mean_edges = \
cycle_matrix(signal, ampl_bins=10, mean_bins=5)
cycles.sum()
def test_astm1(self):
signal = np.array([-2.0, 0.0, 1.0, 0.0, -3.0, 0.0, 5.0, 0.0, -1.0, 0.0, 3.0, 0.0, -4.0, 0.0, 4.0, 0.0, -2.0])
ampl, mean = rainflow_astm(signal)
np.testing.assert_array_equal(np.histogram2d(ampl, mean, [6, 4])[0], np.array([[ 0., 1., 0., 0.],
[ 1., 0., 0., 2.],
[ 0., 0., 0., 0.],
[ 0., 0., 0., 1.],
[ 0., 0., 0., 0.],
[ 0., 0., 1., 2.]]))
np.testing.assert_array_equal(np.histogram2d(ampl, mean, [6, 4])[0], np.array([[0., 1., 0., 0.],
[1., 0., 0., 2.],
[0., 0., 0., 0.],
[0., 0., 0., 1.],
[0., 0., 0., 0.],
[0., 0., 1., 2.]]))
def test_windap1(self):
signal = np.array([-2.0, 0.0, 1.0, 0.0, -3.0, 0.0, 5.0, 0.0, -1.0, 0.0, 3.0, 0.0, -4.0, 0.0, 4.0, 0.0, -2.0])
ampl, mean = rainflow_windap(signal, 18, 2)
np.testing.assert_array_equal(np.histogram2d(ampl, mean, [6, 4])[0], np.array([[ 0., 0., 1., 0.],
[ 1., 0., 0., 2.],
[ 0., 0., 0., 0.],
[ 0., 0., 0., 1.],
[ 0., 0., 0., 0.],
[ 0., 0., 2., 1.]]))
np.testing.assert_array_equal(np.histogram2d(ampl, mean, [6, 4])[0], np.array([[0., 0., 1., 0.],
[1., 0., 0., 2.],
[0., 0., 0., 0.],
[0., 0., 0., 1.],
[0., 0., 0., 0.],
[0., 0., 2., 1.]]))
def test_windap2(self):
data = Hawc2io.ReadHawc2(testfilepath + "test").ReadBinary([2]).flatten()
np.testing.assert_allclose(eq_load(data, neq=61), np.array([[1.356, 1.758, 2.370, 2.784, 3.077, 3.296]]), 0.01)
def test_astm2(self):
data = Hawc2io.ReadHawc2(testfilepath + "test").ReadBinary([2]).flatten()
np.testing.assert_allclose(eq_load(data, neq=61, rainflow_func=rainflow_astm), np.array([[1.356, 1.758, 2.370, 2.784, 3.077, 3.296]]), 0.01)
np.testing.assert_allclose(eq_load(data, neq=61, rainflow_func=rainflow_astm),
np.array([[1.356, 1.758, 2.370, 2.784, 3.077, 3.296]]), 0.01)
# def test_windap3(self):
@@ -96,20 +111,40 @@ class TestFatigueTools(unittest.TestCase):
# [ 0., 0., 0., 0.],
# [ 0., 1., 2., 0.]]) / 2)
def test_astm3(self):
data = Hawc2io.ReadHawc2(testfilepath + "test").ReadBinary([2]).flatten()
np.testing.assert_allclose(cycle_matrix(data, 4, 4, rainflow_func=rainflow_astm)[0], np.array([[ 24., 83., 53., 26.],
[ 0., 1., 4., 0.],
[ 0., 0., 0., 0.],
[ 0., 1., 2., 0.]]) / 2, 0.001)
np.testing.assert_allclose(cycle_matrix(data, 4, 4, rainflow_func=rainflow_astm)[0], np.array([[24., 83., 53., 26.],
[0., 1., 4., 0.],
[0., 0., 0., 0.],
[0., 1., 2., 0.]]) / 2, 0.001)
def test_astm_weighted(self):
data = Hawc2io.ReadHawc2(testfilepath + "test").ReadBinary([2]).flatten()
np.testing.assert_allclose(cycle_matrix([(1, data),(1,data)], 4, 4, rainflow_func=rainflow_astm)[0], np.array([[ 24., 83., 53., 26.],
[ 0., 1., 4., 0.],
[ 0., 0., 0., 0.],
[ 0., 1., 2., 0.]]) , 0.001)
np.testing.assert_allclose(cycle_matrix([(1, data), (1, data)], 4, 4, rainflow_func=rainflow_astm)[0], np.array([[24., 83., 53., 26.],
[0., 1., 4., 0.],
[0., 0., 0., 0.],
[0., 1., 2., 0.]]), 0.001)
def test_astm_matlab_example(self):
# example from https://se.mathworks.com/help/signal/ref/rainflow.html
fs = 512
X = np.array([-2, 1, -3, 5, -1, 3, -4, 4, -2])
Y = -np.diff(X)[:, np.newaxis] / 2. * np.cos(np.pi *
np.arange(0, 1, 1 / fs))[np.newaxis] + ((X[:-1] + X[1:]) / 2)[:, np.newaxis]
Y = np.r_[Y.flatten(), X[-1]]
range_lst, mean_lst = (rainflow_astm(Y))
np.testing.assert_array_equal(range_lst, [3, 4, 4, 4, 8, 9, 8, 6])
np.testing.assert_array_equal(mean_lst, [-.5, -1, 1, 1, 1, .5, 0, 1])
if 0:
import matplotlib.pyplot as plt
plt.plot(np.arange(0, len(X) - 1 + 1 / fs, 1 / fs), Y)
plt.plot(np.arange(len(X)), X, 'o')
plt.show()
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
......
@@ -2,6 +2,7 @@ from ._io import load, read_sensor_info
from wetb import gtsdf
import numpy as np
class Dataset(gtsdf.Dataset):
def __init__(self, filename, name_stop=8,dtype=np.float):
self.time, self.data, self.info = load(filename, name_stop,dtype)
\ No newline at end of file
def __init__(self, filename, name_stop=8, dtype=float):
self.time, self.data, self.info = load(filename, name_stop, dtype)
@@ -7,7 +7,8 @@ import struct
import numpy as np
import os
def load(filename, name_stop=8, dtype=np.float):
def load(filename, name_stop=8, dtype=float):
"""
Parameters
----------
@@ -15,10 +16,10 @@ def load(filename, name_stop=8, dtype=np.float):
- name_stop : int or str
if int: Number of characters for name
if str: name-description delimiter, e.g. " "
- dtype
- dtype
"""
if isinstance(filename, str):
fid = open(filename,'rb')
fid = open(filename, 'rb')
elif hasattr(filename, "name"):
fid = filename
filename = fid.name
@@ -26,11 +27,11 @@ def load(filename, name_stop=8, dtype=np.float):
_ = struct.unpack('i', fid.read(4))
_ = struct.unpack('i', fid.read(4))
title = fid.read(60).strip()
_ = struct.unpack('i', fid.read(4))
_ = struct.unpack('i', fid.read(4))
no_sensors = struct.unpack('i', fid.read(4))[0]
sensor_numbers = [struct.unpack('i', fid.read(4))[0] for _ in range(no_sensors)]
_ = struct.unpack('i', fid.read(4))
_ = struct.unpack('i', fid.read(4))
@@ -38,9 +39,9 @@ def load(filename, name_stop=8, dtype=np.float):
time_step = struct.unpack('f', fid.read(4))[0]
scale_factors = np.array([struct.unpack('f', fid.read(4))[0] for _ in range(no_sensors)], dtype=np.double)
data = np.fromstring(fid.read(), 'int16').astype(dtype)
finally:
finally:
fid.close()
# if title.isalnum():
# self.set_info(title, "", self.filename)
# else:
@@ -48,36 +49,36 @@ def load(filename, name_stop=8, dtype=np.float):
try:
data = data.reshape(len(data) // no_sensors, no_sensors)
except ValueError:
raise ValueError("The number of data values (%d) is not divisible by the number of sensors (%d)" % (len(data), no_sensors))
raise ValueError(
"The number of data values (%d) is not divisible by the number of sensors (%d)" %
(len(data), no_sensors))
for i in range(data.shape[1]):
data[:, i] *= scale_factors[i]
no_scans = data.shape[0]
# Load sensor file if exists
sensor_filename = os.path.join(os.path.dirname(filename), "sensor")
sensor_info = {info[0]:info[1:] for info in read_sensor_info(sensor_filename, name_stop) }
sensor_info = {info[0]: info[1:] for info in read_sensor_info(sensor_filename, name_stop)}
# set gain and offset for "Time"
gains = []
offsets = []
names, units, descriptions = [], [], []
for sensor_nr in sensor_numbers:
name, unit, description, gain, offset = sensor_info.get(sensor_nr, ["Attribute %d"%sensor_nr, '-','',1,0])
if sensor_nr==1 and name=="Time" and unit=="s":
data = data[:,1:]
name, unit, description, gain, offset = sensor_info.get(sensor_nr, ["Attribute %d" % sensor_nr, '-', '', 1, 0])
if sensor_nr == 1 and name == "Time" and unit == "s":
data = data[:, 1:]
continue
names.append(name)
units.append(unit)
descriptions.append(description)
gains.append(gain)
offsets.append(offset)
time = np.arange(time_start, time_start + data.shape[0] * time_step, time_step).reshape((no_scans, 1))
#data = np.concatenate((time, data), axis=1)
#gains = np.r_[1,gains]
@@ -86,10 +87,10 @@ def load(filename, name_stop=8, dtype=np.float):
# self[:]+=self.offsets
info = {"name": title,
"description": filename,
"attribute_names": names,
"attribute_units": units,
"attribute_names": names,
"attribute_units": units,
"attribute_descriptions": descriptions}
return time, data, info
return time, data, info
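A hedged usage sketch of the loader above. The file name is hypothetical, and wetb.flex._io is the assumed module path:

from wetb.flex._io import load  # assumed module path

# 'my_run.int' is a hypothetical FLEX result file; a file named 'sensor' is
# expected next to it, supplying names, units, descriptions, gains and offsets.
time, data, info = load('my_run.int', name_stop=8)
print(info['attribute_names'])  # sensor names from the sensor file
print(time.shape, data.shape)   # (no_scans, 1) and (no_scans, no_sensors)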
def read_sensor_info(sensor_file, name_stop=8):
@@ -99,7 +100,7 @@ def read_sensor_info(sensor_file, name_stop=8):
- sensor_file
- name_stop : int or str
if int: Number of characters for name
if str: name-description delimiter, e.g. " "
if str: name-description delimiter, e.g. " "
"""
if hasattr(sensor_file, 'readlines'):
@@ -116,15 +117,14 @@ def read_sensor_info(sensor_file, name_stop=8):
gain = float(line[1])
offset = float(line[2])
unit = line[5]
if isinstance(name_stop,int):
if isinstance(name_stop, int):
name_desc = " ".join(line[6:])
name = name_desc[:name_stop].strip()
description = name_desc[name_stop:].strip()
elif isinstance(name_stop,str):
elif isinstance(name_stop, str):
name_desc = (" ".join(line[6:])).split(name_stop)
name = name_desc[0].strip()
description = name_stop.join(name_desc[1:]).strip()
sensor_info.append((nr, name, unit, description, gain, offset))
return sensor_info
@@ -135,7 +135,7 @@ def read_sensor_info(sensor_file, name_stop=8):
# f = open(filename, 'wb')
# time_att = ds.basis_attribute()
# sensors = [s for s in ds.attributes() if s is not time_att]
#
#
# if isinstance(ds, FLEX4Dataset):
# data = ds[:] # (ds[:]-ds.offsets)/ds.gains
# else:
@@ -152,7 +152,7 @@ def read_sensor_info(sensor_file, name_stop=8):
# f.write(struct.pack('ii', 0, 0)) # 2x empty int
# time = ds.basis_attribute()
# f.write(struct.pack('ff', time[0], time[1] - time[0])) # start time and time step
#
#
# scale_factors = np.max(np.abs(data), 0) / 32000
# f.write(struct.pack('f' * len(scale_factors), *scale_factors))
# # avoid dividing by zero
@@ -162,13 +162,13 @@ def read_sensor_info(sensor_file, name_stop=8):
# data = np.round(data.flatten()).astype(np.int16)
# f.write(struct.pack('h' * len(data), *data.tolist()))
# f.close()
#
#
# # write sensor file
# f = open(os.path.join(os.path.dirname(filename), 'sensor'), 'w')
# f.write("Sensor list for %s\n" % filename)
# f.write(" No forst offset korr. c Volt Unit Navn Beskrivelse------------\n")
# sensorlineformat = "%3s %.3f %.3f 1.00 0.00 %7s %-8s %s\n"
#
#
# if isinstance(ds, FLEX4Dataset):
# gains = np.r_[ds.gains[1:], np.ones(ds.shape[1] - len(ds.gains))]
# offsets = np.r_[ds.offsets[1:], np.zeros(ds.shape[1] - len(ds.offsets))]
@@ -176,4 +176,4 @@ def read_sensor_info(sensor_file, name_stop=8):
# else:
# sensorlines = [sensorlineformat % ((nr + 1), 1, 0, att.unit[:7], att.name.replace(" ", "_")[:8], att.description[:512]) for nr, att in enumerate(sensors)]
# f.writelines(sensorlines)
# f.close()
\ No newline at end of file
# f.close()
@@ -24,12 +24,6 @@ This module contains three methods:
.. _append_block: gtsdf.html#gtsdf.append_block
"""
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
d = None
d = dir()
@@ -38,9 +32,6 @@ from .gtsdf import save
from .gtsdf import load
from .gtsdf import append_block
from .gtsdf import load_pandas
from .gtsdf import add_statistic
from .gtsdf import load_statistic
from .gtsdf import compress2statistics
class Dataset(object):
def __init__(self, filename):
......
from __future__ import division, print_function, absolute_import, unicode_literals
from builtins import zip
from builtins import range
from builtins import str
from future import standard_library
from wetb.fatigue_tools.fatigue import eq_load
standard_library.install_aliases()
import warnings
from wetb.gtsdf.unix_time import from_unix
try:
import h5py
except ImportError as e:
raise ImportError("HDF5 library cannot be loaded. Windows XP is a known cause of this problem\n%s" % e)
import os
import numpy as np
import numpy.ma as ma
import pandas as pd
block_name_fmt = "block%04d"
def load(filename, dtype=None):
"""Load a 'General Time Series Data Format'-hdf5 datafile
Parameters
----------
filename : str or h5py.File
filename or open file object
dtype: data type, optional
type of returned data array, e.g. float16, float32 or float64.
If None (default), the type of the returned data depends on the type of the file data
Returns
-------
time : ndarray(dtype=float64), shape (no_observations,)
time
data : ndarray(dtype=dtype), shape (no_observations, no_attributes)
data
info : dict
info containing:
- type: "General Time Series Data Format"
- name: name of dataset or filename if not present in file
- no_attributes: Number of attributes
- no_blocks: Number of datablocks
- [description]: description of dataset or "" if not present in file
- [attribute_names]: list of attribute names
- [attribute_units]: list of attribute units
- [attribute_descriptions]: list of attribute descriptions
See Also
--------
gtsdf, save
Examples
--------
>>> import gtsdf
>>> data = np.arange(6).reshape(3,2)
>>> gtsdf.save('test.hdf5', data)
>>> time, data, info = gtsdf.load('test.hdf5')
>>> print time
[ 0. 1. 2.]
>>> print data
[[ 0. 1.]
[ 2. 3.]
[ 4. 5.]]
>>> print info
{'no_blocks': 1, 'type': 'General time series data format', 'name': 'test', 'no_attributes': 2, 'description': ''}
>>> gtsdf.save('test.hdf5', data, name='MyDataset',
description='MyDatasetDescription',
attribute_names=['Att1', 'Att2'],
attribute_units=['m', "m/s"],
attribute_descriptions=['Att1Desc', 'Att2Desc'],
time = np.array([0,1,4]),
time_start = 10,
time_step=2,
dtype=np.float64)
>>> time, data, info = gtsdf.load('test.hdf5')
>>> print time
[ 10. 12. 18.]
>>> print data
[[ 0. 1.]
[ 2. 3.]
[ 4. 5.]]
>>> print info
{'attribute_names': array(['Att1', 'Att2'], dtype='|S4'),
'attribute_units': array(['m', 'm/s'], dtype='|S3'),
'attribute_descriptions': array(['Att1Desc', 'Att2Desc'], dtype='|S8'),
'name': 'MyDataset',
'no_attributes': 2,
'no_blocks': 1,
'type': 'General time series data format',
'description': 'MyDatasetDescription'}
"""
f = _open_h5py_file(filename)
try:
info = _load_info(f)
time, data = _load_timedata(f,dtype)
return time, data, info
finally:
try:
f.close()
except:
pass
def _open_h5py_file(filename):
if isinstance(filename, h5py.File):
f = filename
filename = f.filename
else:
assert os.path.isfile(filename), "File, %s, does not exist" % filename
f = h5py.File(filename, 'r')
return f
def decode(v):
if isinstance(v, bytes):
return v.decode('latin1')
elif hasattr(v,'len'):
return [decode(v_) for v_ in v]
return v
def _load_info(f):
info = {k: decode(v) for k, v in f.attrs.items()}
check_type(f)
if 'name' not in info:
info['name'] = os.path.splitext(os.path.basename(f.filename))[0]
if 'attribute_names' in f:
info['attribute_names'] = [v.decode('latin1') for v in f['attribute_names']]
if 'attribute_units' in f:
info['attribute_units'] = [v.decode('latin1') for v in f['attribute_units']]
if 'attribute_descriptions' in f:
info['attribute_descriptions'] = [v.decode('latin1') for v in f['attribute_descriptions']]
return info
def _load_timedata(f, dtype):
no_blocks = f.attrs['no_blocks']
if (block_name_fmt % 0) not in f:
raise ValueError("HDF5 file must contain a group named '%s'" % (block_name_fmt % 0))
block0 = f[block_name_fmt % 0]
if 'data' not in block0:
raise ValueError("group %s must contain a dataset called 'data'" % (block_name_fmt % 0))
_, no_attributes = block0['data'].shape
if dtype is None:
file_dtype = f[block_name_fmt % 0]['data'].dtype
if "float" in str(file_dtype):
dtype = file_dtype
elif file_dtype in [np.int8, np.uint8, np.int16, np.uint16]:
dtype = np.float32
else:
dtype = np.float64
time = []
data = []
for i in range(no_blocks):
try:
block = f[block_name_fmt % i]
except KeyError:
continue
no_observations, no_attributes = block['data'].shape
block_time = (block.get('time', np.arange(no_observations))[:]).astype(np.float64)
if 'time_step' in block.attrs:
block_time *= block.attrs['time_step']
if 'time_start' in block.attrs:
block_time += block.attrs['time_start']
time.extend(block_time)
block_data = block['data'][:].astype(dtype)
if "int" in str(block['data'].dtype):
block_data[block_data == np.iinfo(block['data'].dtype).max] = np.nan
if 'gains' in block:
block_data *= block['gains'][:]
if 'offsets' in block:
block_data += block['offsets'][:]
data.append(block_data)
if no_blocks > 0:
data = np.vstack(data)
return np.array(time).astype(np.float64), np.array(data).astype(dtype)
def save(filename, data, **kwargs):
"""Save a 'General Time Series Data Format'-hdf5 datafile
Additional datablocks can be appended later using gtsdf.append_block
Parameters
----------
filename : str
data : array_like, shape (no_observations, no_attributes)
name : str, optional
Name of dataset
description : str, optional
Description of dataset
attribute_names : array_like, shape (no_attributes,), optional
Names of attributes
attribute_units : array_like, shape (no_attributes,), optional
Units of attributes
attribute_descriptions : array_like, shape(no_attributes,), optional
Descriptions of attributes
time : array_like, shape (no_observations, ), optional
Time, default is [0..no_observations-1]
time_start : int or float, optional
Time offset (e.g. start time in seconds since 1/1/1970), default is 0, see notes
time_step : int or float, optional
Time scale factor (e.g. 1/sample frequency), default is 1, see notes
dtype : data-type, optional
Data type of saved data array, default uint16.\n
Recommended choices:
- uint16: Data is compressed into 2 byte integers using a gain and offset factor for each attribute
- float64: Data is stored with high precision using 8 byte floats
Notes
-----
Time can be specified by either
- time (one value for each observation). Required for inhomogeneous time distributions
- time_start and/or time_step (one or two values). Recommended for homogeneous time distributions
- time and time_start and/or time_step (one value for each observation + one or two values)
When reading the file, the returned time-array is calculated as time * time_step + time_start
See Also
--------
gtsdf, append_block, load
Examples
--------
>>> import gtsdf
>>> data = np.arange(12).reshape(6,2)
>>> gtsdf.save('test.hdf5', data)
>>> gtsdf.save('test.hdf5', data, name='MyDataset',
description='MyDatasetDescription',
attribute_names=['Att1', 'Att2'],
attribute_units=['m', "m/s"],
attribute_descriptions=['Att1Desc', 'Att2Desc'],
time = np.array([0,1,2,6,7,8]),
time_start = 10,
time_step=2,
dtype=np.float64)
"""
if not filename.lower().endswith('.hdf5'):
filename += ".hdf5"
# exist_ok does not exist in Python27
if not os.path.exists(os.path.dirname(os.path.abspath(filename))):
os.makedirs(os.path.dirname(os.path.abspath(filename))) #, exist_ok=True)
_save_info(filename, data.shape, **kwargs)
append_block(filename, data, **kwargs)
def _save_info(filename, data_shape, **kwargs):
f = h5py.File(filename, "w")
try:
f.attrs["type"] = "General time series data format"
no_observations, no_attributes = data_shape
if 'name' in kwargs:
f.attrs['name'] = kwargs['name']
if 'description' in kwargs:
f.attrs['description'] = kwargs['description']
f.attrs['no_attributes'] = no_attributes
if 'attribute_names' in kwargs:
if no_attributes:
assert len(kwargs['attribute_names']) == no_attributes, "len(attribute_names)=%d but data shape is %s" % (len(kwargs['attribute_names']), data_shape)
f.create_dataset("attribute_names", data=np.array([v.encode('utf-8') for v in kwargs['attribute_names']]))
if 'attribute_units' in kwargs:
if no_attributes:
assert(len(kwargs['attribute_units']) == no_attributes)
f.create_dataset("attribute_units", data=np.array([v.encode('utf-8') for v in kwargs['attribute_units']]))
if 'attribute_descriptions' in kwargs:
if no_attributes:
assert(len(kwargs['attribute_descriptions']) == no_attributes)
f.create_dataset("attribute_descriptions", data=np.array([v.encode('utf-8') for v in kwargs['attribute_descriptions']]))
f.attrs['no_blocks'] = 0
except Exception:
raise
finally:
f.close()
def append_block(filename, data, **kwargs):
"""Append a data block and corresponding time data to already existing file
Parameters
----------
filename : str
data : array_like, shape (no_observations, no_attributes)
time : array_like, shape (no_observations, ), optional
Time, default is [0..no_observations-1]
time_start : int or float, optional
Time offset (e.g. start time in seconds since 1/1/1970), default is 0, see notes
time_step : int or float, optional
Time scale factor (e.g. 1/sample frequency), default is 1, see notes
dtype : data-type, optional
Data type of saved data array, default uint16.\n
Recommended choices:
- uint16: Data is compressed into 2 byte integers using a gain and offset factor for each attribute
- float64: Data is stored with high precision using 8 byte floats
Notes
-----
Time can be specified by either
- time (one value for each observation). Required for inhomogeneous time distributions
- time_start and/or time_step (one or two values). Recommended for homogeneous time distributions
- time and time_start and/or time_step (one value for each observation + one or two values)
When reading the file, the returned time-array is calculated as time * time_step + time_start
See Also
--------
gtsdf, save
Examples
--------
>>> import gtsdf
>>> data = np.arange(12).reshape(6,2)
>>> gtsdf.save('test.hdf5', data)
>>> gtsdf.append_block('test.hdf5', data+6)
>>> time, data, info = gtsdf.load('test.hdf5')
>>> print time
[ 0. 1. 2. 3. 4. 5.]
>>> print data
[[ 0. 1.]
[ 2. 3.]
[ 4. 5.]
[ 6. 7.]
[ 8. 9.]
[ 10. 11.]]
>>> print info
{'no_blocks': 2, 'type': 'General time series data format', 'name': 'test', 'no_attributes': 2}
"""
try:
f = h5py.File(filename, "a")
check_type(f)
no_observations, no_attributes = data.shape
assert(no_attributes == f.attrs['no_attributes'])
blocknr = f.attrs['no_blocks']
if blocknr == 0:
dtype = kwargs.get('dtype', np.uint16)
else:
dtype = f[block_name_fmt % 0]['data'].dtype
if dtype == np.uint16:
if no_observations < 12: # size with float32<1.2*size with uint16
dtype = np.float32
block = f.create_group(block_name_fmt % blocknr)
if 'time' in kwargs:
assert(len(kwargs['time']) == no_observations)
block.create_dataset('time', data=kwargs['time'])
if 'time_step' in kwargs:
time_step = kwargs['time_step']
block.attrs['time_step'] = np.float64(time_step)
if 'time_start' in kwargs:
block.attrs['time_start'] = np.float64(kwargs['time_start'])
pct_res = np.array([1])
if "int" in str(dtype):
if np.any(np.isinf(data)):
f.close()
raise ValueError ("Int compression does not support 'inf'\nConsider removing outliers or use float datatype")
nan = np.isnan(data)
non_nan_data = ma.masked_array(data, nan)
offsets = np.min(non_nan_data, 0)
try:
data = np.copy(data).astype(np.float64)
except MemoryError:
data = np.copy(data)
data -= offsets
with warnings.catch_warnings():
warnings.simplefilter("ignore") # ignore warning caused by abs(nan) and np.nanmax(nan)
pct_res = (np.percentile(data[~np.isnan(data)], 75, 0) - np.percentile(data[~np.isnan(data)], 25, 0)) / np.nanmax(np.abs(data), 0) # percent of resolution for middle half of data
gains = np.max(non_nan_data - offsets, 0).astype(np.float64) / (np.iinfo(dtype).max - 1) #-1 to save value for NaN
not0 = np.where(gains != 0)
data[:, not0] /= gains[not0]
data = data.astype(dtype)
data[nan] = np.iinfo(dtype).max
block.create_dataset('gains', data=gains)
block.create_dataset('offsets', data=offsets)
block.create_dataset("data", data=data.astype(dtype))
f.attrs['no_blocks'] = blocknr + 1
f.close()
if "int" in str(dtype):
int_res = (np.iinfo(dtype).max - np.iinfo(dtype).min)
with np.errstate(invalid='ignore'):
if min(pct_res[pct_res > 0]) * int_res < 256:
raise Warning("Less than 256 values are used to represent 50%% of the values in column(s): %s\nConsider removing outliers or use float datatype" % np.where(pct_res[pct_res > 0] * int_res < 256)[0])
except Exception:
try:
f.close()
except:
pass
raise
def load_pandas(filename, dtype=None):
import pandas as pd
time, data, info = load(filename, dtype)
df = pd.DataFrame()
df["Time"] = time
df["Date"] = [from_unix(t) for t in time]
for n, d in zip(info['attribute_names'], data.T):
df[n] = d
return df
def check_type(f):
if 'type' not in f.attrs or \
(f.attrs['type'].lower() != "general time series data format" and f.attrs['type'].lower() != b"general time series data format"):
raise ValueError("HDF5 file must contain a 'type'-attribute with the value 'General time series data format'")
if 'no_blocks' not in f.attrs:
raise ValueError("HDF5 file must contain an attribute named 'no_blocks'")
def _get_statistic(time, data, statistics=['min','mean','max','std','eq3','eq4','eq6','eq8','eq10','eq12']):
def get_stat(stat):
if hasattr(np, stat):
return getattr(np,stat)(data,0)
elif (stat.startswith("eq") and stat[2:].isdigit()):
m = float(stat[2:])
return [eq_load(sensor, 46, m, time[-1]-time[0]+time[1]-time[0])[0][0] for sensor in data.T]
return np.array([get_stat(stat) for stat in statistics]).T
def _add_statistic_data(file, stat_data, statistics=['min','mean','max','std','eq3','eq4','eq6','eq8','eq10','eq12']):
f = h5py.File(file, "a")
stat_grp = f.create_group("Statistic")
stat_grp.create_dataset("statistic_names", data=np.array([v.encode('utf-8') for v in statistics]))
stat_grp.create_dataset("statistic_data", data=stat_data.astype(np.float))
f.close()
def add_statistic(file, statistics=['min','mean','max','std','eq3','eq4','eq6','eq8','eq10','eq12']):
time, data, info = load(file)
stat_data = _get_statistic(time, data, statistics)
_add_statistic_data(file, stat_data, statistics)
def load_statistic(filename):
f = _open_h5py_file(filename)
info = _load_info(f)
names = decode(f['Statistic']['statistic_names'])
data =np.array(f['Statistic']['statistic_data'])
return pd.DataFrame(data, columns=names), info
def compress2statistics(filename, statistics=['min','mean','max','std','eq3','eq4','eq6','eq8','eq10','eq12']):
time, data, info = load(filename)
stat_data = _get_statistic(time, data, statistics)
_save_info(filename, data.shape, **info)
_add_statistic_data(filename, stat_data, statistics)
import warnings
from wetb.gtsdf.unix_time import from_unix
from wetb.utils.postprocs import statistics
try:
import h5py
except ImportError as e:
raise ImportError("HDF5 library cannot be loaded. Windows XP is a known cause of this problem\n%s" % e)
import os
import numpy as np
import numpy.ma as ma
import xarray as xr
import glob
import tqdm
import inspect
block_name_fmt = "block%04d"
def load(filename, dtype=None):
"""Load a 'General Time Series Data Format'-hdf5 datafile
Parameters
----------
filename : str or h5py.File
filename or open file object
dtype: data type, optional
type of returned data array, e.g. float16, float32 or float64.
If None (default), the type of the returned data depends on the type of the file data
Returns
-------
time : ndarray(dtype=float64), shape (no_observations,)
time
data : ndarray(dtype=dtype), shape (no_observations, no_attributes)
data
info : dict
info containing:
- type: "General Time Series Data Format"
- name: name of dataset or filename if not present in file
- no_attributes: Number of attributes
- no_blocks: Number of datablocks
- [description]: description of dataset or "" if not present in file
- [attribute_names]: list of attribute names
- [attribute_units]: list of attribute units
- [attribute_descriptions]: list of attribute descriptions
See Also
--------
gtsdf, save
Examples
--------
>>> import gtsdf
>>> data = np.arange(6).reshape(3,2)
>>> gtsdf.save('test.hdf5', data)
>>> time, data, info = gtsdf.load('test.hdf5')
>>> print(time)
[ 0. 1. 2.]
>>> print(data)
[[ 0. 1.]
[ 2. 3.]
[ 4. 5.]]
>>> print(info)
{'no_blocks': 1, 'type': 'General time series data format', 'name': 'test', 'no_attributes': 2, 'description': ''}
>>> gtsdf.save('test.hdf5', data, name='MyDataset',
description='MyDatasetDescription',
attribute_names=['Att1', 'Att2'],
attribute_units=['m', "m/s"],
attribute_descriptions=['Att1Desc', 'Att2Desc'],
time = np.array([0,1,4]),
time_start = 10,
time_step=2,
dtype=np.float64)
>>> time, data, info = gtsdf.load('test.hdf5')
>>> print(time)
[ 10. 12. 18.]
>>> print(data)
[[ 0. 1.]
[ 2. 3.]
[ 4. 5.]]
>>> print(info)
{'attribute_names': array(['Att1', 'Att2'], dtype='|S4'),
'attribute_units': array(['m', 'm/s'], dtype='|S3'),
'attribute_descriptions': array(['Att1Desc', 'Att2Desc'], dtype='|S8'),
'name': 'MyDataset',
'no_attributes': 2,
'no_blocks': 1,
'type': 'General time series data format',
'description': 'MyDatasetDescription'}
"""
f = _open_h5py_file(filename)
try:
info = _load_info(f)
time, data = _load_timedata(f, dtype)
return time, data, info
finally:
try:
f.close()
except BaseException:
pass
def _open_h5py_file(filename):
if isinstance(filename, h5py.File):
f = filename
filename = f.filename
else:
assert os.path.isfile(filename), "File, %s, does not exist" % filename
f = h5py.File(filename, 'r')
return f
def decode(v):
if isinstance(v, bytes):
return v.decode('latin1')
elif hasattr(v, 'len'):
return [decode(v_) for v_ in v]
return v
def _load_info(f):
info = {k: decode(v) for k, v in f.attrs.items()}
check_type(f)
if 'name' not in info:
info['name'] = os.path.splitext(os.path.basename(f.filename))[0]
if 'attribute_names' in f:
info['attribute_names'] = [v.decode('latin1') for v in f['attribute_names']]
if 'attribute_units' in f:
info['attribute_units'] = [v.decode('latin1') for v in f['attribute_units']]
if 'attribute_descriptions' in f:
info['attribute_descriptions'] = [v.decode('latin1') for v in f['attribute_descriptions']]
try:
info['dtype'] = f[block_name_fmt % 0]['data'].dtype
except BaseException:
pass
return info
def _load_timedata(f, dtype):
no_blocks = f.attrs['no_blocks']
if (block_name_fmt % 0) not in f:
raise ValueError("HDF5 file must contain a group named '%s'" % (block_name_fmt % 0))
block0 = f[block_name_fmt % 0]
if 'data' not in block0:
raise ValueError("group %s must contain a dataset called 'data'" % (block_name_fmt % 0))
_, no_attributes = block0['data'].shape
if dtype is None:
file_dtype = f[block_name_fmt % 0]['data'].dtype
if "float" in str(file_dtype):
dtype = file_dtype
elif file_dtype in [np.int8, np.uint8, np.int16, np.uint16]:
dtype = np.float32
else:
dtype = np.float64
time = []
data = []
for i in range(no_blocks):
try:
block = f[block_name_fmt % i]
except KeyError:
continue
no_observations, no_attributes = block['data'].shape
block_time = (block.get('time', np.arange(no_observations))[:]).astype(np.float64)
if 'time_step' in block.attrs:
block_time *= block.attrs['time_step']
if 'time_start' in block.attrs:
block_time += block.attrs['time_start']
time.extend(block_time)
block_data = block['data'][:].astype(dtype)
if "int" in str(block['data'].dtype):
block_data[block_data == np.iinfo(block['data'].dtype).max] = np.nan
if 'gains' in block:
block_data *= block['gains'][:]
if 'offsets' in block:
block_data += block['offsets'][:]
data.append(block_data)
if no_blocks > 0:
data = np.vstack(data)
return np.array(time).astype(np.float64), np.array(data).astype(dtype)
def save(filename, data, **kwargs):
"""Save a 'General Time Series Data Format'-hdf5 datafile
Additional datablocks can be appended later using gtsdf.append_block
Parameters
----------
filename : str
data : array_like, shape (no_observations, no_attributes)
name : str, optional
Name of dataset
description : str, optional
Description of dataset
attribute_names : array_like, shape (no_attributes,), optional
Names of attributes
attribute_units : array_like, shape (no_attributes,), optional
Units of attributes
attribute_descriptions : array_like, shape(no_attributes,), optional
Descriptions of attributes
time : array_like, shape (no_observations, ), optional
Time, default is [0..no_observations-1]
time_start : int or float, optional
Time offset (e.g. start time in seconds since 1/1/1970), default is 0, see notes
time_step : int or float, optional
Time scale factor (e.g. 1/sample frequency), default is 1, see notes
dtype : data-type, optional
Data type of saved data array, default uint16.\n
Recommended choices:
- uint16: Data is compressed into 2 byte integers using a gain and offset factor for each attribute
- float64: Data is stored with high precision using 8 byte floats
Notes
-----
Time can be specified by either
- time (one value for each observation). Required for inhomogeneous time distributions
- time_start and/or time_step (one or two values). Recommended for homogeneous time distributions
- time and time_start and/or time_step (one value for each observation + one or two values)
When reading the file, the returned time-array is calculated as time * time_step + time_start
See Also
--------
gtsdf, append_block, load
Examples
--------
>>> import gtsdf
>>> data = np.arange(12).reshape(6,2)
>>> gtsdf.save('test.hdf5', data)
>>> gtsdf.save('test.hdf5', data, name='MyDataset',
description='MyDatasetDescription',
attribute_names=['Att1', 'Att2'],
attribute_units=['m', "m/s"],
attribute_descriptions=['Att1Desc', 'Att2Desc'],
time = np.array([0,1,2,6,7,8]),
time_start = 10,
time_step=2,
dtype=np.float64)
"""
if not filename.lower().endswith('.hdf5'):
filename += ".hdf5"
# exist_ok does not exist in Python27
if not os.path.exists(os.path.dirname(os.path.abspath(filename))):
os.makedirs(os.path.dirname(os.path.abspath(filename))) # , exist_ok=True)
_save_info(filename, data.shape, **kwargs)
append_block(filename, data, **kwargs)
def _save_info(filename, data_shape, **kwargs):
f = h5py.File(filename, "w")
try:
f.attrs["type"] = "General time series data format"
no_observations, no_attributes = data_shape
if 'name' in kwargs:
f.attrs['name'] = kwargs['name']
if 'description' in kwargs:
f.attrs['description'] = kwargs['description']
f.attrs['no_attributes'] = no_attributes
if 'attribute_names' in kwargs:
if no_attributes:
assert len(kwargs['attribute_names']) == no_attributes, "len(attribute_names)=%d but data shape is %s" % (
len(kwargs['attribute_names']), data_shape)
f.create_dataset("attribute_names", data=np.array([v.encode('utf-8') for v in kwargs['attribute_names']]))
if 'attribute_units' in kwargs:
if no_attributes:
assert(len(kwargs['attribute_units']) == no_attributes)
f.create_dataset("attribute_units", data=np.array([v.encode('utf-8') for v in kwargs['attribute_units']]))
if 'attribute_descriptions' in kwargs:
if no_attributes:
assert(len(kwargs['attribute_descriptions']) == no_attributes)
f.create_dataset("attribute_descriptions", data=np.array(
[v.encode('utf-8') for v in kwargs['attribute_descriptions']]))
f.attrs['no_blocks'] = 0
except Exception:
raise
finally:
f.close()
def append_block(filename, data, **kwargs):
"""Append a data block and corresponding time data to already existing file
Parameters
----------
filename : str
data : array_like, shape (no_observations, no_attributes)
time : array_like, shape (no_observations, ), optional
Time, default is [0..no_observations-1]
time_start : int or float, optional
Time offset (e.g. start time in seconds since 1/1/1970), default is 0, see notes
time_step : int or float, optional
Time scale factor (e.g. 1/sample frequency), default is 1, see notes
dtype : data-type, optional
Data type of saved data array, default uint16.\n
Recommended choices:
- uint16: Data is compressed into 2 byte integers using a gain and offset factor for each attribute
- float64: Data is stored with high precision using 8 byte floats
Notes
-----
Time can be specified by either
- time (one value for each observation). Required for inhomogeneous time distributions
- time_start and/or time_step (one or two values). Recommended for homogeneous time distributions
- time and time_start and/or time_step (one value for each observation + one or two values)
When reading the file, the returned time-array is calculated as time * time_step + time_start
See Also
--------
gtsdf, save
Examples
--------
>>> import gtsdf
>>> data = np.arange(12).reshape(6,2)
>>> gtsdf.save('test.hdf5', data)
>>> gtsdf.append_block('test.hdf5', data+6)
>>> time, data, info = gtsdf.load('test.hdf5')
>>> print(time)
[ 0. 1. 2. 3. 4. 5.]
>>> print(data)
[[ 0. 1.]
[ 2. 3.]
[ 4. 5.]
[ 6. 7.]
[ 8. 9.]
[ 10. 11.]]
>>> print(info)
{'no_blocks': 2, 'type': 'General time series data format', 'name': 'test', 'no_attributes': 2}
"""
try:
f = h5py.File(filename, "a")
check_type(f)
no_observations, no_attributes = data.shape
assert(no_attributes == f.attrs['no_attributes'])
blocknr = f.attrs['no_blocks']
if blocknr == 0:
dtype = kwargs.get('dtype', np.uint16)
else:
dtype = f[block_name_fmt % 0]['data'].dtype
if dtype == np.uint16:
if no_observations < 12: # size with float32<1.2*size with uint16
dtype = np.float32
block = f.create_group(block_name_fmt % blocknr)
if 'time' in kwargs:
assert(len(kwargs['time']) == no_observations)
block.create_dataset('time', data=kwargs['time'])
if 'time_step' in kwargs:
time_step = kwargs['time_step']
block.attrs['time_step'] = np.float64(time_step)
if 'time_start' in kwargs:
block.attrs['time_start'] = np.float64(kwargs['time_start'])
pct_res = np.array([1])
if "int" in str(dtype):
if np.any(np.isinf(data)):
f.close()
raise ValueError(
"Int compression does not support 'inf'\nConsider removing outliers or use float datatype")
nan = np.isnan(data)
non_nan_data = ma.masked_array(data, nan)
offsets = np.min(non_nan_data, 0)
try:
data = np.copy(data).astype(np.float64)
except MemoryError:
data = np.copy(data)
data -= offsets
with warnings.catch_warnings():
warnings.simplefilter("ignore") # ignore warning caused by abs(nan) and np.nanmax(nan)
pct_res = (np.percentile(data[~np.isnan(data)], 75, 0) - np.percentile(data[~np.isnan(data)],
25, 0)) / np.nanmax(np.abs(data), 0) # percent of resolution for middle half of data
gains = np.max(non_nan_data - offsets, 0).astype(np.float64) / \
(np.iinfo(dtype).max - 1) # -1 to save value for NaN
not0 = np.where(gains != 0)
data[:, not0] /= gains[not0]
data = data.astype(dtype)
data[nan] = np.iinfo(dtype).max
block.create_dataset('gains', data=gains)
block.create_dataset('offsets', data=offsets)
block.create_dataset("data", data=data.astype(dtype))
f.attrs['no_blocks'] = blocknr + 1
f.close()
if "int" in str(dtype):
int_res = (np.iinfo(dtype).max - np.iinfo(dtype).min)
with np.errstate(invalid='ignore'):
if min(pct_res[pct_res > 0]) * int_res < 256:
raise Warning("Less than 256 values are used to represent 50%% of the values in column(s): %s\nConsider removing outliers or use float datatype" % np.where(
pct_res[pct_res > 0] * int_res < 256)[0])
except Exception:
try:
f.close()
except BaseException:
pass
raise
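The int branch above packs each column as offset + gain * integer and reserves the maximum integer code for NaN. A self-contained sketch of that round trip, simplified with nanmin/nanmax in place of the masked arrays used above:

import numpy as np

data = np.array([[0.0, 10.0],
                 [1.5, np.nan],
                 [3.0, 30.0]])
dtype = np.uint16
nan = np.isnan(data)
offsets = np.nanmin(data, 0)
gains = np.nanmax(data - offsets, 0) / (np.iinfo(dtype).max - 1)  # -1 reserves the NaN code
scaled = (data - offsets) / np.where(gains == 0, 1, gains)
scaled[nan] = np.iinfo(dtype).max            # NaN sentinel, as in append_block
packed = scaled.astype(dtype)
# load-side reconstruction, mirroring _load_timedata:
restored = packed.astype(np.float64)
restored[packed == np.iinfo(dtype).max] = np.nan
restored = restored * gains + offsets        # NaN positions stay NaN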
def load_pandas(filename, dtype=None):
import pandas as pd
time, data, info = load(filename, dtype)
df = pd.DataFrame()
df["Time"] = time
df["Date"] = [from_unix(t) for t in time]
for n, d in zip(info['attribute_names'], data.T):
df[n] = d
return df
def check_type(f):
if 'type' not in f.attrs or \
(f.attrs['type'].lower() != "general time series data format" and f.attrs['type'].lower() != b"general time series data format"):
raise ValueError("HDF5 file must contain a 'type'-attribute with the value 'General time series data format'")
if 'no_blocks' not in f.attrs:
raise ValueError("HDF5 file must contain an attribute named 'no_blocks'")
def get_postproc(postproc_function, file_h5py, file, time_data_info=None, **kwargs) -> xr.DataArray:
"""
Call a given postproc function with its postproc-specific and file-specific parameters, and return a DataArray from its output if any.
Parameters
----------
postproc_function : function
Function that executes a given postproc
file_h5py : h5py.File object
h5py.File object in append mode from hdf5 file
file : str
Absolute path to hdf5 file
time_data_info : tuple, optional
Tuple containing the arrays time, data and the dictionary info. If passed, it is not necessary
to load them from the hdf5 file. The default is None.
**kwargs : dict, optional
Dictionary containing the postproc-specific parameters that postproc_function takes.
Returns
-------
postproc_output : xarray.DataArray
DataArray from postproc_function output if any, otherwise None. Its name will be
the same as the postproc_function
"""
print(f"Calculating {postproc_function.__name__} for '{file}'")
if time_data_info is None:
time_data_info = load(file)
time, data, info = time_data_info
postproc_function_args = inspect.signature(postproc_function).parameters.keys()
file_args = {}
for item in ['file_h5py', 'file', 'time', 'data', 'info']:
if item in postproc_function_args:
file_args[item] = locals()[item]
postproc_output = postproc_function(**file_args, **kwargs)
if postproc_output is None:
return
if isinstance(postproc_output, np.ndarray) or isinstance(postproc_output, list):
postproc_output = xr.DataArray(postproc_output)
postproc_output.name = postproc_function.__name__
return postproc_output
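A short sketch of the signature-inspection dispatch above: get_postproc passes a postproc function only the file-specific arguments it declares. channel_means below is hypothetical, not part of wetb.utils.postprocs:

import xarray as xr

def channel_means(data, info):
    # Hypothetical postproc: it declares only 'data' and 'info', so get_postproc
    # will pass exactly those two file-specific arguments (plus any kwargs).
    return xr.DataArray(data.mean(axis=0),
                        dims=('sensor_name',),
                        coords={'sensor_name': info['attribute_names']})

# da = get_postproc(channel_means, file_h5py=f, file='sim.hdf5')  # 'sim.hdf5' is hypothetical
# da.name == 'channel_means'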
def write_postproc(file, postproc_output) -> None:
"""
Write a postproc DataArray to an hdf5 file.\n
A group named after the DataArray is created under the group 'postproc'.
A dataset called 'data' is created under that group holding the DataArray data,
with its dims stored in the attribute "dims"; one dataset is also created for each
DataArray coordinate, with its dimension stored in the attribute "dim".
Parameters
----------
file : h5py.File object
h5py.File object in append mode from hdf5 file
postproc_output : xarray.DataArray
DataArray whose name, data, dims and coords are written into hdf5 file.
Returns
-------
None
"""
if postproc_output is None:
return
print(f"Writing {postproc_output.name} to '{file.filename}'")
if 'postproc' not in file:
file.create_group('postproc')
if postproc_output.name in file['postproc']:
del file['postproc'][postproc_output.name]
file['postproc'].create_group(postproc_output.name)
file['postproc'][postproc_output.name].create_dataset(name='data', data=postproc_output.astype(float))
file['postproc'][postproc_output.name]['data'].attrs['dims'] = [d.encode('utf-8') for d in postproc_output.dims]
for coord in postproc_output.coords:
if np.issubdtype(postproc_output.coords[coord].dtype, np.str_):
file['postproc'][postproc_output.name].create_dataset(name=coord, data=np.array([v.encode('utf-8') for v in postproc_output.coords[coord].values]))
else:
file['postproc'][postproc_output.name].create_dataset(name=coord, data=postproc_output.coords[coord].astype(float))
file['postproc'][postproc_output.name][coord].attrs['dim'] = postproc_output.coords[coord].dims[0].encode('utf-8')
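For orientation, a sketch reading back the layout write_postproc produces, here for a DataArray assumed to be named 'statistics' with dims ('sensor_name', 'statistic'); the file name is hypothetical:

import h5py

# 'sim.hdf5' is a hypothetical file already processed by write_postproc.
with h5py.File('sim.hdf5', 'r') as f:
    grp = f['postproc']['statistics']
    print(grp['data'].shape)                # the DataArray values
    print(list(grp['data'].attrs['dims']))  # e.g. [b'sensor_name', b'statistic']
    print(grp['statistic'][:])              # utf-8 encoded coordinate values
    print(grp['statistic'].attrs['dim'])    # the coordinate's own dimension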
def add_postproc(file, config={statistics: {'statistics': ['min', 'mean', 'max', 'std']}}) -> list:
"""
Call get_postproc and write_postproc for each postproc in config.
Parameters
----------
file : str
Absolute path to hdf5 file
config : dict, optional
Dictionary containing postprocs. Its keys are functions and its values are dicts for their postproc-specific parameters.\n
Example:\n
{statistics: {'statistics': ['min', 'mean', 'max', 'std']},\n
extreme_loads: {'sensors_info': [('Tower base shear force', 0, 1), ('Blade 1 '.' bending moment', 9, 10)]}}\n
The default is {statistics: {'statistics': ['min', 'mean', 'max', 'std']}}.
Returns
-------
postproc_output_list : list of DataArrays
List of DataArrays output by each postproc function
"""
time_data_info = load(file)
f = h5py.File(file, "a")
postproc_output_list = []
for postproc, kwargs in config.items():
postproc_output = get_postproc(postproc_function=postproc, file_h5py=f, file=file, time_data_info=time_data_info, **kwargs)
write_postproc(file=f, postproc_output=postproc_output)
if postproc_output is not None:
postproc_output_list.append(postproc_output)
f.close()
return postproc_output_list
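A hedged usage sketch of add_postproc with the default statistics postproc; 'sim.hdf5' is a hypothetical file:

from wetb.gtsdf.gtsdf import add_postproc
from wetb.utils.postprocs import statistics

outputs = add_postproc('sim.hdf5',
                       config={statistics: {'statistics': ['min', 'mean', 'max', 'std']}})
print([da.name for da in outputs])  # ['statistics']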
def load_postproc(filename,
config={statistics: {'statistics': ['min', 'mean', 'max', 'std']}},
force_recompute=False) -> list:
"""
Read data from the hdf5 file for each postproc in config and return a list of DataArrays. If any postproc in config is missing
from the hdf5 file, it is computed and written as well. Postprocs in config that are already in the hdf5 file can likewise be
recomputed and rewritten by passing force_recompute=True.
Parameters
----------
filename : str
Absolute path to hdf5 file
config : dict, optional
Dictionary containing postprocs. Its keys are functions and its values are dicts for their postproc-specific parameters.\n
Example:\n
{statistics: {'statistics': ['min', 'mean', 'max', 'std']},\n
extreme_loads: {'sensors_info': [('Tower base shear force', 0, 1), ('Blade 1 '.' bending moment', 9, 10)]}}\n
The default is {statistics: {'statistics': ['min', 'mean', 'max', 'std']}}.
force_recompute : bool, optional
Whether all postprocs in config should be calculated and written to hdf5 file (True) or only the ones that
are missing (False). The default is False.
Returns
-------
data_arrays: list of DataArrays
List of DataArrays, one for each group under 'postproc' in the hdf5 file that is also in config
"""
if force_recompute:
postproc_output_list = add_postproc(filename, config)
return postproc_output_list
f = _open_h5py_file(filename)
if 'postproc' not in f:
f.close()
postproc_output_list = add_postproc(filename, config)
return postproc_output_list
# Check which postprocs in config are missing in the hdf5 file
config_missing = {}
for postproc, args in config.items():
if postproc.__name__ not in f['postproc']:
config_missing[postproc] = args
# Add missing postprocs in config to hdf5 file, if there are any
if config_missing != {}:
f.close()
add_postproc(filename, config_missing)
f = _open_h5py_file(filename)
# Return list of DataArrays for all postprocs in config
data_arrays = []
for postproc in config.keys():
try:
data_arrays.append(xr.DataArray(name=postproc.__name__,
data=f['postproc'][postproc.__name__]['data'],
dims=f['postproc'][postproc.__name__]['data'].attrs['dims'],
coords={coord: ([v.decode('latin1') for v in f['postproc'][postproc.__name__][coord]]
if np.issubdtype(f['postproc'][postproc.__name__][coord].dtype, bytes)
else
f['postproc'][postproc.__name__][coord])
if coord in f['postproc'][postproc.__name__]['data'].attrs['dims']
else
((f['postproc'][postproc.__name__][coord].attrs['dim'], [v.decode('latin1') for v in f['postproc'][postproc.__name__][coord]])
if np.issubdtype(f['postproc'][postproc.__name__][coord].dtype, bytes)
else
(f['postproc'][postproc.__name__][coord].attrs['dim'], f['postproc'][postproc.__name__][coord]))
for coord in f['postproc'][postproc.__name__] if coord != 'data'}))
except:
continue
f.close()
return data_arrays
def collect_postproc(folder, recursive=True,
config={statistics: {'statistics': ['min', 'mean', 'max', 'std']}},
force_recompute=False) -> list:
"""
Call load_postproc for all hdf5 files in folder and collect the results into a single DataArray for each postproc in config.
Parameters
----------
folder : str
Absolute path to folder containing hdf5 files
recursive : bool, optional
Whether to include hdf5 files in subfolders (True) or not (False). The default is True.
config : dict, optional
Dictionary containing postprocs. Its keys are functions and its values are dicts for their postproc-specific parameters.\n
Example:\n
{statistics: {'statistics': ['min', 'mean', 'max', 'std']},\n
extreme_loads: {'sensors_info': [('Tower base shear force', 0, 1), ('Blade 1 '.' bending moment', 9, 10)]}}\n
The default is {statistics: {'statistics': ['min', 'mean', 'max', 'std']}}.
force_recompute : bool, optional
Whether all postprocs in config should be calculated and written to hdf5 file (True) or only the ones that
are missing (False). The default is False.
Returns
-------
data_arrays: list of DataArrays
List of DataArrays, one for each group under 'postproc' in the hdf5 file that is also in config. Each DataArray has
an extra leading dimension 'filename', since the DataArrays from all files are collected into a single one
"""
if recursive:
p = os.path.join('.', folder, '**', '*.hdf5')
else:
p = os.path.join('.', folder, '*.hdf5')
fn_lst = sorted(glob.glob(p, recursive=recursive))
if not fn_lst:
raise Exception(f"No *.hdf5 files found in {os.path.abspath(os.path.join('.', folder))}")
postproc_output_list_all_files = [load_postproc(fn, config=config, force_recompute=force_recompute) for fn in tqdm.tqdm(fn_lst)]
data_arrays = []
for i in range(len(postproc_output_list_all_files[0])):
name = postproc_output_list_all_files[0][i].name
data = np.array([f[i] for f in postproc_output_list_all_files])
dims = ('filename',) + postproc_output_list_all_files[0][i].dims
coords = postproc_output_list_all_files[0][i].coords
data_arrays.append(xr.DataArray(name=name, data=data, dims=dims, coords=coords))
data_arrays[-1].coords['filename'] = [os.path.relpath(fn, '.') for fn in fn_lst]
return data_arrays
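A usage sketch of collect_postproc. The folder is hypothetical, and the collected statistics DataArray is assumed to have dims ('filename', 'sensor_name', 'statistic'), consistent with the tests:

from wetb.gtsdf.gtsdf import collect_postproc
from wetb.utils.postprocs import statistics

das = collect_postproc('res', recursive=True,
                       config={statistics: {'statistics': ['min', 'mean', 'max', 'std']}})
stats = das[0]
print(stats.sel(statistic='max').max('filename'))  # per-sensor maxima across all files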
def compress2postproc(file, config={statistics: {'statistics': ['min', 'mean', 'max', 'std']}}) -> None:
"""
Compress hdf5 file into only the postproc data, removing all time series.
Parameters
----------
file : str
Absolute path to hdf5 file
config : dict, optional
Dictionary containing postprocs. Its keys are functions and its values are dicts for their postproc-specific parameters.\n
Example:\n
{statistics: {'statistics': ['min', 'mean', 'max', 'std']},\n
extreme_loads: {'sensors_info': [('Tower base shear force', 0, 1), ('Blade 1 '.' bending moment', 9, 10)]}}\n
The default is {statistics: {'statistics': ['min', 'mean', 'max', 'std']}}.
Returns
-------
None
"""
time_data_info = load(file)
time, data, info = time_data_info
_save_info(file, data.shape, **info)
f = h5py.File(file, "a")
for postproc, kwargs in config.items():
postproc_output = get_postproc(postproc_function=postproc, file_h5py=f, file=file, time_data_info=time_data_info, **kwargs)
write_postproc(file=f, postproc_output=postproc_output)
f.close()
\ No newline at end of file
No preview for this file type
@@ -3,14 +3,6 @@ Created on 12/09/2013
@author: mmpe
'''
from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import absolute_import
from builtins import super
from builtins import range
from future import standard_library
standard_library.install_aliases()
import h5py
import numpy as np
@@ -22,6 +14,7 @@ import os
tmp_path = os.path.dirname(__file__) + "/tmp/"
tfp = os.path.dirname(os.path.abspath(__file__)) + "/test_files/"
class Test_gsdf(unittest.TestCase):
def setUp(self):
unittest.TestCase.setUp(self)
@@ -31,30 +24,29 @@ class Test_gsdf(unittest.TestCase):
@classmethod
def tearDownClass(cls):
super(Test_gsdf, cls).tearDownClass()
#shutil.rmtree(tmp_path)
# shutil.rmtree(tmp_path)
def test_minimum_requirements (self):
def test_minimum_requirements(self):
fn = tmp_path + "minimum.hdf5"
f = h5py.File(fn, "w")
#no type
# no type
self.assertRaises(ValueError, gtsdf.load, fn)
f.attrs["type"] = "General time series data format"
#no no_blocks
# no no_blocks
self.assertRaises(ValueError, gtsdf.load, fn)
f.attrs["no_blocks"] = 0
#no block0000
# no block0000
self.assertRaises(ValueError, gtsdf.load, fn)
b = f.create_group("block0000")
#no data
# no data
self.assertRaises(ValueError, gtsdf.load, fn)
b.create_dataset("data", data=np.empty((0, 0)))
f.close()
gtsdf.load(fn)
def test_save_no_hdf5_ext(self):
fn = tmp_path + "no_hdf5_ext"
gtsdf.save(fn, np.arange(12).reshape(4, 3))
@@ -67,7 +59,6 @@ class Test_gsdf(unittest.TestCase):
_, _, info = gtsdf.load(fn)
self.assertEqual(info['name'], 'filename')
def test_load_fileobject(self):
fn = tmp_path + "fileobject.hdf5"
gtsdf.save(fn, np.arange(12).reshape(4, 3))
@@ -108,14 +99,12 @@ class Test_gsdf(unittest.TestCase):
time, _, _ = gtsdf.load(fn)
np.testing.assert_array_equal(time, range(4, 10))
def test_time_offset(self):
fn = tmp_path + 'time.hdf5'
gtsdf.save(fn, np.arange(12).reshape(6, 2), time=range(6), time_start=4)
time, _, _ = gtsdf.load(fn)
np.testing.assert_array_equal(time, range(4, 10))
def test_time_gain_offset(self):
fn = tmp_path + 'time.hdf5'
gtsdf.save(fn, np.arange(12).reshape(6, 2), time=range(6), time_step=1 / 4, time_start=4)
@@ -148,7 +137,6 @@ class Test_gsdf(unittest.TestCase):
_, data, _ = gtsdf.load(fn)
np.testing.assert_array_equal(data, np.arange(12).reshape(6, 2))
def test_all(self):
fn = tmp_path + "all.hdf5"
gtsdf.save(fn, np.arange(12).reshape(6, 2),
@@ -192,7 +180,6 @@ class Test_gsdf(unittest.TestCase):
self.assertNotIn('gains', f['block0001'])
f.close()
def test_nan_float(self):
fn = tmp_path + 'nan.hdf5'
d = np.arange(12, dtype=np.float32).reshape(6, 2)
@@ -201,8 +188,6 @@ class Test_gsdf(unittest.TestCase):
_, data, _ = gtsdf.load(fn)
np.testing.assert_array_almost_equal(data, d, 4)
def test_outlier(self):
fn = tmp_path + 'outlier.hdf5'
d = np.arange(12, dtype=np.float32).reshape(6, 2)
@@ -234,33 +219,25 @@ class Test_gsdf(unittest.TestCase):
self.assertEqual(time[1], 0.05)
self.assertEqual(data[1, 1], 11.986652374267578)
self.assertEqual(info['attribute_names'][1], "WSP gl. coo.,Vy")
def test_loadhdf5File(self):
f = h5py.File(tfp + 'test.hdf5')
time, data, info = gtsdf.load(f)
self.assertEqual(time[1], 0.05)
self.assertEqual(data[1, 1], 11.986652374267578)
self.assertEqual(info['attribute_names'][1], "WSP gl. coo.,Vy")
def test_gtsdf_dataset(self):
ds = gtsdf.Dataset(tfp+'test.hdf5')
self.assertEqual(ds.data.shape, (2440,49))
ds = gtsdf.Dataset(tfp + 'test.hdf5')
self.assertEqual(ds.data.shape, (2440, 49))
self.assertEqual(ds('Time')[1], 0.05)
self.assertEqual(ds.Time[1], 0.05)
self.assertRaisesRegex(AttributeError, "'Dataset' object has no attribute 'Time1'", lambda : ds.Time1)
self.assertRaisesRegex(AttributeError, "'Dataset' object has no attribute 'Time1'", lambda: ds.Time1)
self.assertEqual(ds(2)[1], 12.04148006439209)
n = ds.info['attribute_names'][2]
self.assertEqual(n, "WSP gl. coo.,Vy")
self.assertEqual(ds(n)[1], 12.04148006439209)
if __name__ == "__main__":
......
@@ -3,25 +3,18 @@ Created on 12/09/2013
@author: mmpe
'''
from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import absolute_import
from builtins import super
from builtins import range
from future import standard_library
standard_library.install_aliases()
import h5py
import numpy as np
from wetb import gtsdf
import unittest
import os
from wetb.gtsdf.gtsdf import add_postproc, load_postproc, collect_postproc, compress2postproc
import pytest
tmp_path = os.path.dirname(__file__) + "/tmp/"
tfp = os.path.dirname(os.path.abspath(__file__)) + "/test_files/"
class Test_gsdf(unittest.TestCase):
def setUp(self):
unittest.TestCase.setUp(self)
......@@ -31,33 +24,34 @@ class Test_gsdf(unittest.TestCase):
@classmethod
def tearDownClass(cls):
super(Test_gsdf, cls).tearDownClass()
#shutil.rmtree(tmp_path)
# shutil.rmtree(tmp_path)
def test_gtsdf_stat(self):
time, data, info = gtsdf.load(tfp+'test.hdf5')
print (data.shape)
# test_gtsdf_postproc
time, data, info = gtsdf.load(tfp + 'test.hdf5')
fn = tmp_path + "test_stat.hdf5"
gtsdf.save(fn, data, time=time, **info)
gtsdf.add_statistic(fn)
stat_data,info = gtsdf.load_statistic(fn)
self.assertEqual(data[:,0].min(), stat_data.values[0,0])
self.assertEqual(stat_data.shape, (49,10))
def test_gtsdf_compress2stat(self):
time, data, info = gtsdf.load(tfp+'test.hdf5')
add_postproc(fn)
da = load_postproc(fn)
sensor = da[0]
self.assertEqual(data[:, 0].min(), sensor[0].sel(statistic='min'))
assert sensor[0].sensor_name == info['attribute_names'][0]
assert sensor[0].sensor_unit == info['attribute_units'][0]
assert sensor[0].sensor_description == info['attribute_descriptions'][0]
self.assertEqual(da[0].shape, (49, 4))
# test_gtsdf_compress2postproc
time, data, info = gtsdf.load(tfp + 'test.hdf5')
fn = tmp_path + "test_compress2stat.hdf5"
gtsdf.save(fn, data, time=time, **info)
gtsdf.save(tmp_path + "test_compress2stat2.hdf5", data, time=time, dtype=np.float, **info)
gtsdf.compress2statistics(fn)
self.assertLess(os.path.getsize(fn)*50, os.path.getsize(tfp+'test.hdf5'))
del info['dtype']
gtsdf.save(tmp_path + "test_compress2stat2.hdf5", data, time=time, dtype=float, **info)
compress2postproc(fn)
self.assertLess(os.path.getsize(fn) * 30, os.path.getsize(tfp + 'test.hdf5'))
# test_collect_postproc
with pytest.raises(Exception, match=r'No \*\.hdf5 files found in'):
collect_postproc('missing', tmp_path)
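A sketch of the postproc round trip this test exercises, assuming fn already points at a saved gtsdf file and the functions imported at the top of this module are in scope (names follow the test above):

add_postproc(fn)                          # append the postproc block to the file
da = load_postproc(fn)
stats = da[0]                             # per-sensor statistics table, here shape (49, 4)
minimum = stats[0].sel(statistic='min')   # e.g. the minimum of the first sensor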
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
......
......@@ -3,12 +3,6 @@ Created on 03/12/2015
@author: mmpe
'''
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
import unittest
import numpy as np
import datetime
......@@ -19,14 +13,10 @@ class TestUnixTime(unittest.TestCase):
def test_to_unix(self):
print (np.array([5]))
self.assertEqual(to_unix(datetime.datetime(2016, 2, 2, 13, 6, 25)), 1454418385)
self.assertEqual(to_unix([datetime.datetime(2016, 2, 2, 13, 6, 25),datetime.datetime(2016, 2, 2, 13, 6, 26)]), [1454418385,1454418386])
self.assertNotEqual(to_unix(datetime.datetime(2016, 2, 2, 13, 6, 26)), 1454418385)
self.assertRaises(Exception, to_unix,1)
def test_from_unix(self):
......
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from builtins import zip
from future import standard_library
standard_library.install_aliases()
from datetime import datetime, date
import numpy as np
timestamp0 = datetime.utcfromtimestamp(0)
timestamp0 = datetime(1970, 1, 1)
def to_unix(dateTime):
try:
return (dateTime - timestamp0).total_seconds()
except:
except Exception:
if hasattr(dateTime, "__len__"):
return [(dt - timestamp0).total_seconds() for dt in dateTime]
raise
# def from_unix_old(sec):
# if np.isnan(sec):
# return datetime.utcfromtimestamp(0)
# return datetime.utcfromtimestamp(sec)
day_dict = {}
......@@ -34,9 +27,9 @@ def from_unix(sec):
return datetime.utcfromtimestamp(0)
return datetime.utcfromtimestamp(sec)
else:
sec = np.array(sec).astype(np.float)
ms = np.atleast_1d((sec * 1000000 % 1000000).astype(np.int))
sec = sec.astype(np.int)
sec = np.array(sec).astype(float)
us = np.atleast_1d((sec * 1000000 % 1000000).astype(int))
sec = sec.astype(int)
S = np.atleast_1d(sec % 60)
M = np.atleast_1d(sec % 3600 // 60)
H = np.atleast_1d(sec % 86400 // 3600)
......@@ -45,4 +38,4 @@ def from_unix(sec):
if du not in day_dict:
day_dict[du] = date.fromordinal(719163 + du).timetuple()[:3]
y, m, d = zip(*[day_dict[d_] for d_ in d])
return ([datetime(*ymdhmsu) for ymdhmsu in zip(y, m, d, H.tolist(), M.tolist(), S.tolist(), ms.tolist())])
return ([datetime(*ymdhmsu) for ymdhmsu in zip(y, m, d, H.tolist(), M.tolist(), S.tolist(), us.tolist())])
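A round-trip sketch for to_unix/from_unix, reusing the reference value from the unit test above (assumes this module's functions are in scope):

from datetime import datetime, timedelta

dt = datetime(2016, 2, 2, 13, 6, 25)
sec = to_unix(dt)                # 1454418385.0 seconds since the 1970 epoch
assert from_unix(sec) == dt      # scalar path
assert from_unix([sec, sec + 1]) == [dt, dt + timedelta(seconds=1)]  # list path returns a list of datetimes

The day_dict cache makes date.fromordinal run once per distinct day instead of once per sample, which pays off for high-rate time series where most timestamps share a date.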
......@@ -27,36 +27,25 @@ Need to be done:
* add error handling for almost everything
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from builtins import int
from builtins import range
from io import open as opent
from builtins import str
from future import standard_library
standard_library.install_aliases()
from builtins import object
import numpy as np
import os
from wetb import gtsdf
# FIXME: numpy doesn't like io.open binary fid in PY27, why is that? As a hack
# workaround, use opent for PY23 compatibility when handling text files,
# and default open for binary
from wetb.prepost import misc
################################################################################
################################################################################
################################################################################
## Read HAWC2 class
# Read HAWC2 class
################################################################################
class ReadHawc2(object):
"""
"""
################################################################################
# read *.sel file
def _ReadSelFile(self):
"""
Read result info from the HAWC2 *.sel output file
......@@ -78,9 +67,9 @@ class ReadHawc2(object):
"""
# read *.sel hawc2 output file for result info
fid = opent(self.FileName + '.sel', 'r')
Lines = fid.readlines()
fid.close()
if self.FileName.lower().endswith('.sel'):
self.FileName = self.FileName[:-4]
Lines = misc.readlines_try_encodings(self.FileName + '.sel')
# findes general result info (number of scans, number of channels,
# simulation time and file format)
temp = Lines[8].split()
......@@ -91,11 +80,16 @@ class ReadHawc2(object):
self.t = np.linspace(0, self.Time, self.NrSc + 1)[1:]
Format = temp[3]
# reads channel info (name, unit and description)
Name = []; Unit = []; Description = [];
Name = []
Unit = []
Description = []
for i in range(0, self.NrCh):
temp = str(Lines[i + 12][12:43]); Name.append(temp.strip())
temp = str(Lines[i + 12][43:54]); Unit.append(temp.strip())
temp = str(Lines[i + 12][54:-1]); Description.append(temp.strip())
temp = str(Lines[i + 12][12:43])
Name.append(temp.strip())
temp = str(Lines[i + 12][43:54])
Unit.append(temp.strip())
temp = str(Lines[i + 12][54:-1])
Description.append(temp.strip())
self.ChInfo = [Name, Unit, Description]
# if binary file format, scaling factors are read
if Format.lower() == 'binary':
......@@ -107,65 +101,73 @@ class ReadHawc2(object):
self.FileFormat = 'HAWC2_ASCII'
################################################################################
# read sensor file for FLEX format
def _ReadSensorFile(self):
# read sensor file used if results are saved in FLEX format
DirName = os.path.dirname(self.FileName + ".int")
try:
fid = opent(DirName + "\sensor ", 'r')
Lines = misc.readlines_try_encodings(os.path.join(DirName, r"sensor"))
except IOError:
print ("can't finde sensor file for FLEX format")
print("can't finde sensor file for FLEX format")
return
Lines = fid.readlines()
fid.close()
# reads channel info (name, unit and description)
self.NrCh = 0
Name = []; Unit = []; Description = [];
Name = []
Unit = []
Description = []
for i in range(2, len(Lines)):
temp = Lines[i]
if not temp.strip():
break
self.NrCh += 1
temp = str(Lines[i][38:45]); Unit.append(temp.strip())
temp = str(Lines[i][45:53]); Name.append(temp.strip())
temp = str(Lines[i][53:]); Description.append(temp.strip())
temp = str(Lines[i][38:45])
Unit.append(temp.strip())
temp = str(Lines[i][45:53])
Name.append(temp.strip())
temp = str(Lines[i][53:])
Description.append(temp.strip())
self.ChInfo = [Name, Unit, Description]
# read general info from *.int file
fid = open(self.FileName + ".int", 'rb')
fid.seek(4 * 19)
if not np.fromfile(fid, 'int32', 1) == self.NrCh:
print ("number of sensors in sensor file and data file are not consisten")
print("number of sensors in sensor file and data file are not consisten")
fid.seek(4 * (self.NrCh) + 8, 1)
temp = np.fromfile(fid, 'f', 2)
self.Freq = 1 / temp[1];
time_start, time_step = np.fromfile(fid, 'f', 2)
self.Freq = 1 / time_step
self.ScaleFactor = np.fromfile(fid, 'f', self.NrCh)
fid.seek(2 * 4 * self.NrCh + 48 * 2)
self.NrSc = len(np.fromfile(fid, 'int16')) / self.NrCh
self.Time = self.NrSc * temp[1]
self.t = np.arange(0, self.Time, temp[1])
self.NrSc = int(len(np.fromfile(fid, 'int16')) / self.NrCh)
self.Time = self.NrSc * time_step
self.t = np.arange(0, self.Time, time_step) + time_start
fid.close()
################################################################################
# init function, load channel and other general result file info
def __init__(self, FileName, ReadOnly=0):
self.FileName = FileName
self.ReadOnly = ReadOnly
self.Iknown = []  # to keep track of what has already been read
self.Data = np.zeros(0)
if os.path.isfile(FileName + ".sel"):
self._ReadSelFile()
elif os.path.isfile(self.FileName + ".int"):
self.FileFormat = 'FLEX'
self._ReadSensorFile()
elif os.path.isfile(self.FileName + ".hdf5"):
if FileName.lower().endswith('.sel') or os.path.isfile(FileName + ".sel"):
self._ReadSelFile()
elif FileName.lower().endswith('.int') or os.path.isfile(self.FileName + ".int"):
self.FileFormat = 'FLEX'
self._ReadSensorFile()
elif FileName.lower().endswith('.hdf5') or os.path.isfile(self.FileName + ".hdf5"):
self.FileFormat = 'GTSDF'
self.ReadGtsdf()
else:
print ("unknown file: " + FileName)
print("unknown file: " + FileName)
################################################################################
# Read results in binary format
def ReadBinary(self, ChVec=[]):
if not ChVec:
ChVec = range(0, self.NrCh)
with open(self.FileName + '.dat', 'rb') as fid:
data = np.zeros((self.NrSc, len(ChVec))); j = 0
data = np.zeros((self.NrSc, len(ChVec)))
j = 0
for i in ChVec:
fid.seek(i * self.NrSc * 2, 0)
data[:, j] = np.fromfile(fid, 'int16', self.NrSc) * self.ScaleFactor[i]
......@@ -173,13 +175,15 @@ class ReadHawc2(object):
return data
################################################################################
# Read results in ASCII format
def ReadAscii(self, ChVec=[]):
if not ChVec:
ChVec = range(0, self.NrCh)
temp = np.loadtxt(self.FileName + '.dat', usecols=ChVec)
return temp.reshape((self.NrSc, len(ChVec)))
return temp.reshape((temp.shape[0], len(ChVec)))
################################################################################
# Read results in FLEX format
def ReadFLEX(self, ChVec=[]):
if not ChVec:
ChVec = range(1, self.NrCh)
......@@ -191,20 +195,27 @@ class ReadHawc2(object):
return np.dot(temp[:, ChVec], np.diag(self.ScaleFactor[ChVec]))
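Post-multiplying by np.diag(self.ScaleFactor[ChVec]) scales each selected channel column by its factor; a plain broadcast gives the same result without building the square diagonal matrix. A tiny equivalence check with illustrative values:

import numpy as np

raw = np.array([[1, 2], [3, 4]], dtype=np.int16)  # two scans, two channels
sf = np.array([0.5, 2.0])                         # per-channel scale factors
assert np.allclose(raw * sf, np.dot(raw, np.diag(sf)))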
################################################################################
# Read results in GTSD format
def ReadGtsdf(self):
self.t, data, info = gtsdf.load(self.FileName + '.hdf5')
self.Time = self.t[-1]
self.ChInfo = [info['attribute_names'],
info['attribute_units'],
info['attribute_descriptions']]
self.NrCh = data.shape[1]
fn = self.FileName
if fn[-5:].lower() != '.hdf5':
fn += '.hdf5'
self.t, data, info = gtsdf.load(fn)
self.Time = self.t
self.ChInfo = [['Time'] + info['attribute_names'],
['s'] + info['attribute_units'],
['Time'] + info['attribute_descriptions']]
self.NrCh = data.shape[1] + 1
self.NrSc = data.shape[0]
self.Freq = self.NrSc / self.Time
self.FileFormat = 'GTSDF'
self.gtsdf_description = info['description']
self.gtsdf_dtype = info['dtype']
data = np.hstack([self.Time[:, np.newaxis], data])
return data
################################################################################
# One stop call for reading all data formats
def ReadAll(self, ChVec=[]):
if not ChVec and not self.FileFormat == 'GTSDF':
ChVec = range(0, self.NrCh)
......@@ -218,11 +229,12 @@ class ReadHawc2(object):
return self.ReadFLEX(ChVec)
################################################################################
# Main read data call, read, save and sort data
def __call__(self, ChVec=[]):
if not ChVec:
ChVec = range(0, self.NrCh)
elif max(ChVec) >= self.NrCh:
print ("to high channel number")
print("to high channel number")
return
# if ReadOnly, read data but don't store it in memory
if self.ReadOnly:
......@@ -231,11 +243,12 @@ class ReadHawc2(object):
# and return all requested channels
else:
# sort into known channels and channels to be read
I1 = [];I2 = [] # I1=Channel mapping, I2=Channels to be read
I1 = []
I2 = [] # I1=Channel mapping, I2=Channels to be read
for i in ChVec:
try:
I1.append(self.Iknown.index(i))
except:
except Exception:
self.Iknown.append(i)
I2.append(i)
I1.append(len(I1))
......@@ -254,7 +267,7 @@ class ReadHawc2(object):
################################################################################
################################################################################
################################################################################
## write HAWC2 class, to be implemented
# write HAWC2 class, to be implemented
################################################################################
if __name__ == '__main__':
......
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
d = None
d = dir()
......@@ -13,6 +7,7 @@ from .ae_file import AEFile
from .at_time_file import AtTimeFile
from .pc_file import PCFile
from . import shear_file
from .st_file import StFile
from .hawc2_input_writer import HAWC2InputWriter
__all__ = sorted([m for m in set(dir()) - set(d)])
'''
Created on 24/04/2014
@author: MMPE
'''
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from io import open
from builtins import range
from builtins import int
from future import standard_library
standard_library.install_aliases()
import os
import numpy as np
class AEFile(object):
"""Read HAWC2 AE (aerodynamic blade layout) file
"""Read and write the HAWC2 AE (aerodynamic blade layout) file
Examples
--------
>>> aefile = AEFile(r"tests/test_files/NREL_5MW_ae.txt")
>>> print (aefile.thickness(36)) # Interpolated thickness at radius 36
>>> aefile.thickness(36) # Interpolated thickness at radius 36
23.78048780487805
>>> print (aefile.chord(36)) # Interpolated chord at radius 36
>>> aefile.chord(36) # Interpolated chord at radius 36
3.673
>>> print (aefile.pc_set_nr(36)) # pc set number at radius 36
>>> aefile.pc_set_nr(36) # pc set number at radius 36
1
>>> ae = AEFile()
>>> ae.add_set(radius=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
...            chord=[1.1, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1],
...            thickness=[100.0, 100.0, 90.0, 80.0, 70.0, 60.0, 50.0, 40.0, 30.0, 20.0, 10.0],
...            pc_set_id=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
1
>>> print(ae)
1 r[m] Chord[m] T/C[%] Set no.
1 11
0.00000000000000000e+00 1.10000000000000009e+00 1.00000000000000000e+02 1
1.00000000000000006e-01 1.00000000000000000e+00 1.00000000000000000e+02 1
2.00000000000000011e-01 9.00000000000000022e-01 9.00000000000000000e+01 1
2.99999999999999989e-01 8.00000000000000044e-01 8.00000000000000000e+01 1
4.00000000000000022e-01 6.99999999999999956e-01 7.00000000000000000e+01 1
5.00000000000000000e-01 5.99999999999999978e-01 6.00000000000000000e+01 1
5.99999999999999978e-01 5.00000000000000000e-01 5.00000000000000000e+01 1
6.99999999999999956e-01 4.00000000000000022e-01 4.00000000000000000e+01 1
8.00000000000000044e-01 2.99999999999999989e-01 3.00000000000000000e+01 1
9.00000000000000022e-01 2.00000000000000011e-01 2.00000000000000000e+01 1
1.00000000000000000e+00 1.00000000000000006e-01 1.00000000000000000e+01 1
"""
def __init__(self, filename):
with open (filename) as fid:
lines = fid.readlines()
nsets = int(lines[0].split()[0])
lptr = 1
self.ae_sets = {}
for _ in range(1, nsets + 1):
for _ in range(nsets):
set_nr, n_rows = [int(v) for v in lines[lptr ].split()[:2]]
lptr += 1
data = np.array([[float(v) for v in l.split()[:4]] for l in lines[lptr:lptr + n_rows]])
self.ae_sets[set_nr] = data
lptr += n_rows
cols = ['radius', 'chord', 'relative_thickness', 'setnr']
def __init__(self, filename=None):
self.ae_sets = {}
if filename is not None:
self._read_file(filename)
def _value(self, radius, column, set_nr=1):
ae_data = self.ae_sets[set_nr]
if radius is None:
return ae_data[:,column]
return ae_data[:, column]
else:
return np.interp(radius, ae_data[:, 0], ae_data[:, column])
......@@ -55,14 +55,14 @@ class AEFile(object):
def thickness(self, radius=None, set_nr=1):
return self._value(radius, 2, set_nr)
def radius_ae(self, radius=None, set_nr=1):
radii = self.ae_sets[set_nr][:,0]
radii = self.ae_sets[set_nr][:, 0]
if radius:
return radii[np.argmin(np.abs(radii-radius))]
return radii[np.argmin(np.abs(radii - radius))]
else:
return radii
def pc_set_nr(self, radius, set_nr=1):
ae_data = self.ae_sets[set_nr]
index = np.searchsorted(ae_data[:, 0], radius)
......@@ -72,13 +72,60 @@ class AEFile(object):
raise NotImplementedError
return setnrs[0]
def add_set(self, radius, chord, thickness, pc_set_id, set_id=None):
'''This method will add another set to the ae data'''
if set_id is None:
set_id = 1
while set_id in self.ae_sets.keys():
set_id += 1
self.ae_sets[set_id] = np.array([radius, chord, thickness, pc_set_id]).T
return set_id
def __str__(self):
'''This method will create a string that is formatted like an ae file with the data in this class'''
n_sets = len(self.ae_sets)
retval = str(n_sets) + ' r[m] Chord[m] T/C[%] Set no.\n'
for st_idx, st in self.ae_sets.items():
retval += str(st_idx) + ' ' + str(len(st)) + '\n'
for line in st:
retval += '%25.17e %25.17e %25.17e %5d\n' % (line[0], line[1], line[2], line[3])
return retval
def save(self, filename):
if not os.path.isdir(os.path.dirname(filename)):
# os.makedirs fails if dirname is an empty string
if len(os.path.dirname(filename)) > 0:
os.makedirs(os.path.dirname(filename))
with open(filename, 'w') as fid:
fid.write(str(self))
def _read_file(self, filename):
''' This method will read in the ae data from a HAWC2 ae file'''
with open(filename) as fid:
lines = fid.readlines()
nsets = int(lines[0].split()[0])
lptr = 1
self.ae_sets = {}
for _ in range(1, nsets + 1):
set_nr, n_rows = [int(v) for v in lines[lptr].split()[:2]]
lptr += 1
data = np.array([[float(v) for v in l.split()[:4]] for l in lines[lptr:lptr + n_rows]])
self.ae_sets[set_nr] = data
lptr += n_rows
def main():
if __name__ == "__main__":
ae = AEFile(os.path.dirname(__file__) + "/tests/test_files/NREL_5MW_ae.txt")
print(ae.radius_ae(36))
print(ae.thickness())
print(ae.chord(36))
print(ae.pc_set_nr(36))
ae.add_set(radius=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
chord=[1.1, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1],
thickness=[100.0, 100.0, 90.0, 80.0, 70.0, 60.0, 50.0, 40.0, 30.0, 20.0, 10.0],
pc_set_id=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
print(str(ae))
if __name__ == "__main__":
ae = AEFile(r"tests/test_files/NREL_5MW_ae.txt")
print (ae.radius_ae(36))
print (ae.thickness())
print (ae.chord(36))
print (ae.pc_set_nr(36))
main()
......@@ -3,12 +3,6 @@ Created on 06/09/2013
@author: Mads M. Pedersen (mmpe@dtu.dk)
'''
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
from wetb.hawc2.ascii2bin.ascii2bin import ascii2bin
if __name__ == "__main__":
......
"""
General Time Series Data Format - a HDF5 format for time series
"""
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
d = None
d = dir()
......
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from io import open
from builtins import int
from future import standard_library
standard_library.install_aliases()
from wetb.hawc2.ascii2bin.pandas_dat_ascii2bin import pandas_dat_ascii2bin
import sys
import warnings
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
from wetb.hawc2.ascii2bin.pandas_dat_ascii2bin import pandas_dat_ascii2bin
class TextUI(object):
def show_message(self, m):
print (m)
print(m)
def exec_long_task(self, text, allow_cancel, task, *args, **kwargs):
print (text)
def exec_long_task(self, title, text, allow_cancel, task, *args, **kwargs):
print(text)
return task(*args, **kwargs)
sys.path.append(".")
def size_from_file(selfilename):
with open(selfilename, encoding='utf-8') as f:
info = f.readlines()[8].split()
......@@ -30,6 +24,7 @@ def size_from_file(selfilename):
no_sensors = int(info[1])
return (scans, no_sensors)
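Usage sketch for size_from_file, which parses the scan and channel counts from the ninth line of a HAWC2 *.sel file; the test suite further below expects (800, 28) for its Hawc2ascii.sel fixture (the path here is illustrative):

scans, no_sensors = size_from_file("tests/test_files/Hawc2ascii.sel")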
def ascii2bin(ascii_selfilename, bin_selfilename=None, ui=TextUI()):
# Convert dat file
......@@ -37,7 +32,6 @@ def ascii2bin(ascii_selfilename, bin_selfilename=None, ui=TextUI()):
if bin_selfilename is None:
bin_selfilename = ascii_selfilename[:-4] + "_bin.sel"
# Read, convert and write sel file
with open(ascii_selfilename, encoding='utf-8') as f:
lines = f.readlines()
......@@ -59,9 +53,10 @@ def ascii2bin(ascii_selfilename, bin_selfilename=None, ui=TextUI()):
if ui is not None:
ui.show_message("Finish converting %s to %s" % (ascii_selfilename, bin_selfilename))
if __name__ == "__main__":
if len(sys.argv) < 2:
print ("syntax: ascii2bin ascii_sel_filename [bin_sel_filename]")
print("syntax: ascii2bin ascii_sel_filename [bin_sel_filename]")
elif len(sys.argv) == 2:
ascii2bin(sys.argv[1])
else:
......
......@@ -3,9 +3,6 @@ Created on 06/09/2013
@author: Mads M. Pedersen (mmpe@dtu.dk)
'''
from __future__ import division, print_function, absolute_import, unicode_literals
from future import standard_library
standard_library.install_aliases()
if __name__ == "__main__":
......
......@@ -3,32 +3,25 @@ Created on 10/01/2014
@author: MMPE
'''
from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
import numpy as np
from pandas import read_csv
def pandas_dat_ascii2bin(ascii_filename, bin_filename, ui):
df = ui.exec_long_task("Reading ascii file", False, read_csv, ascii_filename, sep=" ", skipinitialspace=True, header=None)
df = ui.exec_long_task("Please wait...", "Reading ascii file", False, read_csv, ascii_filename, sep=" ",
skipinitialspace=True, header=None)
def compress(df, bin_filename):
with open(bin_filename, 'wb') as outfile:
scale_factors = []
for _, sensor in df.iteritems():
for _, sensor in df.items():
sf = sensor.abs().max() / 32000
if sf > 0:
sensor /= sf
np.round(sensor.values).astype(np.int16).tofile(outfile)
scale_factors.append(sf)
return np.array(scale_factors)
#return compress(df, bin_filename)
return ui.exec_long_task("Compress and save as binary", False, compress, df, bin_filename)
# return compress(df, bin_filename)
return ui.exec_long_task("Please wait...", "Compress and save as binary", False, compress, df, bin_filename)
......@@ -3,12 +3,6 @@ Created on 29/10/2013
@author: mmpe
'''
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
from wetb.hawc2 import Hawc2io
import numpy as np
import os
......@@ -19,22 +13,14 @@ from wetb.hawc2.ascii2bin.ascii2bin import ascii2bin, size_from_file
testfilepath = os.path.join(os.path.dirname(__file__), 'test_files/') # test file path
class TextUI(object):
def show_message(self, m):
pass
def exec_long_task(self, text, allow_cancel, task, *args, **kwargs):
return task(*args, **kwargs)
class TestAscii2Bin(unittest.TestCase):
def testAscii2bin(self):
for f in ["Hawc2ascii_bin.sel", "Hawc2ascii_bin.dat"]:
if os.path.exists(testfilepath + f):
os.remove(testfilepath + f)
ascii2bin(testfilepath + "Hawc2ascii.sel", ui=TextUI())
ascii2bin(testfilepath + "Hawc2ascii.sel")
ascii_file = Hawc2io.ReadHawc2(testfilepath + 'Hawc2ascii')
bin_file = Hawc2io.ReadHawc2(testfilepath + "Hawc2ascii_bin")
......@@ -46,7 +32,7 @@ class TestAscii2Bin(unittest.TestCase):
for f in ["Hawc2bin.sel", "Hawc2bin.dat"]:
if os.path.exists(testfilepath + f):
os.remove(testfilepath + f)
ascii2bin(testfilepath + "Hawc2ascii.sel", testfilepath + "Hawc2bin.sel", ui=TextUI())
ascii2bin(testfilepath + "Hawc2ascii.sel", testfilepath + "Hawc2bin.sel")
ascii_file = Hawc2io.ReadHawc2(testfilepath + 'Hawc2ascii')
bin_file = Hawc2io.ReadHawc2(testfilepath + "Hawc2bin")
......@@ -54,12 +40,10 @@ class TestAscii2Bin(unittest.TestCase):
np.testing.assert_array_almost_equal(ascii_file.ReadAscii(), bin_file.ReadBinary(), 1)
self.assertEqual(ascii_file.ChInfo, bin_file.ChInfo)
def testSizeOfFile(self):
self.assertEqual(size_from_file(testfilepath + "Hawc2ascii.sel"), (800, 28))
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
......@@ -3,14 +3,6 @@ Created on 24/04/2014
@author: MMPE
'''
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from io import open
from builtins import range
from future import standard_library
standard_library.install_aliases()
import numpy as np
class AtTimeFile(object):
......