Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
W
WindEnergyToolbox
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
toolbox
WindEnergyToolbox
Commits
f181b12c
Commit
f181b12c
authored
7 years ago
by
Mads M. Pedersen
Browse files
Options
Downloads
Patches
Plain Diff
implemented statistics in gtsdf
parent
a759fea8
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
wetb/gtsdf/__init__.py
+3
-0
3 additions, 0 deletions
wetb/gtsdf/__init__.py
wetb/gtsdf/gtsdf.py
+137
-73
137 additions, 73 deletions
wetb/gtsdf/gtsdf.py
wetb/gtsdf/tests/test_gtsdf_stat.py
+64
-0
64 additions, 0 deletions
wetb/gtsdf/tests/test_gtsdf_stat.py
with
204 additions
and
73 deletions
wetb/gtsdf/__init__.py
+
3
−
0
View file @
f181b12c
...
@@ -38,6 +38,9 @@ from .gtsdf import save
...
@@ -38,6 +38,9 @@ from .gtsdf import save
from
.gtsdf
import
load
from
.gtsdf
import
load
from
.gtsdf
import
append_block
from
.gtsdf
import
append_block
from
.gtsdf
import
load_pandas
from
.gtsdf
import
load_pandas
from
.gtsdf
import
add_statistic
from
.gtsdf
import
load_statistic
from
.gtsdf
import
compress2statistics
class
Dataset
(
object
):
class
Dataset
(
object
):
def
__init__
(
self
,
filename
):
def
__init__
(
self
,
filename
):
...
...
This diff is collapsed.
Click to expand it.
wetb/gtsdf/gtsdf.py
+
137
−
73
View file @
f181b12c
...
@@ -3,6 +3,7 @@ from builtins import zip
...
@@ -3,6 +3,7 @@ from builtins import zip
from
builtins
import
range
from
builtins
import
range
from
builtins
import
str
from
builtins
import
str
from
future
import
standard_library
from
future
import
standard_library
from
wetb.fatigue_tools.fatigue
import
eq_load
standard_library
.
install_aliases
()
standard_library
.
install_aliases
()
import
warnings
import
warnings
from
wetb.gtsdf.unix_time
import
from_unix
from
wetb.gtsdf.unix_time
import
from_unix
...
@@ -13,6 +14,7 @@ except ImportError as e:
...
@@ -13,6 +14,7 @@ except ImportError as e:
import
os
import
os
import
numpy
as
np
import
numpy
as
np
import
numpy.ma
as
ma
import
numpy.ma
as
ma
import
pandas
as
pd
block_name_fmt
=
"
block%04d
"
block_name_fmt
=
"
block%04d
"
def
load
(
filename
,
dtype
=
None
):
def
load
(
filename
,
dtype
=
None
):
...
@@ -89,80 +91,95 @@ def load(filename, dtype=None):
...
@@ -89,80 +91,95 @@ def load(filename, dtype=None):
'
type
'
:
'
General time series data format
'
,
'
type
'
:
'
General time series data format
'
,
'
description
'
:
'
MyDatasetDescription
'
}
'
description
'
:
'
MyDatasetDescription
'
}
"""
"""
f
=
_open_h5py_file
(
filename
)
try
:
info
=
_load_info
(
f
)
time
,
data
=
_load_timedata
(
f
,
dtype
)
return
time
,
data
,
info
finally
:
try
:
f
.
close
()
except
:
pass
def
_open_h5py_file
(
filename
):
if
isinstance
(
filename
,
h5py
.
File
):
if
isinstance
(
filename
,
h5py
.
File
):
f
=
filename
f
=
filename
filename
=
f
.
filename
filename
=
f
.
filename
else
:
else
:
assert
os
.
path
.
isfile
(
filename
),
"
File, %s, does not exists
"
%
filename
assert
os
.
path
.
isfile
(
filename
),
"
File, %s, does not exists
"
%
filename
f
=
h5py
.
File
(
filename
,
'
r
'
)
f
=
h5py
.
File
(
filename
,
'
r
'
)
try
:
return
f
def
decode
(
v
):
if
isinstance
(
v
,
bytes
):
def
decode
(
v
):
return
v
.
decode
(
'
latin1
'
)
if
isinstance
(
v
,
bytes
):
return
v
return
v
.
decode
(
'
latin1
'
)
elif
hasattr
(
v
,
'
len
'
):
return
[
decode
(
v_
)
for
v_
in
v
]
info
=
{
k
:
decode
(
v
)
for
k
,
v
in
f
.
attrs
.
items
()}
return
v
check_type
(
f
)
if
(
block_name_fmt
%
0
)
not
in
f
:
def
_load_info
(
f
):
raise
ValueError
(
"
HDF5 file must contain a group named
'
%s
'"
%
(
block_name_fmt
%
0
))
block0
=
f
[
block_name_fmt
%
0
]
if
'
data
'
not
in
block0
:
info
=
{
k
:
decode
(
v
)
for
k
,
v
in
f
.
attrs
.
items
()}
raise
ValueError
(
"
group %s must contain a dataset called
'
data
'"
%
(
block_name_fmt
%
0
))
check_type
(
f
)
_
,
no_attributes
=
block0
[
'
data
'
].
shape
if
'
name
'
not
in
info
:
if
'
name
'
not
in
info
:
info
[
'
name
'
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
f
.
filename
))[
0
]
info
[
'
name
'
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
filename
))[
0
]
if
'
attribute_names
'
in
f
:
if
'
attribute_names
'
in
f
:
info
[
'
attribute_names
'
]
=
[
v
.
decode
(
'
latin1
'
)
for
v
in
f
[
'
attribute_names
'
]]
info
[
'
attribute_names
'
]
=
[
v
.
decode
(
'
latin1
'
)
for
v
in
f
[
'
attribute_names
'
]]
if
'
attribute_units
'
in
f
:
if
'
attribute_units
'
in
f
:
info
[
'
attribute_units
'
]
=
[
v
.
decode
(
'
latin1
'
)
for
v
in
f
[
'
attribute_units
'
]]
info
[
'
attribute_units
'
]
=
[
v
.
decode
(
'
latin1
'
)
for
v
in
f
[
'
attribute_units
'
]]
if
'
attribute_descriptions
'
in
f
:
if
'
attribute_descriptions
'
in
f
:
info
[
'
attribute_descriptions
'
]
=
[
v
.
decode
(
'
latin1
'
)
for
v
in
f
[
'
attribute_descriptions
'
]]
info
[
'
attribute_descriptions
'
]
=
[
v
.
decode
(
'
latin1
'
)
for
v
in
f
[
'
attribute_descriptions
'
]]
return
info
no_blocks
=
f
.
attrs
[
'
no_blocks
'
]
def
_load_timedata
(
f
,
dtype
):
if
dtype
is
None
:
no_blocks
=
f
.
attrs
[
'
no_blocks
'
]
file_dtype
=
f
[
block_name_fmt
%
0
][
'
data
'
].
dtype
if
(
block_name_fmt
%
0
)
not
in
f
:
if
"
float
"
in
str
(
file_dtype
):
raise
ValueError
(
"
HDF5 file must contain a group named
'
%s
'"
%
(
block_name_fmt
%
0
))
dtype
=
file_dtype
block0
=
f
[
block_name_fmt
%
0
]
elif
file_dtype
in
[
np
.
int8
,
np
.
uint8
,
np
.
int16
,
np
.
uint16
]:
if
'
data
'
not
in
block0
:
dtype
=
np
.
float32
raise
ValueError
(
"
group %s must contain a dataset called
'
data
'"
%
(
block_name_fmt
%
0
))
else
:
_
,
no_attributes
=
block0
[
'
data
'
].
shape
dtype
=
np
.
float64
time
=
[]
data
=
[]
if
dtype
is
None
:
for
i
in
range
(
no_blocks
):
file_dtype
=
f
[
block_name_fmt
%
0
][
'
data
'
].
dtype
if
"
float
"
in
str
(
file_dtype
):
try
:
dtype
=
file_dtype
block
=
f
[
block_name_fmt
%
i
]
elif
file_dtype
in
[
np
.
int8
,
np
.
uint8
,
np
.
int16
,
np
.
uint16
]:
except
KeyError
:
dtype
=
np
.
float32
continue
else
:
no_observations
,
no_attributes
=
block
[
'
data
'
].
shape
dtype
=
np
.
float64
block_time
=
(
block
.
get
(
'
time
'
,
np
.
arange
(
no_observations
))[:]).
astype
(
np
.
float64
)
time
=
[]
if
'
time_step
'
in
block
.
attrs
:
data
=
[]
block_time
*=
block
.
attrs
[
'
time_step
'
]
for
i
in
range
(
no_blocks
):
if
'
time_start
'
in
block
.
attrs
:
block_time
+=
block
.
attrs
[
'
time_start
'
]
time
.
extend
(
block_time
)
block_data
=
block
[
'
data
'
][:].
astype
(
dtype
)
if
"
int
"
in
str
(
block
[
'
data
'
].
dtype
):
block_data
[
block_data
==
np
.
iinfo
(
block
[
'
data
'
].
dtype
).
max
]
=
np
.
nan
if
'
gains
'
in
block
:
block_data
*=
block
[
'
gains
'
][:]
if
'
offsets
'
in
block
:
block_data
+=
block
[
'
offsets
'
][:]
data
.
append
(
block_data
)
f
.
close
()
if
no_blocks
>
0
:
data
=
np
.
vstack
(
data
)
return
np
.
array
(
time
).
astype
(
np
.
float64
),
np
.
array
(
data
).
astype
(
dtype
),
info
except
(
ValueError
,
AssertionError
):
f
.
close
()
raise
try
:
block
=
f
[
block_name_fmt
%
i
]
except
KeyError
:
continue
no_observations
,
no_attributes
=
block
[
'
data
'
].
shape
block_time
=
(
block
.
get
(
'
time
'
,
np
.
arange
(
no_observations
))[:]).
astype
(
np
.
float64
)
if
'
time_step
'
in
block
.
attrs
:
block_time
*=
block
.
attrs
[
'
time_step
'
]
if
'
time_start
'
in
block
.
attrs
:
block_time
+=
block
.
attrs
[
'
time_start
'
]
time
.
extend
(
block_time
)
block_data
=
block
[
'
data
'
][:].
astype
(
dtype
)
if
"
int
"
in
str
(
block
[
'
data
'
].
dtype
):
block_data
[
block_data
==
np
.
iinfo
(
block
[
'
data
'
].
dtype
).
max
]
=
np
.
nan
if
'
gains
'
in
block
:
block_data
*=
block
[
'
gains
'
][:]
if
'
offsets
'
in
block
:
block_data
+=
block
[
'
offsets
'
][:]
data
.
append
(
block_data
)
if
no_blocks
>
0
:
data
=
np
.
vstack
(
data
)
return
np
.
array
(
time
).
astype
(
np
.
float64
),
np
.
array
(
data
).
astype
(
dtype
)
def
save
(
filename
,
data
,
**
kwargs
):
def
save
(
filename
,
data
,
**
kwargs
):
"""
Save a
'
General Time Series Data Format
'
-hdf5 datafile
"""
Save a
'
General Time Series Data Format
'
-hdf5 datafile
...
@@ -226,36 +243,44 @@ def save(filename, data, **kwargs):
...
@@ -226,36 +243,44 @@ def save(filename, data, **kwargs):
time_step
=
2
,
time_step
=
2
,
dtype
=
np
.
float64
)
dtype
=
np
.
float64
)
"""
"""
if
not
filename
.
lower
().
endswith
(
'
.hdf5
'
):
if
not
filename
.
lower
().
endswith
(
'
.hdf5
'
):
filename
+=
"
.hdf5
"
filename
+=
"
.hdf5
"
# exist_ok does not exist in Python27
# exist_ok does not exist in Python27
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
filename
))):
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
filename
))):
os
.
makedirs
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
filename
)))
#, exist_ok=True)
os
.
makedirs
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
filename
)))
#, exist_ok=True)
_save_info
(
filename
,
data
.
shape
,
**
kwargs
)
append_block
(
filename
,
data
,
**
kwargs
)
def
_save_info
(
filename
,
data_shape
,
**
kwargs
):
f
=
h5py
.
File
(
filename
,
"
w
"
)
f
=
h5py
.
File
(
filename
,
"
w
"
)
try
:
try
:
f
.
attrs
[
"
type
"
]
=
"
General time series data format
"
f
.
attrs
[
"
type
"
]
=
"
General time series data format
"
no_observations
,
no_attributes
=
data
.
shape
no_observations
,
no_attributes
=
data_shape
if
'
name
'
in
kwargs
:
if
'
name
'
in
kwargs
:
f
.
attrs
[
'
name
'
]
=
kwargs
[
'
name
'
]
f
.
attrs
[
'
name
'
]
=
kwargs
[
'
name
'
]
if
'
description
'
in
kwargs
:
if
'
description
'
in
kwargs
:
f
.
attrs
[
'
description
'
]
=
kwargs
[
'
description
'
]
f
.
attrs
[
'
description
'
]
=
kwargs
[
'
description
'
]
f
.
attrs
[
'
no_attributes
'
]
=
no_attributes
f
.
attrs
[
'
no_attributes
'
]
=
no_attributes
if
'
attribute_names
'
in
kwargs
:
if
'
attribute_names
'
in
kwargs
:
assert
len
(
kwargs
[
'
attribute_names
'
])
==
no_attributes
,
"
len(attribute_names)=%d but data shape is %s
"
%
(
len
(
kwargs
[
'
attribute_names
'
]),
data
.
shape
)
if
no_attributes
:
assert
len
(
kwargs
[
'
attribute_names
'
])
==
no_attributes
,
"
len(attribute_names)=%d but data shape is %s
"
%
(
len
(
kwargs
[
'
attribute_names
'
]),
data_shape
)
f
.
create_dataset
(
"
attribute_names
"
,
data
=
np
.
array
([
v
.
encode
(
'
utf-8
'
)
for
v
in
kwargs
[
'
attribute_names
'
]]))
f
.
create_dataset
(
"
attribute_names
"
,
data
=
np
.
array
([
v
.
encode
(
'
utf-8
'
)
for
v
in
kwargs
[
'
attribute_names
'
]]))
if
'
attribute_units
'
in
kwargs
:
if
'
attribute_units
'
in
kwargs
:
assert
(
len
(
kwargs
[
'
attribute_units
'
])
==
no_attributes
)
if
no_attributes
:
assert
(
len
(
kwargs
[
'
attribute_units
'
])
==
no_attributes
)
f
.
create_dataset
(
"
attribute_units
"
,
data
=
np
.
array
([
v
.
encode
(
'
utf-8
'
)
for
v
in
kwargs
[
'
attribute_units
'
]]))
f
.
create_dataset
(
"
attribute_units
"
,
data
=
np
.
array
([
v
.
encode
(
'
utf-8
'
)
for
v
in
kwargs
[
'
attribute_units
'
]]))
if
'
attribute_descriptions
'
in
kwargs
:
if
'
attribute_descriptions
'
in
kwargs
:
assert
(
len
(
kwargs
[
'
attribute_descriptions
'
])
==
no_attributes
)
if
no_attributes
:
assert
(
len
(
kwargs
[
'
attribute_descriptions
'
])
==
no_attributes
)
f
.
create_dataset
(
"
attribute_descriptions
"
,
data
=
np
.
array
([
v
.
encode
(
'
utf-8
'
)
for
v
in
kwargs
[
'
attribute_descriptions
'
]]))
f
.
create_dataset
(
"
attribute_descriptions
"
,
data
=
np
.
array
([
v
.
encode
(
'
utf-8
'
)
for
v
in
kwargs
[
'
attribute_descriptions
'
]]))
f
.
attrs
[
'
no_blocks
'
]
=
0
f
.
attrs
[
'
no_blocks
'
]
=
0
except
Exception
:
except
Exception
:
raise
raise
finally
:
finally
:
f
.
close
()
f
.
close
()
append_block
(
filename
,
data
,
**
kwargs
)
def
append_block
(
filename
,
data
,
**
kwargs
):
def
append_block
(
filename
,
data
,
**
kwargs
):
"""
Append a data block and corresponding time data to already existing file
"""
Append a data block and corresponding time data to already existing file
...
@@ -398,3 +423,42 @@ def check_type(f):
...
@@ -398,3 +423,42 @@ def check_type(f):
raise
ValueError
(
"
HDF5 file must contain a
'
type
'
-attribute with the value
'
General time series data format
'"
)
raise
ValueError
(
"
HDF5 file must contain a
'
type
'
-attribute with the value
'
General time series data format
'"
)
if
'
no_blocks
'
not
in
f
.
attrs
:
if
'
no_blocks
'
not
in
f
.
attrs
:
raise
ValueError
(
"
HDF5 file must contain an attribute named
'
no_blocks
'"
)
raise
ValueError
(
"
HDF5 file must contain an attribute named
'
no_blocks
'"
)
def _get_statistic(time, data, statistics=('min', 'mean', 'max', 'std', 'eq3', 'eq4', 'eq6', 'eq8', 'eq10', 'eq12')):
    """Compute the requested statistics for every sensor (column) of data.

    Parameters
    ----------
    time : array_like
        Time stamps of the observations. Only read for 'eq<m>' statistics,
        which need at least two samples.
    data : numpy.ndarray
        Observations. Statistics are reduced along axis 0 and each row of
        ``data.T`` is handled as one sensor.
    statistics : sequence of str
        Each entry is either the name of a numpy function that is called as
        ``np.<name>(data, 0)`` (e.g. 'min', 'mean', 'max', 'std') or
        'eq<m>' with <m> an integer, which is evaluated via ``eq_load``.

    Returns
    -------
    numpy.ndarray
        Transposed stack of the results, i.e. one row per sensor and one
        column per requested statistic.

    Raises
    ------
    ValueError
        If a statistic name is neither a numpy attribute nor of the form
        'eq<m>'. (Previously such a name silently produced ``None``.)
    """
    def get_stat(stat):
        if hasattr(np, stat):
            return getattr(np, stat)(data, 0)
        if stat.startswith("eq") and stat[2:].isdigit():
            m = float(stat[2:])
            # Covered time span plus one sample interval; identical for all
            # sensors, so it is computed once instead of once per sensor.
            duration = time[-1] - time[0] + time[1] - time[0]
            # NOTE(review): 46 is presumably the number of bins and m the
            # Woehler exponent expected by eq_load - confirm against
            # wetb.fatigue_tools.fatigue.eq_load.
            return [eq_load(sensor, 46, m, duration)[0][0] for sensor in data.T]
        raise ValueError("Unknown statistic: '%s'" % stat)

    return np.array([get_stat(stat) for stat in statistics]).T
def _add_statistic_data(file, stat_data, statistics=('min', 'mean', 'max', 'std', 'eq3', 'eq4', 'eq6', 'eq8', 'eq10', 'eq12')):
    """Store statistic names and values in a "Statistic" group of an HDF5 file.

    Parameters
    ----------
    file : str
        Name of the HDF5 file; it is opened in append mode ("a").
    stat_data : numpy.ndarray
        Statistic values, written as float64 to the dataset
        "Statistic/statistic_data".
    statistics : sequence of str
        Names of the statistics, written utf-8 encoded to the dataset
        "Statistic/statistic_names".
    """
    # Encode before touching the file so a bad name cannot leave a
    # half-written group behind.
    names = np.array([v.encode('utf-8') for v in statistics])
    # The context manager closes the file even if h5py raises, e.g. when the
    # "Statistic" group already exists.
    with h5py.File(file, "a") as f:
        stat_grp = f.create_group("Statistic")
        stat_grp.create_dataset("statistic_names", data=names)
        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy; np.float64 is the explicit equivalent.
        stat_grp.create_dataset("statistic_data", data=stat_data.astype(np.float64))
def add_statistic(file, statistics=['min', 'mean', 'max', 'std', 'eq3', 'eq4', 'eq6', 'eq8', 'eq10', 'eq12']):
    """Compute statistics of a gtsdf file and append them to that same file.

    Parameters
    ----------
    file : str
        Name of the gtsdf HDF5 file to read and extend.
    statistics : list of str
        Names of the statistics to compute and store.
    """
    # The info dictionary returned by load is not needed here.
    timestamps, series, _ = load(file)
    summary = _get_statistic(timestamps, series, statistics)
    _add_statistic_data(file, summary, statistics)
def load_statistic(filename):
    """Load the statistics stored in the "Statistic" group of a gtsdf file.

    Parameters
    ----------
    filename : str or h5py.File
        File name, or an already opened h5py file object.

    Returns
    -------
    stat_data : pandas.DataFrame
        Content of "Statistic/statistic_data" with the decoded entries of
        "Statistic/statistic_names" as column labels.
    info : dict
        File information as returned by ``_load_info``.
    """
    f = _open_h5py_file(filename)
    try:
        info = _load_info(f)
        # Copy names and values into memory before the file is closed.
        names = list(decode(f['Statistic']['statistic_names']))
        data = np.array(f['Statistic']['statistic_data'])
    finally:
        # Release the handle on success and on failure, as load() does.
        f.close()
    return pd.DataFrame(data, columns=names), info
def compress2statistics(filename, statistics=['min', 'mean', 'max', 'std', 'eq3', 'eq4', 'eq6', 'eq8', 'eq10', 'eq12']):
    """Replace the time series of a gtsdf file by its statistics.

    The file is loaded and the statistics are computed first. The file is
    then rewritten with its header information only (``_save_info`` opens it
    in "w" mode, which discards the previous content), and finally the
    statistics are stored in it.

    Parameters
    ----------
    filename : str
        Name of the gtsdf HDF5 file to compress in place.
    statistics : list of str
        Names of the statistics to compute and store.
    """
    timestamps, series, header = load(filename)
    summary = _get_statistic(timestamps, series, statistics)
    # From here on the original time series no longer exists on disk.
    _save_info(filename, series.shape, **header)
    _add_statistic_data(filename, summary, statistics)
This diff is collapsed.
Click to expand it.
wetb/gtsdf/tests/test_gtsdf_stat.py
0 → 100644
+
64
−
0
View file @
f181b12c
'''
Created on 12/09/2013
@author: mmpe
'''
from
__future__
import
division
from
__future__
import
unicode_literals
from
__future__
import
print_function
from
__future__
import
absolute_import
from
builtins
import
super
from
builtins
import
range
from
future
import
standard_library
standard_library
.
install_aliases
()
import
h5py
import
numpy
as
np
from
wetb
import
gtsdf
import
unittest
import
os
# Resolve both directories from the absolute location of this file. With a
# bare relative __file__ (e.g. "test_gtsdf_stat.py"), dirname() is empty and
# tmp_path would otherwise become the file-system root "/tmp/".
_here = os.path.dirname(os.path.abspath(__file__))
# Directory for files written by the tests.
tmp_path = _here + "/tmp/"
# Directory holding the reference test files.
tfp = _here + "/test_files/"
class Test_gsdf(unittest.TestCase):
    """Tests for storing statistics in gtsdf files."""

    def setUp(self):
        unittest.TestCase.setUp(self)
        # Make sure the output directory for generated files exists.
        if not os.path.isdir(tmp_path):
            os.makedirs(tmp_path)

    @classmethod
    def tearDownClass(cls):
        super(Test_gsdf, cls).tearDownClass()
        # Cleanup of tmp_path is disabled, so generated files remain on disk.
        # shutil.rmtree(tmp_path)

    def test_gtsdf_stat(self):
        """add_statistic stores one row per sensor and one column per statistic."""
        time, data, info = gtsdf.load(tfp + 'test.hdf5')
        fn = tmp_path + "test_stat.hdf5"
        gtsdf.save(fn, data, time=time, **info)
        gtsdf.add_statistic(fn)
        stat_data, info = gtsdf.load_statistic(fn)
        # First column of the default statistics is 'min'.
        self.assertEqual(data[:, 0].min(), stat_data.values[0, 0])
        self.assertEqual(stat_data.shape, (49, 10))

    def test_gtsdf_compress2stat(self):
        """compress2statistics shrinks the file to less than 1/50 of the reference."""
        time, data, info = gtsdf.load(tfp + 'test.hdf5')
        fn = tmp_path + "test_compress2stat.hdf5"
        gtsdf.save(fn, data, time=time, **info)
        # np.float was an alias of the builtin float (float64) and has been
        # removed from NumPy; np.float64 requests the same dtype.
        gtsdf.save(tmp_path + "test_compress2stat2.hdf5", data, time=time, dtype=np.float64, **info)
        gtsdf.compress2statistics(fn)
        self.assertLess(os.path.getsize(fn) * 50, os.path.getsize(tfp + 'test.hdf5'))
if __name__ == "__main__":
    # Run all tests of this module when the file is executed as a script.
    # Enable the next line to run a single test only.
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment