Skip to content
Snippets Groups Projects
Commit 694dc264 authored by Emmanuel Branlard's avatar Emmanuel Branlard Committed by Mads M. Pedersen
Browse files

More fast IO and improved memory efficiency

parent 532b9d71
No related branches found
No related tags found
No related merge requests found
'''
Created on 03/09/2015
@author: MMPE
'''
from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function
......@@ -16,6 +11,7 @@ standard_library.install_aliases()
import os
import numpy as np
import struct
from itertools import takewhile
def load_output(filename):
"""Load a FAST binary or ascii output file
......@@ -49,21 +45,31 @@ def load_ascii_output(filename):
with open(filename) as f:
info = {}
info['name'] = os.path.splitext(os.path.basename(filename))[0]
try:
header = [f.readline() for _ in range(8)]
info['description'] = header[4].strip()
info['attribute_names'] = header[6].split()
info['attribute_units'] = [unit[1:-1] for unit in header[7].split()] #removing "()"
data = np.array([line.split() for line in f.readlines()]).astype(np.float)
return data, info
except (ValueError, AssertionError):
raise
# Header is whatever is before the keyword `time`
in_header = True
header = []
while in_header:
l = f.readline()
if not l:
raise Exception('Error finding the end of FAST out file header. Keyword Time missing.')
in_header= (l+' dummy').lower().split()[0] != 'time'
if in_header:
header.append(l)
else:
info['description'] = header
info['attribute_names'] = l.split()
info['attribute_units'] = [unit[1:-1] for unit in f.readline().split()]
# Data, up to end of file or empty line (potential comment line at the end)
data = np.array([l.strip().split() for l in takewhile(lambda x: len(x.strip())>0, f.readlines())]).astype(np.float)
return data, info
def load_binary_output(filename, use_buffer=True):
"""
03/09/15: Ported from ReadFASTbinary.m by Mads M Pedersen, DTU Wind
24/10/18: Low memory/buffered version by E. Branlard, NREL
def load_binary_output(filename):
"""Ported from ReadFASTbinary.m by Mads M Pedersen, DTU Wind
Info about ReadFASTbinary.m:
% Author: Bonnie Jonkman, National Renewable Energy Laboratory
% (c) 2012, National Renewable Energy Laboratory
......@@ -74,6 +80,45 @@ def load_binary_output(filename):
fmt, nbytes = {'uint8': ('B', 1), 'int16':('h', 2), 'int32':('i', 4), 'float32':('f', 4), 'float64':('d', 8)}[type]
return struct.unpack(fmt * n, fid.read(nbytes * n))
def freadRowOrderTableBuffered(fid, n, type_in, nCols, nOff=0, type_out='float64'):
    """
    Reads a row-ordered table from a binary file.

    Read `n` data of type `type_in`, assumed to be a row-ordered table of
    `nCols` columns. Memory usage is optimized by allocating the output array
    only once. Buffered reading is done for improved performance (in
    particular for 32-bit python).

    `nOff` allows for additional column space at the beginning of the storage
    table. Typically, `nOff=1` provides a column at the beginning to store the
    time vector.

    @author E.Branlard, NREL

    Parameters
    ----------
    fid      : binary file object, positioned at the start of the table data
    n        : total number of values to read (number of lines * nCols)
    type_in  : one of 'uint8', 'int16', 'int32', 'float32', 'float64'
    nCols    : number of columns of the table stored in the file
    nOff     : number of extra zero-filled columns prepended to the output
    type_out : numpy dtype string of the returned array

    Returns
    -------
    data : numpy array of shape (n // nCols, nCols + nOff); the file values
           occupy columns [nOff:], columns [:nOff] are left as zeros.
    """
    fmt, nbytes = {'uint8': ('B', 1), 'int16': ('h', 2), 'int32': ('i', 4),
                   'float32': ('f', 4), 'float64': ('d', 8)}[type_in]
    nLines = n // nCols
    GoodBufferSize = 4096 * 40
    nLinesPerBuffer = int(GoodBufferSize / nCols)
    BufferSize = nCols * nLinesPerBuffer
    # Allocation of data, done only once to keep peak memory low
    data = np.zeros((nLines, nCols + nOff), dtype=type_out)
    # Reading in chunks of at most BufferSize values
    nIntRead = 0
    nLinesRead = 0
    try:
        while nIntRead < n:
            nIntToRead = min(n - nIntRead, BufferSize)
            nLinesToRead = int(nIntToRead / nCols)
            Buffer = np.array(struct.unpack(fmt * nIntToRead, fid.read(nbytes * nIntToRead)))
            Buffer = Buffer.reshape(-1, nCols)
            data[nLinesRead:(nLinesRead + nLinesToRead), nOff:(nOff + nCols)] = Buffer
            nLinesRead = nLinesRead + nLinesToRead
            nIntRead = nIntRead + nIntToRead
    except Exception:
        # BUG FIX: the original format string had two %d placeholders for
        # three arguments, so the raise itself failed with a TypeError
        # instead of reporting the short read.
        # NOTE(review): `filename` is taken from the enclosing scope
        # (load_binary_output) -- confirm this function stays nested there.
        raise Exception('Read only %d of %d values in file %s' % (nIntRead, n, filename))
    return data
FileFmtID_WithTime = 1 #% File identifiers used in FAST
FileFmtID_WithoutTime = 2
LenName = 10 #; % number of characters per channel name
......@@ -81,10 +126,13 @@ def load_binary_output(filename):
with open(filename, 'rb') as fid:
FileID = fread(fid, 1, 'int16') #; % FAST output file format, INT(2)
if FileID[0] not in [FileFmtID_WithTime, FileFmtID_WithoutTime]:
raise Exception('FileID not supported {}. Is it a FAST binary file?'.format(FileID))
NumOutChans = fread(fid, 1, 'int32')[0] #; % The number of output channels, INT(4)
NT = fread(fid, 1, 'int32')[0] #; % The number of time steps, INT(4)
if FileID == FileFmtID_WithTime:
TimeScl = fread(fid, 1, 'float64') #; % The time slopes for scaling, REAL(8)
TimeOff = fread(fid, 1, 'float64') #; % The time offsets for scaling, REAL(8)
......@@ -128,26 +176,37 @@ def load_binary_output(filename):
cnt = len(PackedTime)
if cnt < NT:
raise Exception('Could not read entire %s file: read %d of %d time values' % (filename, cnt, NT))
PackedData = fread(fid, nPts, 'int16') #; % read the channel data
cnt = len(PackedData)
if cnt < nPts:
raise Exception('Could not read entire %s file: read %d of %d values' % (filename, cnt, nPts))
# %-------------------------
# % Scale the packed binary to real data
# %-------------------------
#
data = np.array(PackedData).reshape(NT, NumOutChans)
data = (data - ColOff) / ColScl
if use_buffer:
# Reading data using buffers, and allowing an offset for time column (nOff=1)
data = freadRowOrderTableBuffered(fid, nPts, 'int16', NumOutChans, nOff=1, type_out='float64')
else:
# NOTE: unpacking huge data not possible on 32bit machines
PackedData = fread(fid, nPts, 'int16') #; % read the channel data
cnt = len(PackedData)
if cnt < nPts:
raise Exception('Could not read entire %s file: read %d of %d values' % (filename, cnt, nPts))
data = np.array(PackedData).reshape(NT, NumOutChans)
del PackedData
if FileID == FileFmtID_WithTime:
time = (np.array(PackedTime) - TimeOff) / TimeScl;
else:
time = TimeOut1 + TimeIncr * np.arange(NT)
data = np.concatenate([time.reshape(NT, 1), data], 1)
# %-------------------------
# % Scale the packed binary to real data
# %-------------------------
if use_buffer:
# Scaling Data
for iCol in range(NumOutChans):
data[:,iCol+1] = (data[:,iCol+1] - ColOff[iCol]) / ColScl[iCol]
# Adding time column
data[:,0] = time
else:
# NOTE: memory expensive due to time conversion, and concatenation
data = (data - ColOff) / ColScl
data = np.concatenate([time.reshape(NT, 1), data], 1)
info = {'name': os.path.splitext(os.path.basename(filename))[0],
'description': DescStr,
......
......@@ -11,13 +11,14 @@ from future import standard_library
standard_library.install_aliases()
import unittest
from wetb.fast.fast_io import load_output
from wetb.fast.fast_io import load_output, load_binary_output
import os
testfilepath = os.path.join(os.path.dirname(__file__), 'test_files/') # test file path
class TestFastIO(unittest.TestCase):
class TestFastIO(unittest.TestCase):
def test_load_output(self):
data, info = load_output(testfilepath + 'DTU10MW.out')
self.assertAlmostEqual(data[4, 3], 4.295E-04)
......@@ -25,8 +26,6 @@ class TestFastIO(unittest.TestCase):
self.assertEqual(info['attribute_names'][1], "RotPwr")
self.assertEqual(info['attribute_units'][1], "kW")
def test_load_binary(self):
data, info = load_output(testfilepath + 'test_binary.outb')
self.assertEqual(info['name'], 'test_binary')
......@@ -35,12 +34,26 @@ class TestFastIO(unittest.TestCase):
self.assertEqual(info['attribute_units'][7], 'deg/s^2')
self.assertAlmostEqual(data[10, 4], 138.822277739535)
def test_load_binary2(self):
    """Cross-check the buffered reader (now the default) against the
    legacy, memory-expensive unbuffered implementation."""
    import numpy as np
    fname = testfilepath + 'test_binary.outb'
    data_buf, info_buf = load_binary_output(fname, use_buffer=True)
    data_ref, info_ref = load_binary_output(fname, use_buffer=False)
    self.assertEqual(info_buf['name'], info_ref['name'])
    np.testing.assert_array_equal(data_buf[0, :], data_ref[0, :])
    np.testing.assert_array_equal(data_buf[-1, :], data_ref[-1, :])
def test_load_output2(self):
    """Metadata of an ASCII FAST output file is parsed correctly."""
    _, info = load_output(testfilepath + 'DTU10MW.out')
    expected = {'name': "DTU10MW"}
    for key, value in expected.items():
        self.assertEqual(info[key], value)
    self.assertEqual(info['attribute_names'][1], "RotPwr")
    self.assertEqual(info['attribute_units'][1], "kW")
def test_load_output3(self):
    """ASCII file with a trailing comment line after the data table:
    the reader must stop at the empty line and ignore the comment."""
    data, _ = load_output(testfilepath + 'FASTOut_Hydro.out')
    self.assertAlmostEqual(-1.0E+01, data[3, 1])
if __name__ == "__main__":
......
These predictions were generated by HydroDyn on 19-Oct-2018 at 10:15:15.
Time Wave1Elev
(sec) (m)
0.0 0.0e+01
1.0 1.0e+01
2.0 0.0e+01
3.0 -1.0e+01
4.0 0.0e+01
This output file was closed on 19-Oct-2018 at 10:15:16.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment