Skip to content
Snippets Groups Projects
Commit 1f59fdcb authored by Mads M. Pedersen's avatar Mads M. Pedersen
Browse files

add support for new fast binary formats

parent 13d9b8df
No related branches found
No related tags found
1 merge request!160add support for new fast binary formats
Pipeline #17239 passed
......@@ -13,6 +13,7 @@ import numpy as np
import struct
from itertools import takewhile
def load_output(filename):
"""Load a FAST binary or ascii output file
......@@ -41,6 +42,7 @@ def load_output(filename):
return load_binary_output(filename)
return load_ascii_output(filename)
def load_ascii_output(filename):
with open(filename) as f:
info = {}
......@@ -52,7 +54,7 @@ def load_ascii_output(filename):
l = f.readline()
if not l:
raise Exception('Error finding the end of FAST out file header. Keyword Time missing.')
in_header= (l+' dummy').lower().split()[0] != 'time'
in_header = (l + ' dummy').lower().split()[0] != 'time'
if in_header:
header.append(l)
else:
......@@ -61,7 +63,8 @@ def load_ascii_output(filename):
info['attribute_units'] = [unit[1:-1] for unit in f.readline().split()]
# Data, up to end of file or empty line (potential comment line at the end)
data = np.array([l.strip().split() for l in takewhile(lambda x: len(x.strip())>0, f.readlines())]).astype(np.float)
data = np.array([l.strip().split() for l in takewhile(
lambda x: len(x.strip()) > 0, f.readlines())]).astype(np.float)
return data, info
......@@ -77,11 +80,16 @@ def load_binary_output(filename, use_buffer=True):
% Edited for FAST v7.02.00b-bjj 22-Oct-2012
"""
def fread(fid, n, type):
fmt, nbytes = {'uint8': ('B', 1), 'int16':('h', 2), 'int32':('i', 4), 'float32':('f', 4), 'float64':('d', 8)}[type]
fmt, nbytes = {
'uint8': ('B', 1),
'int16': ('h', 2),
'int32': ('i', 4),
'float32': ('f', 4),
'float64': ('d', 8)}[type]
return struct.unpack(fmt * n, fid.read(nbytes * n))
def freadRowOrderTableBuffered(fid, n, type_in, nCols, nOff=0, type_out='float64'):
"""
"""
Reads of row-ordered table from a binary file.
Read `n` data of type `type_in`, assumed to be a row ordered table of `nCols` columns.
......@@ -94,95 +102,107 @@ def load_binary_output(filename, use_buffer=True):
@author E.Branlard, NREL
"""
fmt, nbytes = {'uint8': ('B', 1), 'int16':('h', 2), 'int32':('i', 4), 'float32':('f', 4), 'float64':('d', 8)}[type_in]
nLines = int(n/nCols)
GoodBufferSize = 4096*40
nLinesPerBuffer = int(GoodBufferSize/nCols)
BufferSize = nCols * nLinesPerBuffer
nBuffer = int(n/BufferSize)
fmt, nbytes = {
'uint8': (
'B', 1), 'int16': (
'h', 2), 'int32': (
'i', 4), 'float32': (
'f', 4), 'float64': (
'd', 8)}[type_in]
nLines = int(n / nCols)
GoodBufferSize = 4096 * 40
nLinesPerBuffer = int(GoodBufferSize / nCols)
BufferSize = nCols * nLinesPerBuffer
nBuffer = int(n / BufferSize)
# Allocation of data
data = np.zeros((nLines,nCols+nOff), dtype = type_out)
data = np.zeros((nLines, nCols + nOff), dtype=type_out)
# Reading
try:
nIntRead = 0
nIntRead = 0
nLinesRead = 0
while nIntRead<n:
nIntToRead = min(n-nIntRead, BufferSize)
nLinesToRead = int(nIntToRead/nCols)
while nIntRead < n:
nIntToRead = min(n - nIntRead, BufferSize)
nLinesToRead = int(nIntToRead / nCols)
Buffer = np.array(struct.unpack(fmt * nIntToRead, fid.read(nbytes * nIntToRead)))
Buffer = Buffer.reshape(-1,nCols)
data[ nLinesRead:(nLinesRead+nLinesToRead), nOff:(nOff+nCols) ] = Buffer
Buffer = Buffer.reshape(-1, nCols)
data[nLinesRead:(nLinesRead + nLinesToRead), nOff:(nOff + nCols)] = Buffer
nLinesRead = nLinesRead + nLinesToRead
nIntRead = nIntRead + nIntToRead
except:
raise Exception('Read only %d of %d values in file:' % (nIntRead, n, filename))
nIntRead = nIntRead + nIntToRead
except Exception:
raise Exception('Read only %d of %d values in file: %s' % (nIntRead, n, filename))
return data
FileFmtID_WithTime = 1 #% File identifiers used in FAST
FileFmtID_WithTime = 1 # % File identifiers used in FAST
FileFmtID_WithoutTime = 2
LenName = 10 #; % number of characters per channel name
LenUnit = 10 #; % number of characters per unit name
FileFmtID_NoCompressWithoutTime = 3
FileFmtID_ChanLen_In = 4 # time channel and channel length is not included
with open(filename, 'rb') as fid:
FileID = fread(fid, 1, 'int16') #; % FAST output file format, INT(2)
if FileID[0] not in [FileFmtID_WithTime, FileFmtID_WithoutTime]:
FileID = fread(fid, 1, 'int16')[0] # ; % FAST output file format, INT(2)
if FileID not in [FileFmtID_WithTime, FileFmtID_WithoutTime,
FileFmtID_ChanLen_In, FileFmtID_NoCompressWithoutTime]:
raise Exception('FileID not supported {}. Is it a FAST binary file?'.format(FileID))
NumOutChans = fread(fid, 1, 'int32')[0] #; % The number of output channels, INT(4)
NT = fread(fid, 1, 'int32')[0] #; % The number of time steps, INT(4)
if FileID == FileFmtID_ChanLen_In:
LenName = fread(fid, 1, 'int16')[0] # Number of characters in channel names and units
else:
LenName = 10 # default number of characters per channel name
NumOutChans = fread(fid, 1, 'int32')[0] # ; % The number of output channels, INT(4)
NT = fread(fid, 1, 'int32')[0] # ; % The number of time steps, INT(4)
if FileID == FileFmtID_WithTime:
TimeScl = fread(fid, 1, 'float64') #; % The time slopes for scaling, REAL(8)
TimeOff = fread(fid, 1, 'float64') #; % The time offsets for scaling, REAL(8)
TimeScl = fread(fid, 1, 'float64') # ; % The time slopes for scaling, REAL(8)
TimeOff = fread(fid, 1, 'float64') # ; % The time offsets for scaling, REAL(8)
else:
TimeOut1 = fread(fid, 1, 'float64') #; % The first time in the time series, REAL(8)
TimeIncr = fread(fid, 1, 'float64') #; % The time increment, REAL(8)
TimeOut1 = fread(fid, 1, 'float64') # ; % The first time in the time series, REAL(8)
TimeIncr = fread(fid, 1, 'float64') # ; % The time increment, REAL(8)
if FileID == FileFmtID_NoCompressWithoutTime:
ColScl = np.ones(NumOutChans)
ColOff = np.zeros(NumOutChans)
else:
ColScl = fread(fid, NumOutChans, 'float32') # ; % The channel slopes for scaling, REAL(4)
ColOff = fread(fid, NumOutChans, 'float32') # ; % The channel offsets for scaling, REAL(4)
ColScl = fread(fid, NumOutChans, 'float32') #; % The channel slopes for scaling, REAL(4)
ColOff = fread(fid, NumOutChans, 'float32') #; % The channel offsets for scaling, REAL(4)
LenDesc = fread(fid, 1, 'int32')[0] #; % The number of characters in the description string, INT(4)
DescStrASCII = fread(fid, LenDesc, 'uint8') #; % DescStr converted to ASCII
LenDesc = fread(fid, 1, 'int32')[0] # ; % The number of characters in the description string, INT(4)
DescStrASCII = fread(fid, LenDesc, 'uint8') # ; % DescStr converted to ASCII
DescStr = "".join(map(chr, DescStrASCII)).strip()
ChanName = [] # initialize the ChanName cell array
for iChan in range(NumOutChans + 1):
ChanNameASCII = fread(fid, LenName, 'uint8') #; % ChanName converted to numeric ASCII
ChanNameASCII = fread(fid, LenName, 'uint8') # ; % ChanName converted to numeric ASCII
ChanName.append("".join(map(chr, ChanNameASCII)).strip())
ChanUnit = [] # initialize the ChanUnit cell array
for iChan in range(NumOutChans + 1):
ChanUnitASCII = fread(fid, LenUnit, 'uint8') #; % ChanUnit converted to numeric ASCII
ChanUnitASCII = fread(fid, LenName, 'uint8') # ; % ChanUnit converted to numeric ASCII
ChanUnit.append("".join(map(chr, ChanUnitASCII)).strip()[1:-1])
# %-------------------------
# % get the channel time series
# %-------------------------
nPts = NT * NumOutChans #; % number of data points in the file
nPts = NT * NumOutChans # ; % number of data points in the file
if FileID == FileFmtID_WithTime:
PackedTime = fread(fid, NT, 'int32') #; % read the time data
PackedTime = fread(fid, NT, 'int32') # ; % read the time data
cnt = len(PackedTime)
if cnt < NT:
raise Exception('Could not read entire %s file: read %d of %d time values' % (filename, cnt, NT))
if use_buffer:
# Reading data using buffers, and allowing an offset for time column (nOff=1)
data = freadRowOrderTableBuffered(fid, nPts, 'int16', NumOutChans, nOff=1, type_out='float64')
if FileID == FileFmtID_NoCompressWithoutTime:
data = freadRowOrderTableBuffered(fid, nPts, 'float64', NumOutChans, nOff=1, type_out='float64')
else:
data = freadRowOrderTableBuffered(fid, nPts, 'int16', NumOutChans, nOff=1, type_out='float64')
else:
# NOTE: unpacking huge data not possible on 32bit machines
PackedData = fread(fid, nPts, 'int16') #; % read the channel data
if FileID == FileFmtID_NoCompressWithoutTime:
PackedData = fread(fid, nPts, 'float64') # ; % read the channel data
else:
PackedData = fread(fid, nPts, 'int16') # ; % read the channel data
cnt = len(PackedData)
if cnt < nPts:
raise Exception('Could not read entire %s file: read %d of %d values' % (filename, cnt, nPts))
......@@ -190,7 +210,7 @@ def load_binary_output(filename, use_buffer=True):
del PackedData
if FileID == FileFmtID_WithTime:
time = (np.array(PackedTime) - TimeOff) / TimeScl;
time = (np.array(PackedTime) - TimeOff) / TimeScl
else:
time = TimeOut1 + TimeIncr * np.arange(NT)
......@@ -200,9 +220,9 @@ def load_binary_output(filename, use_buffer=True):
if use_buffer:
# Scaling Data
for iCol in range(NumOutChans):
data[:,iCol+1] = (data[:,iCol+1] - ColOff[iCol]) / ColScl[iCol]
data[:, iCol + 1] = (data[:, iCol + 1] - ColOff[iCol]) / ColScl[iCol]
# Adding time column
data[:,0] = time
data[:, 0] = time
else:
# NOTE: memory expensive due to time conversion, and concatenation
data = (data - ColOff) / ColScl
......@@ -213,4 +233,3 @@ def load_binary_output(filename, use_buffer=True):
'attribute_names': ChanName,
'attribute_units': ChanUnit}
return data, info
'''
Created on 03/09/2015
@author: MMPE
'''
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from future import standard_library
from tests import npt
import pytest
standard_library.install_aliases()
import unittest
import numpy as np
from wetb.fast.fast_io import load_output, load_binary_output
import os
# Directory holding the FAST output fixture files; the trailing separator is
# intentional because tests build paths with plain string concatenation ('+').
testfilepath = os.path.join(os.path.dirname(__file__), 'test_files/')  # test file path
class TestFastIO(unittest.TestCase):
    """unittest-style checks for the FAST output readers (ascii and binary)."""

    def test_load_output(self):
        # Spot-check one data value and the parsed metadata of an ascii file.
        data, info = load_output(testfilepath + 'DTU10MW.out')
        self.assertAlmostEqual(data[4, 3], 4.295E-04)
        self.assertEqual(info['name'], "DTU10MW")
        self.assertEqual(info['attribute_names'][1], "RotPwr")
        self.assertEqual(info['attribute_units'][1], "kW")

    def test_load_binary(self):
        # Spot-check metadata and one value of a compressed binary file.
        data, info = load_output(testfilepath + 'test_binary.outb')
        self.assertEqual(info['name'], 'test_binary')
        self.assertEqual(info['description'], 'Modified by mwDeriveSensors on 27-Jul-2015 16:32:06')
        self.assertEqual(info['attribute_names'][4], 'RotPwr')
        self.assertEqual(info['attribute_units'][7], 'deg/s^2')
        self.assertAlmostEqual(data[10, 4], 138.822277739535)

    def test_load_binary2(self):
        # The old method was not using a buffer and was also memory expensive
        # Now use_buffer is set to true by default
        import numpy as np
        fname = testfilepath + 'test_binary.outb'
        data, info = load_binary_output(fname, use_buffer=True)
        data_old, info_old = load_binary_output(fname, use_buffer=False)
        # Buffered and unbuffered code paths must agree on name and data.
        self.assertEqual(info['name'], info_old['name'])
        np.testing.assert_array_equal(data[0, :], data_old[0, :])
        np.testing.assert_array_equal(data[-1, :], data_old[-1, :])

    def test_load_output2(self):
        # Metadata-only check of the ascii reader.
        data, info = load_output(testfilepath + 'DTU10MW.out')
        self.assertEqual(info['name'], "DTU10MW")
        self.assertEqual(info['attribute_names'][1], "RotPwr")
        self.assertEqual(info['attribute_units'][1], "kW")

    def test_load_output3(self):
        # This file has an extra comment at the end
        data, info = load_output(testfilepath + 'FASTOut_Hydro.out')
        self.assertAlmostEqual(data[3, 1], -1.0E+01)
# Allow running this test module directly (outside a test runner).
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testload_output']
    unittest.main()
def test_load_output():
    """Spot-check one data value and the parsed metadata of the DTU10MW ascii file."""
    data, info = load_output(testfilepath + 'DTU10MW.out')
    npt.assert_equal(data[4, 3], 4.295E-04)
    npt.assert_equal(info['name'], "DTU10MW")
    name_at_1 = info['attribute_names'][1]
    unit_at_1 = info['attribute_units'][1]
    npt.assert_equal(name_at_1, "RotPwr")
    npt.assert_equal(unit_at_1, "kW")
def test_load_binary():
    """Spot-check metadata and one value read from a compressed FAST binary file."""
    data, info = load_output(testfilepath + 'test_binary.outb')
    # Scalar metadata first, then channel-indexed metadata, then a data value.
    npt.assert_equal(info['name'], 'test_binary')
    npt.assert_equal(info['description'], 'Modified by mwDeriveSensors on 27-Jul-2015 16:32:06')
    npt.assert_equal(info['attribute_names'][4], 'RotPwr')
    npt.assert_equal(info['attribute_units'][7], 'deg/s^2')
    npt.assert_almost_equal(data[10, 4], 138.822277739535)
def test_load_binary_buffered():
    """Buffered and unbuffered binary readers must produce identical results.

    The old method was not using a buffer and was memory expensive;
    use_buffer is now True by default.
    """
    import numpy as np
    fname = testfilepath + 'test_binary.outb'
    data, info = load_binary_output(fname, use_buffer=True)
    data_old, info_old = load_binary_output(fname, use_buffer=False)
    npt.assert_equal(info['name'], info_old['name'])
    # First and last rows are enough to catch offset/ordering mistakes.
    np.testing.assert_array_equal(data[0, :], data_old[0, :])
    np.testing.assert_array_equal(data[-1, :], data_old[-1, :])
@pytest.mark.parametrize('fid,tol', [(2, 1), (3, 1e-4)])
@pytest.mark.parametrize('buffer', [True, False])
def test_load_bindary_fid(fid, tol, buffer):
    """Files in older FileID formats must match the fid=4 reference file.

    The compressed format (fid=2) only matches within its quantization error,
    hence the per-format tolerance. NOTE(review): 'bindary' is a typo in the
    test name, kept because renaming would change the collected test id.
    """
    ref_data, ref_info = load_output(testfilepath + '5MW_Land_BD_DLL_WTurb_fid04.outb')
    data, info = load_binary_output(
        testfilepath + '5MW_Land_BD_DLL_WTurb_fid%02d.outb' % fid, use_buffer=buffer)
    # All metadata except name/description must match the reference exactly.
    for key, value in ref_info.items():
        if key in {'name', 'description'}:
            continue
        npt.assert_array_equal(info[key], value)
    # Compare column-wise error against a tolerance scaled by each channel's range.
    span = data.max(0) - data.min(0) + 1e-20
    npt.assert_array_less(np.abs(data - ref_data).max(0), span * tol)
def test_load_output2():
    """Metadata-only check of the ascii reader on the DTU10MW file."""
    data, info = load_output(testfilepath + 'DTU10MW.out')
    expected = [('name', "DTU10MW")]
    for key, want in expected:
        npt.assert_equal(info[key], want)
    npt.assert_equal(info['attribute_names'][1], "RotPwr")
    npt.assert_equal(info['attribute_units'][1], "kW")
def test_load_output3():
    """An ascii file with a trailing comment line must still parse cleanly."""
    # This file has an extra comment at the end
    data, info = load_output(testfilepath + 'FASTOut_Hydro.out')
    npt.assert_almost_equal(data[3, 1], -1.0E+01)
File added
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment