diff --git a/wetb/fast/fast_io.py b/wetb/fast/fast_io.py
index cbb1c3f9d380fbf887d13b0b57c544a6c67a52ae..abb84309179d24d63da3fcc0f1f2513f7034f357 100644
--- a/wetb/fast/fast_io.py
+++ b/wetb/fast/fast_io.py
@@ -1,8 +1,3 @@
-'''
-Created on 03/09/2015
-
-@author: MMPE
-'''
 from __future__ import division
 from __future__ import unicode_literals
 from __future__ import print_function
@@ -16,6 +11,7 @@ standard_library.install_aliases()
 import os
 import numpy as np
 import struct
+from itertools import takewhile
 
 def load_output(filename):
     """Load a FAST binary or ascii output file
@@ -49,21 +45,31 @@ def load_ascii_output(filename):
     with open(filename) as f:
         info = {}
         info['name'] = os.path.splitext(os.path.basename(filename))[0]
-        try:
-            header = [f.readline() for _ in range(8)]
-            info['description'] = header[4].strip()
-            info['attribute_names'] = header[6].split()
-            info['attribute_units'] = [unit[1:-1] for unit in header[7].split()]  #removing "()"
-            data = np.array([line.split() for line in f.readlines()]).astype(np.float)
-
-            return data, info
-        except (ValueError, AssertionError):
-
-            raise
-
+        # Header is whatever is before the keyword `time`
+        in_header = True
+        header = []
+        while in_header:
+            l = f.readline()
+            if not l:
+                raise Exception('Error finding the end of FAST out file header. Keyword Time missing.')
+            in_header = (l + ' dummy').lower().split()[0] != 'time'
+            if in_header:
+                header.append(l)
+            else:
+                info['description'] = header
+                info['attribute_names'] = l.split()
+                info['attribute_units'] = [unit[1:-1] for unit in f.readline().split()]
+
+        # Data, up to end of file or empty line (potential comment line at the end)
+        data = np.array([l.strip().split() for l in takewhile(lambda x: len(x.strip())>0, f.readlines())]).astype(float)
+        return data, info
+
+
+def load_binary_output(filename, use_buffer=True):
+    """
+    03/09/15: Ported from ReadFASTbinary.m by Mads M Pedersen, DTU Wind
+    24/10/18: Low memory/buffered version by E. Branlard, NREL
 
-def load_binary_output(filename):
-    """Ported from ReadFASTbinary.m by Mads M Pedersen, DTU Wind
     Info about ReadFASTbinary.m:
     % Author: Bonnie Jonkman, National Renewable Energy Laboratory
     % (c) 2012, National Renewable Energy Laboratory
@@ -74,6 +80,45 @@ def load_binary_output(filename):
         fmt, nbytes = {'uint8': ('B', 1), 'int16':('h', 2), 'int32':('i', 4), 'float32':('f', 4), 'float64':('d', 8)}[type]
         return struct.unpack(fmt * n, fid.read(nbytes * n))
 
+    def freadRowOrderTableBuffered(fid, n, type_in, nCols, nOff=0, type_out='float64'):
+        """ 
+        Reads of row-ordered table from a binary file.
+
+        Read `n` data of type `type_in`, assumed to be a row ordered table of `nCols` columns.
+        Memory usage is optimized by allocating the data only once.
+        Buffered reading is done for improved performances (in particular for 32bit python)
+
+        `nOff` allows for additional column space at the begining of the storage table.
+        Typically, `nOff=1`, provides a column at the beginning to store the time vector.
+
+        @author E.Branlard, NREL
+
+        """
+        fmt, nbytes = {'uint8': ('B', 1), 'int16':('h', 2), 'int32':('i', 4), 'float32':('f', 4), 'float64':('d', 8)}[type_in]
+        nLines          = int(n/nCols)
+        GoodBufferSize  = 4096*40
+        nLinesPerBuffer = int(GoodBufferSize/nCols)
+        BufferSize      = nCols * nLinesPerBuffer
+        nBuffer         = int(n/BufferSize)
+        # Allocation of data
+        data = np.zeros((nLines,nCols+nOff), dtype = type_out)
+        # Reading
+        try:
+            nIntRead   = 0
+            nLinesRead = 0
+            while nIntRead<n:
+                nIntToRead = min(n-nIntRead, BufferSize)
+                nLinesToRead = int(nIntToRead/nCols)
+                Buffer = np.array(struct.unpack(fmt * nIntToRead, fid.read(nbytes * nIntToRead)))
+                Buffer = Buffer.reshape(-1,nCols)
+                data[ nLinesRead:(nLinesRead+nLinesToRead),  nOff:(nOff+nCols)  ] = Buffer
+                nLinesRead = nLinesRead + nLinesToRead
+                nIntRead   = nIntRead   + nIntToRead
+        except Exception:
+            raise Exception('Read only %d of %d values in file %s' % (nIntRead, n, filename))
+        return data
+
+
     FileFmtID_WithTime = 1  #% File identifiers used in FAST
     FileFmtID_WithoutTime = 2
     LenName = 10  #;  % number of characters per channel name
@@ -81,10 +126,13 @@ def load_binary_output(filename):
 
     with open(filename, 'rb') as fid:
         FileID = fread(fid, 1, 'int16')  #;             % FAST output file format, INT(2)
+        if FileID[0] not in [FileFmtID_WithTime, FileFmtID_WithoutTime]:
+            raise Exception('FileID not supported {}. Is it a FAST binary file?'.format(FileID))
 
         NumOutChans = fread(fid, 1, 'int32')[0]  #;             % The number of output channels, INT(4)
         NT = fread(fid, 1, 'int32')[0]  #;             % The number of time steps, INT(4)
 
+
         if FileID == FileFmtID_WithTime:
             TimeScl = fread(fid, 1, 'float64')  #;           % The time slopes for scaling, REAL(8)
             TimeOff = fread(fid, 1, 'float64')  #;           % The time offsets for scaling, REAL(8)
@@ -128,26 +176,37 @@ def load_binary_output(filename):
             cnt = len(PackedTime)
             if cnt < NT:
                 raise Exception('Could not read entire %s file: read %d of %d time values' % (filename, cnt, NT))
-        PackedData = fread(fid, nPts, 'int16')  #; % read the channel data
-        cnt = len(PackedData)
-        if cnt < nPts:
-            raise Exception('Could not read entire %s file: read %d of %d values' % (filename, cnt, nPts))
 
-    #    %-------------------------
-    #    % Scale the packed binary to real data
-    #    %-------------------------
-    #
-
-
-    data = np.array(PackedData).reshape(NT, NumOutChans)
-    data = (data - ColOff) / ColScl
+        if use_buffer:
+            # Reading data using buffers, and allowing an offset for time column (nOff=1)
+            data = freadRowOrderTableBuffered(fid, nPts, 'int16', NumOutChans, nOff=1, type_out='float64')
+        else:
+            # NOTE: unpacking huge data not possible on 32bit machines
+            PackedData = fread(fid, nPts, 'int16')  #; % read the channel data
+            cnt = len(PackedData)
+            if cnt < nPts:
+                raise Exception('Could not read entire %s file: read %d of %d values' % (filename, cnt, nPts))
+            data = np.array(PackedData).reshape(NT, NumOutChans)
+            del PackedData
 
     if FileID == FileFmtID_WithTime:
         time = (np.array(PackedTime) - TimeOff) / TimeScl;
     else:
         time = TimeOut1 + TimeIncr * np.arange(NT)
 
-    data = np.concatenate([time.reshape(NT, 1), data], 1)
+    #    %-------------------------
+    #    % Scale the packed binary to real data
+    #    %-------------------------
+    if use_buffer:
+        # Scaling Data
+        for iCol in range(NumOutChans):
+            data[:,iCol+1] = (data[:,iCol+1] - ColOff[iCol]) / ColScl[iCol]
+        # Adding time column
+        data[:,0] = time
+    else:
+        # NOTE: memory expensive due to time conversion, and concatenation
+        data = (data - ColOff) / ColScl
+        data = np.concatenate([time.reshape(NT, 1), data], 1)
 
     info = {'name': os.path.splitext(os.path.basename(filename))[0],
             'description': DescStr,
diff --git a/wetb/fast/tests/test_fast_io.py b/wetb/fast/tests/test_fast_io.py
index 423d6e0b7e00e4341381ccc88c183458b3087889..d89771735c9de9874522982bae31d335f3e29bd1 100644
--- a/wetb/fast/tests/test_fast_io.py
+++ b/wetb/fast/tests/test_fast_io.py
@@ -11,13 +11,14 @@ from future import standard_library
 standard_library.install_aliases()
 import unittest
 
-from wetb.fast.fast_io import load_output
+from wetb.fast.fast_io import load_output, load_binary_output
 import os
 
 testfilepath = os.path.join(os.path.dirname(__file__), 'test_files/')  # test file path
-class TestFastIO(unittest.TestCase):
 
 
+class TestFastIO(unittest.TestCase):
+
     def test_load_output(self):
         data, info = load_output(testfilepath + 'DTU10MW.out')
         self.assertAlmostEqual(data[4, 3], 4.295E-04)
@@ -25,8 +26,6 @@ class TestFastIO(unittest.TestCase):
         self.assertEqual(info['attribute_names'][1], "RotPwr")
         self.assertEqual(info['attribute_units'][1], "kW")
 
-
-
     def test_load_binary(self):
         data, info = load_output(testfilepath + 'test_binary.outb')
         self.assertEqual(info['name'], 'test_binary')
@@ -35,12 +34,26 @@ class TestFastIO(unittest.TestCase):
         self.assertEqual(info['attribute_units'][7], 'deg/s^2')
         self.assertAlmostEqual(data[10, 4], 138.822277739535)
 
+    def test_load_binary2(self):
+        # The old method was not using a buffer and was also memory expensive
+        # Now use_buffer is set to true by default
+        import numpy as np
+        data, info = load_binary_output(testfilepath + 'test_binary.outb', use_buffer=True)
+        data_old, info_old = load_binary_output(testfilepath + 'test_binary.outb', use_buffer=False)
+        self.assertEqual(info['name'], info_old['name'])
+        np.testing.assert_array_equal(data[0, :], data_old[0, :])
+        np.testing.assert_array_equal(data[-1, :], data_old[-1, :])
+
     def test_load_output2(self):
         data, info = load_output(testfilepath + 'DTU10MW.out')
         self.assertEqual(info['name'], "DTU10MW")
         self.assertEqual(info['attribute_names'][1], "RotPwr")
         self.assertEqual(info['attribute_units'][1], "kW")
 
+    def test_load_output3(self):
+        # This file has an extra comment at the end
+        data, info = load_output(testfilepath + 'FASTOut_Hydro.out')
+        self.assertAlmostEqual(data[3, 1], -1.0E+01)
 
 
 if __name__ == "__main__":
diff --git a/wetb/fast/tests/test_files/FASTOut_Hydro.out b/wetb/fast/tests/test_files/FASTOut_Hydro.out
new file mode 100644
index 0000000000000000000000000000000000000000..a8b05fcf110baf369e932fe85424a7ad2c873ba5
--- /dev/null
+++ b/wetb/fast/tests/test_files/FASTOut_Hydro.out
@@ -0,0 +1,13 @@
+
+These predictions were generated by HydroDyn on 19-Oct-2018 at 10:15:15.
+
+    Time            Wave1Elev
+   (sec)                  (m)
+    0.0           0.0e+01
+    1.0           1.0e+01
+    2.0           0.0e+01
+    3.0          -1.0e+01
+    4.0           0.0e+01
+
+This output file was closed on 19-Oct-2018 at 10:15:16.
+