Commit ad85be20 authored by Jennifer Rinker's avatar Jennifer Rinker

initial upload io -- hawc2

parent 7ff65760
Pipeline #3484 failed with stage
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
#from __future__ import standard_library
#standard_library.install_aliases()
from .hawc2 import sel_to_df
# d = None # not sure if we need these lines (rink)
# d = dir()
# from .htc_file import HTCFile
# from .log_file import LogFile
# from .ae_file import AEFile
# from .at_time_file import AtTimeFile
# from .pc_file import PCFile
# from . import shear_file
# from .st_file import StFile
# __all__ = sorted([m for m in set(dir()) - set(d)])
"""Input-output module for files related to HAWC2
Notes
-----
1. This submodule is a work in progress. We will need to determine how best to
organize everything.
2. rink used Python 3.6 string formatting (f-strings) because I love it.
Current Status/To-Do
--------------------
2017.10.13 - Functions related to loading HAWC2 binary files added. Next steps
are to
(1) port David's unique channel names (w/modifications),
(2) write tests, and
(3) write documentation.
Author
------
Jenni Rinker
rink@dtu.dk
"""
import os
import numpy as np
import pandas as pd
# Hard-coded parameters describing the layout of a HAWC2 .sel file.
# 0-based index of the header line whose whitespace-separated fields hold the
# number of scans (field 0), number of channels (field 1) and format (field 3).
_sel_lineno_info = 8
# 0-based index of the first channel-description line.
_sel_lineno_chanstart = 12
# Character offsets used as slice boundaries on each channel line:
# [:13] channel number, [13:44] variable description, [44:55] units,
# [55:] notes. (These are cumulative offsets, not individual column widths.)
_sel_col_wds = [13, 44, 55]
# Column names of the dataframe returned by sel_to_df.
_sel_df_cols = ['channel', 'var_desc', 'units', 'notes', 'scale']
def sel_to_df(path):
    """Load channel metadata from a HAWC2 sel file into a pandas dataframe

    Arguments
    ---------
    path : str
        Path to .sel file. Any extension is stripped and replaced by
        ``.sel``, so the path of the matching ``.dat`` file also works.

    Returns
    -------
    sel_df : pandas.DataFrame
        One row per channel with columns ``channel`` (int channel number),
        ``var_desc``, ``units``, ``notes`` (stripped text fields) and
        ``scale`` (float). ``scale`` is read from the scale-factor lines
        when the format field of the header is ``binary`` (case
        insensitive) and is 1.0 otherwise.

    Raises
    ------
    IndexError
        If the file has fewer lines than the header announces.
    ValueError
        If a channel number or scale factor cannot be parsed.
    """
    # load info from sel file
    stem = os.path.splitext(path)[0]  # remove ext if given
    sel_path = f'{stem}.sel'  # path to sel file
    with open(sel_path, 'r') as sel_fid:
        sel_lines = sel_fid.readlines()

    line_info = sel_lines[_sel_lineno_info].split()
    n_chnls = int(line_info[1])
    is_binary = line_info[3].lower() == 'binary'  # loop-invariant, test once

    chn_end, var_end, unit_end = _sel_col_wds  # slice offsets on a line
    # scale factors start two lines after the last channel line
    scale_start = _sel_lineno_chanstart + n_chnls + 2

    # Collect typed rows and build the frame once. Writing strings into a
    # pre-allocated float frame relies on silent dtype upcasting, which
    # pandas has deprecated, and leaves uninitialised np.empty values behind.
    rows = []
    for i_chnl in range(n_chnls):
        sel_line = sel_lines[_sel_lineno_chanstart + i_chnl]
        scale = float(sel_lines[scale_start + i_chnl]) if is_binary else 1.
        rows.append((int(sel_line[:chn_end].strip()),  # channel number
                     sel_line[chn_end:var_end].strip(),  # variable
                     sel_line[var_end:unit_end].strip(),  # units
                     sel_line[unit_end:].strip(),  # description/notes
                     scale))
    return pd.DataFrame(rows, columns=_sel_df_cols)
def get_unique_chnl_names(sel):
    """Channel names for a HAWC2 sel file

    Arguments
    ---------
    sel : str or pandas.DataFrame
        Either a path that ``sel_to_df`` can load, or an already-loaded
        sel dataframe.

    Returns
    -------
    channel_names : range
        Placeholder names: the integers 1..n, where n is the number of rows
        in the sel dataframe.

    Raises
    ------
    ValueError
        If ``sel`` is neither a string nor a dataframe.
    """
    if isinstance(sel, pd.DataFrame):
        frame = sel  # already loaded
    elif isinstance(sel, str):
        frame = sel_to_df(sel)  # load from disk
    else:
        raise ValueError(f'Unknown input type {type(sel)}')
    n_rows = len(frame.index)
    return range(1, n_rows + 1)  # TODO: fix this
def get_num_scans(path):
    """Get number of scans from a HAWC2 sel file

    Arguments
    ---------
    path : str
        Path to the .sel file. Any extension is stripped and replaced by
        ``.sel``.

    Returns
    -------
    n_scns : int
        First whitespace-separated field of the sel info line.
    """
    stem, _ext = os.path.splitext(path)  # drop the extension if given
    with open(f'{stem}.sel', 'r') as fid:
        all_lines = list(fid)
    header_fields = all_lines[_sel_lineno_info].split()
    return int(header_fields[0])
def dat_to_df(path, n_scns=None, channel_names=None, sel_df=None):
    """HAWC2 binary .dat file to pandas dataframe

    The file is read as consecutive per-channel blocks of ``n_scns`` 16-bit
    integers; each block is multiplied by the channel's ``scale`` value from
    the sel dataframe.

    Arguments
    ---------
    path : str
        Path to the result file. Any extension is stripped; ``.dat`` is
        appended for the data file.
    n_scns : int, optional
        Number of scans per channel. Read from the sel file if omitted.
    channel_names : sequence, optional
        Column labels of the returned dataframe. Derived from the sel
        dataframe if omitted.
    sel_df : pandas.DataFrame, optional
        Sel dataframe with a ``scale`` column, indexed 0..n_channels-1.
        Loaded with ``sel_to_df`` if omitted.

    Returns
    -------
    dat_df : pandas.DataFrame
        Float dataframe of shape (n_scns, len(channel_names)).

    Raises
    ------
    ValueError
        If the .dat file holds fewer than ``n_scns`` values for a channel.
    """
    if n_scns is None:  # get number of scans if not passed in
        n_scns = get_num_scans(path)
    if sel_df is None:  # get .sel dataframe if not passed in
        sel_df = sel_to_df(path)
    if channel_names is None:
        # reuse the sel dataframe instead of parsing the .sel file again
        channel_names = get_unique_chnl_names(sel_df)

    dat_path = os.path.splitext(path)[0] + '.dat'  # path to dat file
    data = np.zeros((n_scns, len(channel_names)))
    with open(dat_path, 'rb') as dat_fid:
        for i_chnl in range(sel_df.shape[0]):
            # channel block offset: 2 bytes per int16 sample
            dat_fid.seek(i_chnl * n_scns * 2, 0)
            raw = np.fromfile(dat_fid, 'int16', n_scns)
            if raw.size != n_scns:  # truncated or mismatched file
                raise ValueError(f'Expected {n_scns} scans for channel '
                                 f'index {i_chnl} in {dat_path}, found '
                                 f'{raw.size}')
            data[:, i_chnl] = raw * sel_df.loc[i_chnl, 'scale']
    return pd.DataFrame(data, columns=channel_names)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment