diff --git a/wetb/hawc2/Hawc2io.py b/wetb/hawc2/Hawc2io.py index f001f0d744eede9c90de5c669c536368700415a9..fcc3b6302e8efbf510472ad1efee109a77613a0d 100644 --- a/wetb/hawc2/Hawc2io.py +++ b/wetb/hawc2/Hawc2io.py @@ -93,19 +93,28 @@ class ReadHawc2(object): Format = temp[3] # reads channel info (name, unit and description) Name = []; Unit = []; Description = []; - for i in range(0, self.NrCh): - temp = str(Lines[i + 12][12:43]); Name.append(temp.strip()) - temp = str(Lines[i + 12][43:54]); Unit.append(temp.strip()) - temp = str(Lines[i + 12][54:-1]); Description.append(temp.strip()) - self.ChInfo = [Name, Unit, Description] - # if binary file format, scaling factors are read + min_size = self.NrCh+12 if Format.lower() == 'binary': - self.ScaleFactor = np.zeros(self.NrCh) + min_size+=self.NrCh+2 self.FileFormat = 'HAWC2_BINARY' - for i in range(0, self.NrCh): - self.ScaleFactor[i] = float(Lines[i + 12 + self.NrCh + 2]) else: self.FileFormat = 'HAWC2_ASCII' + self.empty_sel = True + #print('MIMC ReadHawc2: the min size is %d and I have %d lines'%(min_size, len(Lines))) + if len(Lines)>=min_size: + #print('MIMC ReadHawc2: about to load data') + for i in range(0, self.NrCh): + temp = str(Lines[i + 12][12:43]); Name.append(temp.strip()) + temp = str(Lines[i + 12][43:54]); Unit.append(temp.strip()) + temp = str(Lines[i + 12][54:-1]); Description.append(temp.strip()) + self.ChInfo = [Name, Unit, Description] + # if binary file format, scaling factors are read + if Format.lower() == 'binary': + self.ScaleFactor = np.zeros(self.NrCh) + for i in range(0, self.NrCh): + self.ScaleFactor[i] = float(Lines[i + 12 + self.NrCh + 2]) + self.empty_sel = False + #print('MIMC ReadHawc2: the empty_sel field is:', self.empty_sel) ################################################################################ # read sensor file for FLEX format def _ReadSensorFile(self): @@ -146,22 +155,29 @@ class ReadHawc2(object): fid.close() ################################################################################ # init function, load channel and other general result file info - def __init__(self, FileName, ReadOnly=0): - self.FileName = FileName - self.ReadOnly = ReadOnly + def __init__(self, FileName=None, ReadOnly=0): self.Iknown = [] # to keep track of what has been read all ready self.Data = np.zeros(0) self.alias = {} - if FileName.lower().endswith('.sel') or os.path.isfile(FileName + ".sel"): - self._ReadSelFile() - elif FileName.lower().endswith('.int') or os.path.isfile(self.FileName + ".int"): - self.FileFormat = 'FLEX' - self._ReadSensorFile() - elif FileName.lower().endswith('.hdf5') or os.path.isfile(self.FileName + ".hdf5"): - self.FileFormat = 'GTSDF' - self.ReadGtsdf() - else: - print ("unknown file: " + FileName) + #print('MIMC ReadHawc2: about to read the descriptions of the results') + self.Read_Result_Description(FileName, ReadOnly) + ################################################################################ + # This function will read the sel or sensor files as needed + def Read_Result_Description(self, FileName=None, ReadOnly=0): + self.FileName = FileName + self.ReadOnly = ReadOnly + if not FileName is None: + if FileName.lower().endswith('.sel') or os.path.isfile(FileName + ".sel"): + #print('MIMC ReadHawc2: about to read the sel file') + self._ReadSelFile() + elif FileName.lower().endswith('.int') or os.path.isfile(self.FileName + ".int"): + self.FileFormat = 'FLEX' + self._ReadSensorFile() + elif FileName.lower().endswith('.hdf5') or os.path.isfile(self.FileName + ".hdf5"): + self.FileFormat = 'GTSDF' + self.ReadGtsdf() + else: + print ("unknown file: " + FileName) ################################################################################ # Read results in binary format def ReadBinary(self, ChVec=[]): @@ -208,14 +224,35 @@ class ReadHawc2(object): data = np.hstack([self.Time[:,np.newaxis], data]) return data ################################################################################ + # Generate some dummy results based on 0 arrays. + # Used in cases where HAWC2 has crached but we need results to continue + def GenerateDummyResults(self, scan_cnt, ch_cnt): + #print('MIMC ReadHawc2: In the GenerateDummyResults function now') + self.generating_dummy_results = True + #print('MIMC ReadHawc2: self.generating_dummy_results:', self.generating_dummy_results) + return np.zeros((scan_cnt, ch_cnt)) + ################################################################################ # One stop call for reading all data formats - def ReadAll(self, ChVec=[]): + def ReadAll(self, ChVec=[], generate_dummy_if_needed=False): + #print('MIMC ReadHawc2: about to read everything') + self.generating_dummy_results = False if not ChVec and not self.FileFormat == 'GTSDF': ChVec = range(0, self.NrCh) - if self.FileFormat == 'HAWC2_BINARY': - return self.ReadBinary(ChVec) - elif self.FileFormat == 'HAWC2_ASCII': - return self.ReadAscii(ChVec) + if self.FileFormat == 'HAWC2_BINARY' or self.FileFormat == 'HAWC2_ASCII': + if self.empty_sel: + #print('MIMC ReadHawc2: It seems the sel was empty') + if generate_dummy_if_needed: + #print('MIMC ReadHawc2: generating empty results') + return self.GenerateDummyResults(self.NrSc, len(ChVec)) + else: + #print('MIMC ReadHawc2: about to throw an error because we cannot tolerate empty results') + raise Exception('The sel file was empty when trying to read the HAWC2 results') + else: + #print('MIMC ReadHawc2: We have our sel file and we will read the results') + if self.FileFormat == 'HAWC2_BINARY': + return self.ReadBinary(ChVec) + elif self.FileFormat == 'HAWC2_ASCII': + return self.ReadAscii(ChVec) elif self.FileFormat == 'GTSDF': return self.ReadGtsdf() else: @@ -255,8 +292,17 @@ class ReadHawc2(object): return self.Data[:, tuple(I1)] ############################################################################ # Load the data - def load_data(self): - self.Data = self.ReadAll() + def load_data(self, generate_dummy_if_needed=False): + #print('MIMC ReadHawc2: loading the data') + self.Data = self.ReadAll(generate_dummy_if_needed=generate_dummy_if_needed) + #print('MIMC ReadHawc2: self.generating_dummy_results:', self.generating_dummy_results) + ############################################################################ + # Load the dummy data + def load_dummy_data(self, scn_cnt, ch_cnt): + #print('MIMC ReadHawc2: loading dummy data') + self.Data = self.GenerateDummyResults(scn_cnt, ch_cnt) + self.NrSc=scn_cnt + self.NrCh=ch_cnt ############################################################################ # Get the signal def get_signal(self, chid): @@ -300,6 +346,8 @@ class ReadHawc2(object): key = self.alias[key] if key in old_key: raise Exception('Circular alias') + if isinstance(key,str) and key.isdigit(): + key=int(key) return self.get_signal(key) diff --git a/wetb/hawc2/hawc2_simulation.py b/wetb/hawc2/hawc2_simulation.py index 6b704b9f3b526b1195f15d262e25c7f9d7d07cc5..1e3096f0a339eb8c1987b9c58f676c884e0429fc 100644 --- a/wetb/hawc2/hawc2_simulation.py +++ b/wetb/hawc2/hawc2_simulation.py @@ -9,12 +9,15 @@ from wetb.hawc2.ae_file import AEFile from wetb.hawc2.pc_file import PCData from wetb.hawc2.st_file import StOrigData, StFPMData from wetb.hawc2.Hawc2io import ReadHawc2 +from subprocess import Popen, TimeoutExpired + try: - import subprocess32 as subprocess - _with_timeout = True + from mpi4py import MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() except: - import subprocess - _with_timeout = False + MPI = None + rank = 0 def mkdir_for_file_in_dir(filename): # prepare the search @@ -35,6 +38,19 @@ def open_file_in_dir(filename, mode): # return an open file descriptor in that folder return open(filename, mode) +def _get_file_time(file_path): + if os.path.isfile(file_path): + file_time = os.path.getmtime(file_path) + return True, file_time + return (False, 0.0) + +def _file_changed(old_file, new_file): + if old_file[0]!=new_file[0]: + return True + if old_file[1]!=new_file[1]: + return True + return False + class Hawc2_Simulation(object): def __init__(self, source_file_in = None): @@ -58,6 +74,8 @@ class Hawc2_Simulation(object): self.object_sensors_normalized = False # The results of the simulation self.results = None + # The flag that indicates if the results are dummy + self.using_dummy_results = True # These are the execution settings # This is the command that launches hawc2 @@ -73,9 +91,15 @@ class Hawc2_Simulation(object): # Run the simulation within try-except (hides error messages) self.exec_use_try_except = True # Run the simulation with a time-out ... useful for detecting errors - self.exec_use_time_out = _with_timeout + self.exec_use_time_out = True # The time-out used in conjunction with using a time-out self.exec_time_out = 1200 + # This tells whether the log file should be monitored to continue running + self.exec_monitor_log = True + # This is the period that the log should be checked to see if the file is still running + self.exec_log_check_period = 30 + # This is the log file that should be used for monitoring. Setting NONE means the logfile in the htc will be used instead + self.exec_log_file_path = None # Send messages self.exec_verbose = True @@ -247,7 +271,7 @@ class Hawc2_Simulation(object): if not os.path.isfile(accompanying_file) and not os.path.isdir(accompanying_file): accompanying_file=os.path.join(self.source_directory,accompanying_file) if not os.path.isfile(accompanying_file) and not os.path.isdir(accompanying_file): - raise ValueError('The accompanying file must be contained within the directory of the HTC file, or one of the sub-directories. file: {} not found'.format(accompanying_file)) + raise ValueError('The accompanying file "%s" must be contained within the directory of the HTC file, or one of the sub-directories'%(accompanying_file)) else: accompanying_file=os.path.abspath(os.path.realpath(accompanying_file)) if not accompanying_file.startswith(self.source_directory): @@ -261,6 +285,84 @@ class Hawc2_Simulation(object): print('does_input_exist called') return False + # This is the function that actually executes and manages all the checking + def _execute_simulation(self, exec_str): + + run_without_log_checking = True + # If we need to monitor the log + if self.exec_monitor_log: + #print('MIMC at rank %d running with log monitoring:'%(rank)) + # First see if there is a log file + log_file = self.exec_log_file_path + #print('MIMC at rank %d default log:'%(rank), log_file) + if log_file is None and 'simulation.logfile.0' in self.htcf.all_keys(): + log_file = self.htcf['simulation.logfile.0'] + #print('MIMC at rank %d actual log:'%(rank), log_file) + # If we have a log file then execute accordingly + if not log_file is None: + # ensure that we do not re-run + run_without_log_checking = False + # prepare the variables + retval = None + time_at_start_of_execution = None + if self.exec_use_time_out: + time_at_start_of_execution = time.time() + #print('MIMC at rank %d time_at_start_of_execution:'%(rank), time_at_start_of_execution) + #print('############ at rank %d MIMC about to launch ################'%(rank)) + # Lets launch the process + popen_hawc2 = Popen(exec_str) + # Get the first log file status + old_time = _get_file_time(log_file) + #print('MIMC at rank %d old_time:'%(rank), old_time[0], old_time[1]) + # Loop over checking periodic + while retval is None: + #print('MIMC at rank %d in the monitoring loop'%(rank)) + # Wait for an output + try: + # determine the wait time. If time-out is also in effect ensure that we do not wait PAST the time-out + wait_time = self.exec_log_check_period + if self.exec_use_time_out: + time_at_check = time.time() + time_until_time_out = self.exec_time_out+time_at_start_of_execution-time_at_check + if time_until_time_out<wait_time: + wait_time = time_until_time_out + #print('MIMC at rank %d waiting %f seconds for exit'%(rank, wait_time)) + retval = popen_hawc2.wait(timeout=wait_time) + # If time-out then check if we need to kill + except TimeoutExpired: + #print('MIMC at rank %d time-out occured'%(rank)) + # get the new time for the file + new_time = _get_file_time(log_file) + #print('MIMC at rank %d new_time:'%(rank), new_time[0], new_time[1]) + # If the file status has not changed ... then kill + if not _file_changed(old_time, new_time): + print('Killing HAWC2 because the log file indicates no new information') + popen_hawc2.kill() + raise Exception('Forced to kill because there is nothing new in the log file') + # update the old time to the new time + old_time = new_time + #print('MIMC at rank %d old_time:'%(rank), old_time[0], old_time[1]) + # check if we need to check time-out + if self.exec_use_time_out: + # get current time + time_at_check = time.time() + #print('MIMC at rank %d time_at_check:'%(rank), time_at_check) + # Check if we have ran over time and must kill + if time_at_check-time_at_start_of_execution >= self.exec_time_out: + print('At rank %d Killing HAWC2 because the time-out has occured'%(rank)) + popen_hawc2.kill() + raise Exception('Forced to kill because the simulation exceeded the time-out') + #print('MIMC at rank %d Verify that we have a valid return value:'%(rank)) + if retval != 0: + #print('MIMC Throwing exception because it appears that HAWC2 has crashed'%(rank)) + raise Exception('The return value indicates that HAWC2 has crashed') + if run_without_log_checking: + #print('MIMC at rank %d run_without_log_checking'%(rank)) + if self.exec_use_time_out: + sub_proc = subprocess.run(exec_str, timeout=self.exec_time_out) + else: + sub_proc = subprocess.run(exec_str) + # This will execute the simulation def run_simulation(self): @@ -268,6 +370,8 @@ class Hawc2_Simulation(object): old_home = os.getcwd() # Change to the htc directory + if not os.path.isdir(self.write_directory): + os.makedirs(self.write_directory) os.chdir(self.write_directory) # This is the point where you would specify that a file is running @@ -276,21 +380,23 @@ class Hawc2_Simulation(object): exec_str = self.exec_command+' '+self.input_files['htc'][0].written_file exec_str = exec_str.split() if not self.exec_dry_run: + #print('MIMC at rank %d running without dry-run'%(rank)) self.exec_success = False - try_count = 0 if self.exec_use_try_except: + #print('MIMC at rank %d running with try-except'%(rank)) + try_count = 0 while not self.exec_success and try_count<self.exec_retry_count: try_count+=1 + #print('MIMC at rank %d running try_count:'%(rank), try_count) try: - if self.exec_use_time_out: - proc = subprocess.check_output(exec_str, timeout=self.exec_time_out) - else: - proc = subprocess.check_output(exec_str) + self._execute_simulation(exec_str) + #print('MIMC at rank %d exec success'%(rank)) self.exec_success = True - if self.exec_verbose: - print(exec_str, 'output:') - print(proc) + #if self.exec_verbose: + # print(exec_str, 'output:') + # print(proc) except: + #print('MIMC at rank %d in the top level except'%(rank)) # wait a little and try again if try_count<self.exec_retry_count: print(exec_str, ' crashed for case __ ... About to execute another attempt') @@ -298,17 +404,11 @@ class Hawc2_Simulation(object): else: print(exec_str, ' crashed for case __ for the final time, it appears something fundamental is wrong') else: - if self.exec_use_time_out: - proc = subprocess.check_output(exec_str, timeout=self.exec_time_out) - else: - proc = subprocess.check_output(exec_str) + self._execute_simulation(exec_str) self.exec_success = True - if self.exec_verbose: - print(exec_str, 'output:') - print(proc) else: print(exec_str + ' dry run...') - self.exec_success = True + self.exec_success = False # return to the old directory os.chdir(old_home) @@ -355,7 +455,7 @@ class Hawc2_Simulation(object): # return to the old directory os.chdir(old_home) - def load_results(self): + def load_results(self, force_load_dummy_results=False, generate_dummy_if_needed=False): # Get the directory old_home = os.getcwd() @@ -367,9 +467,48 @@ class Hawc2_Simulation(object): os.chdir(self.write_directory) # load the results - file_name = self.htcf['output.filename.0'] - self.results = ReadHawc2(file_name) - self.results.load_data() + # Check if we need to load_dummy_results + if force_load_dummy_results: + #print('MIMC about to load dummy results') + start_time = 0.0 + delta_time = 1.0 + end_time = 0.0 + ch_cnt = 0 + if not self.htcf is None: + keys = self.htcf.all_keys() + if 'output.time.0' in keys: + start_time = float(self.htcf['output.time.0']) + if 'simulation.time_stop.0' in keys: + end_time = float(self.htcf['simulation.time_stop.0']) + if 'output.time.1' in keys: + tmp = float(self.htcf['output.time.1']) + if tmp<end_time: + end_time=tmp + if 'simulation.newmark.deltat.0' in keys: + delta_time = float(self.htcf['simulation.newmark.deltat.0']) + sensor_list = self.htcf['output'].sensors + ch_cnt = 0 + for sensor in sensor_list: + if sensor['__on__']: + sensor_size = sensor.get_sensor_size() + ch_cnt+=sensor_size + if delta_time<=0.0: + delta_time = 1.0 + sc_cnt = int((end_time-start_time)/delta_time) + if sc_cnt<0: + sc_cnt=0 + #print('MIMC According to our calculations ... we need dummy results %d x %d'%(sc_cnt, ch_cnt)) + self.results = ReadHawc2() + self.results.load_dummy_data(sc_cnt,ch_cnt) + self.using_dummy_results = True + else: + #print('MIMC about to load REAL results') + file_name = self.htcf['output.filename.0'] + self.results = ReadHawc2(file_name) + self.results.load_data(generate_dummy_if_needed=generate_dummy_if_needed) + # Determine if we are using dummy results + self.using_dummy_results = self.results.generating_dummy_results + #print('MIMC Finished loading results of size %d x %d the using_dummy_results flag is:'%(self.results.NrSc, self.results.NrCh), self.using_dummy_results) # Add the keys sensor_list = self.htcf['output'].sensors key_list = [] @@ -432,7 +571,8 @@ class Hawc2_Simulation(object): key = key[len('hawc2_output.'):] return self.results[key] else: - raise KeyError('That key does not exist') + error_message='The key "%s" does not exist'%(key) + raise KeyError(error_message) # Set some information from on the object def __setitem__(self,key,value):