Skip to content
Snippets Groups Projects
Commit b9fda18e authored by Mads M. Pedersen's avatar Mads M. Pedersen
Browse files

improved error handling in simulation.py

parent 3cbb790f
No related branches found
No related tags found
No related merge requests found
...@@ -235,12 +235,18 @@ class Simulation(object): ...@@ -235,12 +235,18 @@ class Simulation(object):
return dst return dst
output_patterns = [fmt(dst) for dst in self.htcFile.output_files() + ([], self.htcFile.turbulence_files())[self.copy_turbulence] + [self.stdout_filename]] output_patterns = [fmt(dst) for dst in self.htcFile.output_files() + ([], self.htcFile.turbulence_files())[self.copy_turbulence] + [self.stdout_filename]]
output_files = set([f for pattern in output_patterns for f in self.host.glob(unix_path(os.path.join(self.tmp_modelpath, pattern)))]) output_files = set([f for pattern in output_patterns for f in self.host.glob(unix_path(os.path.join(self.tmp_modelpath, pattern)))])
self.host._finish_simulation(output_files) try:
self.set_id(self.filename) self.host._finish_simulation(output_files)
if self.status != ERROR: if self.status != ERROR:
self.status = CLEANED self.status = CLEANED
self.logFile.reset() except Exception as e:
self.htcFile.reset() self.errors.append(str(e))
raise
finally:
self.set_id(self.filename)
self.logFile.reset()
self.htcFile.reset()
...@@ -418,20 +424,26 @@ class LocalSimulationHost(SimulationResource): ...@@ -418,20 +424,26 @@ class LocalSimulationHost(SimulationResource):
def _finish_simulation(self, output_files): def _finish_simulation(self, output_files):
missing_result_files = []
for src_file in output_files: for src_file in output_files:
dst_file = os.path.join(self.modelpath, os.path.relpath(src_file, self.tmp_modelpath)) dst_file = os.path.join(self.modelpath, os.path.relpath(src_file, self.tmp_modelpath))
# exist_ok does not exist in Python27 # exist_ok does not exist in Python27
if not os.path.isdir(os.path.dirname(dst_file)): try:
os.makedirs(os.path.dirname(dst_file)) #, exist_ok=True) if not os.path.isdir(os.path.dirname(dst_file)):
if not os.path.isfile(dst_file) or os.path.getmtime(dst_file) != os.path.getmtime(src_file): os.makedirs(os.path.dirname(dst_file)) #, exist_ok=True)
shutil.copy(src_file, dst_file) if not os.path.isfile(dst_file) or os.path.getmtime(dst_file) != os.path.getmtime(src_file):
shutil.copy(src_file, dst_file)
except:
missing_result_files.append(dst_file)
self.logFile.filename = os.path.join(self.modelpath, self.log_filename) self.logFile.filename = os.path.join(self.modelpath, self.log_filename)
if missing_result_files:
raise Warning("Failed to copy %s from %s"%(",".join(missing_result_files), self.host))
try: try:
shutil.rmtree(self.tmp_modelpath) shutil.rmtree(self.tmp_modelpath)
except (PermissionError, OSError) as e: except (PermissionError, OSError) as e:
raise Warning(str(e)) raise Warning("Fail to remove temporary files and folders on %s\n%s"%(self.host, str(e)))
def update_logFile_status(self): def update_logFile_status(self):
self.logFile.update_status() self.logFile.update_status()
...@@ -535,19 +547,24 @@ class PBSClusterSimulationHost(SimulationResource, SSHClient): ...@@ -535,19 +547,24 @@ class PBSClusterSimulationHost(SimulationResource, SSHClient):
def _finish_simulation(self, output_files): def _finish_simulation(self, output_files):
with self: with self:
download_failed = []
for src_file in output_files: for src_file in output_files:
try: try:
dst_file = os.path.join(self.modelpath, os.path.relpath(src_file, self.tmp_modelpath)) dst_file = os.path.join(self.modelpath, os.path.relpath(src_file, self.tmp_modelpath))
os.makedirs(os.path.dirname(dst_file), exist_ok=True) os.makedirs(os.path.dirname(dst_file), exist_ok=True)
self.download(src_file, dst_file, retry=3) self.download(src_file, dst_file, retry=10)
except Exception as e: except Exception as e:
print (self.modelpath, src_file, self.tmp_modelpath) download_failed.append(dst_file)
raise e if download_failed:
try: raise Warning("Failed to download %s from %s"%(",".join(download_failed), self.host))
self.execute('rm -r .hawc2launcher/%s' % self.simulation_id) else:
self.execute('rm .hawc2launcher/status_%s' % self.simulation_id) try:
except: self.execute('rm -r .hawc2launcher/%s' % self.simulation_id)
pass finally:
try:
self.execute('rm .hawc2launcher/status_%s' % self.simulation_id)
except:
raise Warning("Fail to remove temporary files and folders on %s"%self.host)
def _simulate(self): def _simulate(self):
...@@ -668,6 +685,3 @@ cd /scratch/ ...@@ -668,6 +685,3 @@ cd /scratch/
### rm -r $PBS_JOBID ### rm -r $PBS_JOBID
exit""" % (self.simulation_id, self.stdout_filename, self.modelpath, self.htcFile.filename, self.resource.python_cmd, rel_htcfilename, self.resource.wine_cmd, self.hawc2exe, cp_back) exit""" % (self.simulation_id, self.stdout_filename, self.modelpath, self.htcFile.filename, self.resource.python_cmd, rel_htcfilename, self.resource.wine_cmd, self.hawc2exe, cp_back)
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment