From 857539fd512dca61f6b0641ec4d5d810375527d8 Mon Sep 17 00:00:00 2001 From: madsmpedersen <m@madsp.dk> Date: Tue, 7 Jun 2016 08:34:24 +0200 Subject: [PATCH] Improved error handling --- wetb/utils/cluster_tools/cluster_resource.py | 9 +++++---- wetb/utils/cluster_tools/ssh_client.py | 21 +++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/wetb/utils/cluster_tools/cluster_resource.py b/wetb/utils/cluster_tools/cluster_resource.py index ebdf6f4a..9e18e025 100644 --- a/wetb/utils/cluster_tools/cluster_resource.py +++ b/wetb/utils/cluster_tools/cluster_resource.py @@ -24,7 +24,10 @@ class Resource(object): def ok2submit(self): """Always ok to have min_cpu cpus and ok to have more if there are min_free free cpus""" - total, free, user = self.check_resources() + try: + total, free, user = self.check_resources() + except: + return False if user < self.min_cpu: return True @@ -75,9 +78,7 @@ class SSHPBSClusterResource(Resource, SSHClient): cpu_free, nodeSum = pbswrap.count_cpus(users, host, pbsnodes) return nodeSum['used_cpu'] + cpu_free, cpu_free, cpu_user - except IOError as e: - raise e - except: + except Exception as e: raise EnvironmentError("check resources failed") def jobids(self, jobname_prefix): diff --git a/wetb/utils/cluster_tools/ssh_client.py b/wetb/utils/cluster_tools/ssh_client.py index 97d6ed8d..3a2ed178 100644 --- a/wetb/utils/cluster_tools/ssh_client.py +++ b/wetb/utils/cluster_tools/ssh_client.py @@ -25,6 +25,8 @@ class SSHClient(object): self.key = key self.disconnect = 0 self.client = None + self.sftp = None + self.transport = None if key is not None: self.key = paramiko.RSAKey.from_private_key(StringIO(key), password=passphrase) @@ -33,7 +35,8 @@ class SSHClient(object): def __enter__(self): self.disconnect += 1 - if self.client is None: + if self.client is None or self.client._transport is None or self.client._transport.is_active() is False: + self.close() self.connect() return self.client @@ -44,9 +47,7 @@ class SSHClient(object): self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) self.client.connect(self.host, self.port, username=self.username, password=self.password, pkey=self.key, timeout=self.TIMEOUT) assert self.client is not None - self.transport = paramiko.Transport((self.host, self.port)) - self.transport.connect(username=self.username, password=self.password) - self.sftp = paramiko.SFTPClient.from_transport(self.transport) + self.sftp = paramiko.SFTPClient.from_transport(self.client._transport) return self def __exit__(self, *args): @@ -85,11 +86,12 @@ class SSHClient(object): print (ret) def close(self): - if self.client is not None: - self.client.close() - self.client = None - self.sftp.close() - self.transport.close() + for x in ["sftp", "client" ]: + try: + getattr(self, x).close() + setattr(self, x, None) + except: + pass self.disconnect = False def file_exists(self, filename): @@ -180,6 +182,7 @@ class SharedSSHClient(SSHClient): while self.next != threading.currentThread(): time.sleep(1) + SSHClient.__enter__(self) return self.client def __exit__(self, *args): -- GitLab