From 857539fd512dca61f6b0641ec4d5d810375527d8 Mon Sep 17 00:00:00 2001
From: madsmpedersen <m@madsp.dk>
Date: Tue, 7 Jun 2016 08:34:24 +0200
Subject: [PATCH] Improved error handling

---
 wetb/utils/cluster_tools/cluster_resource.py |  9 +++++----
 wetb/utils/cluster_tools/ssh_client.py       | 21 +++++++++++---------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/wetb/utils/cluster_tools/cluster_resource.py b/wetb/utils/cluster_tools/cluster_resource.py
index ebdf6f4a..9e18e025 100644
--- a/wetb/utils/cluster_tools/cluster_resource.py
+++ b/wetb/utils/cluster_tools/cluster_resource.py
@@ -24,7 +24,10 @@ class Resource(object):
 
     def ok2submit(self):
         """Always ok to have min_cpu cpus and ok to have more if there are min_free free cpus"""
-        total, free, user = self.check_resources()
+        try:
+            total, free, user = self.check_resources()
+        except:
+            return False
 
         if user < self.min_cpu:
             return True
@@ -75,9 +78,7 @@ class SSHPBSClusterResource(Resource, SSHClient):
                 cpu_free, nodeSum = pbswrap.count_cpus(users, host, pbsnodes)
 
                 return nodeSum['used_cpu'] + cpu_free, cpu_free, cpu_user
-            except IOError as e:
-                raise e
-            except:
+            except Exception as e:
                 raise EnvironmentError("check resources failed")
 
     def jobids(self, jobname_prefix):
diff --git a/wetb/utils/cluster_tools/ssh_client.py b/wetb/utils/cluster_tools/ssh_client.py
index 97d6ed8d..3a2ed178 100644
--- a/wetb/utils/cluster_tools/ssh_client.py
+++ b/wetb/utils/cluster_tools/ssh_client.py
@@ -25,6 +25,8 @@ class SSHClient(object):
         self.key = key
         self.disconnect = 0
         self.client = None
+        self.sftp = None
+        self.transport = None
         if key is not None:
             self.key = paramiko.RSAKey.from_private_key(StringIO(key), password=passphrase)
 
@@ -33,7 +35,8 @@ class SSHClient(object):
 
     def __enter__(self):
         self.disconnect += 1
-        if self.client is None:
+        if self.client is None or self.client._transport is None or self.client._transport.is_active() is False:
+            self.close()
             self.connect()
         return self.client
 
@@ -44,9 +47,7 @@ class SSHClient(object):
         self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
         self.client.connect(self.host, self.port, username=self.username, password=self.password, pkey=self.key, timeout=self.TIMEOUT)
         assert self.client is not None
-        self.transport = paramiko.Transport((self.host, self.port))
-        self.transport.connect(username=self.username, password=self.password)
-        self.sftp = paramiko.SFTPClient.from_transport(self.transport)
+        self.sftp = paramiko.SFTPClient.from_transport(self.client._transport)
         return self
 
     def __exit__(self, *args):
@@ -85,11 +86,12 @@ class SSHClient(object):
             print (ret)
 
     def close(self):
-        if self.client is not None:
-            self.client.close()
-            self.client = None
-        self.sftp.close()
-        self.transport.close()
+        for x in ["sftp", "client" ]:
+            try:
+                getattr(self, x).close()
+                setattr(self, x, None)
+            except:
+                pass
         self.disconnect = False
 
     def file_exists(self, filename):
@@ -180,6 +182,7 @@ class SharedSSHClient(SSHClient):
 
         while self.next != threading.currentThread():
             time.sleep(1)
+        SSHClient.__enter__(self)
         return self.client
 
     def __exit__(self, *args):
-- 
GitLab