Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
pbsutils
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
toolbox
pbsutils
Commits
d111a8be
Commit
d111a8be
authored
8 years ago
by
David Verelst
Browse files
Options
Downloads
Patches
Plain Diff
fix and improvements from wetb.utils MR
WindEnergyToolbox!16
parent
7be11ce0
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
pbswrap.py
+51
-57
51 additions, 57 deletions
pbswrap.py
with
51 additions
and
57 deletions
pbswrap.py
+
51
−
57
View file @
d111a8be
...
...
@@ -22,14 +22,14 @@ def print_dashboard(users, host, pbsnodes):
# jber 3 0 0 0
print
print
'
-
'
*
54
print
'
cpus
'
.
rjust
(
18
)
+
'
nodes
'
.
rjust
(
9
)
print
'
User
'
.
rjust
(
9
)
+
'
Running
'
.
rjust
(
9
)
+
'
Running
'
.
rjust
(
9
)
\
+
'
Queued
'
.
rjust
(
9
)
+
'
Waiting
'
.
rjust
(
9
)
+
'
Other
'
.
rjust
(
9
)
print
(
'
-
'
*
54
)
print
(
'
cpus
'
.
rjust
(
18
)
+
'
nodes
'
.
rjust
(
9
)
)
print
(
'
User
'
.
rjust
(
9
)
+
'
Running
'
.
rjust
(
9
)
+
'
Running
'
.
rjust
(
9
)
\
+
'
Queued
'
.
rjust
(
9
)
+
'
Waiting
'
.
rjust
(
9
)
+
'
Other
'
.
rjust
(
9
)
)
# nodeSum: overview (summation of all jobs) nodes per user:
# nodeSum = [running, queued, waiting, other, cpus]
nodeSum
=
[
0
,
0
,
0
,
0
,
0
]
print
'
-
'
*
54
print
(
'
-
'
*
54
)
# print all values in the table: the nodes used per user
#userlist = users['users'].keys()
#userlist.sort()
...
...
@@ -44,11 +44,11 @@ def print_dashboard(users, host, pbsnodes):
Q
=
users
[
uid
][
'
Q
'
]
W
=
users
[
uid
][
'
W
'
]
O
=
users
[
uid
][
'
E
'
]
+
users
[
uid
][
'
H
'
]
+
users
[
uid
][
'
T
'
]
\
+
users
[
uid
][
'
S
'
]
+
users
[
uid
][
'
O
'
]
+
users
[
uid
][
'
C
'
]
+
users
[
uid
][
'
S
'
]
+
users
[
uid
][
'
O
'
]
+
users
[
uid
][
'
C
'
]
cpus
=
users
[
uid
][
'
cpus
'
]
print
uid
.
rjust
(
9
)
+
str
(
cpus
).
rjust
(
9
)
+
str
(
R
).
rjust
(
9
)
\
+
str
(
Q
).
rjust
(
9
)
+
str
(
W
).
rjust
(
9
)
+
str
(
O
).
rjust
(
9
)
print
(
uid
.
rjust
(
9
)
+
str
(
cpus
).
rjust
(
9
)
+
str
(
R
).
rjust
(
9
)
\
+
str
(
Q
).
rjust
(
9
)
+
str
(
W
).
rjust
(
9
)
+
str
(
O
).
rjust
(
9
)
)
nodeSum
[
0
]
+=
R
nodeSum
[
1
]
+=
Q
nodeSum
[
2
]
+=
W
...
...
@@ -58,18 +58,18 @@ def print_dashboard(users, host, pbsnodes):
nr_nodes
=
pbsnodes
[
'
nr_nodes
'
]
down
=
pbsnodes
[
'
down
'
]
others
=
pbsnodes
[
'
others
'
]
total_cpu
=
host
[
'
cpu_per_node
'
]
*
nr_nodes
total_cpu
=
host
[
'
cpu_per_node
'
]
*
nr_nodes
# the summed up for each node status (queued, running,...)
print
'
-
'
*
54
print
'
total
'
.
rjust
(
9
)
+
str
(
nodeSum
[
4
]).
rjust
(
9
)
+
str
(
nodeSum
[
0
]).
rjust
(
9
)
\
print
(
'
-
'
*
54
)
print
(
'
total
'
.
rjust
(
9
)
+
str
(
nodeSum
[
4
]).
rjust
(
9
)
+
str
(
nodeSum
[
0
]).
rjust
(
9
)
\
+
str
(
nodeSum
[
1
]).
rjust
(
9
)
+
str
(
nodeSum
[
2
]).
rjust
(
9
)
\
+
str
(
nodeSum
[
3
]).
rjust
(
9
)
print
'
-
'
*
54
print
'
free
'
.
rjust
(
9
)
+
str
(
total_cpu
-
nodeSum
[
4
]).
rjust
(
9
)
\
+
str
(
nr_nodes
-
nodeSum
[
0
]
-
others
-
down
).
rjust
(
9
)
print
'
down
'
.
rjust
(
9
)
+
str
(
down
).
rjust
(
18
)
print
'
-
'
*
54
+
str
(
nodeSum
[
3
]).
rjust
(
9
)
)
print
(
'
-
'
*
54
)
print
(
'
free
'
.
rjust
(
9
)
+
str
(
total_cpu
-
nodeSum
[
4
]).
rjust
(
9
)
\
+
str
(
nr_nodes
-
nodeSum
[
0
]
-
others
-
down
).
rjust
(
9
)
)
print
(
'
down
'
.
rjust
(
9
)
+
str
(
down
).
rjust
(
18
)
)
print
(
'
-
'
*
54
)
print
...
...
@@ -79,7 +79,7 @@ def print_node_loading(users, host, nodes, nodesload):
"""
if
len
(
host
)
<
1
:
print
'
It is very quit, nobody is working on the cluster.
'
print
(
'
It is very quit, nobody is working on the cluster.
'
)
return
hostname
=
host
[
'
name
'
]
...
...
@@ -88,15 +88,15 @@ def print_node_loading(users, host, nodes, nodesload):
print
# print a header
if
hostname
==
'
gorm
'
:
print
'
-
'
*
79
header
=
'
|
'
.
join
([
str
(
k
).
center
(
5
)
for
k
in
range
(
1
,
13
,
1
)])
+
'
|
'
print
'
id
'
.
center
(
5
),
header
print
'
-
'
*
79
print
(
'
-
'
*
79
)
header
=
'
|
'
.
join
([
str
(
k
).
center
(
5
)
for
k
in
range
(
1
,
13
,
1
)])
+
'
|
'
print
(
'
id
'
.
center
(
5
),
header
)
print
(
'
-
'
*
79
)
elif
hostname
==
'
jess
'
:
print
'
-
'
*
126
header
=
'
|
'
.
join
([
str
(
k
).
center
(
5
)
for
k
in
range
(
1
,
21
,
1
)])
+
'
|
'
print
'
id
'
.
center
(
5
),
header
print
'
-
'
*
126
print
(
'
-
'
*
126
)
header
=
'
|
'
.
join
([
str
(
k
).
center
(
5
)
for
k
in
range
(
1
,
21
,
1
)])
+
'
|
'
print
(
'
id
'
.
center
(
5
),
header
)
print
(
'
-
'
*
126
)
# print who is using the nodes
for
node
in
sorted
(
nodes
):
...
...
@@ -104,7 +104,7 @@ def print_node_loading(users, host, nodes, nodesload):
# now we have a list of user on this node
try
:
users
=
sorted
(
nodesload
[
node
])
for
kk
in
range
(
len
(
users
),
cpunode
):
for
kk
in
range
(
len
(
users
),
cpunode
):
users
.
append
(
''
)
# limit uid names to 5 characters
printlist
=
'
|
'
.
join
([
k
[:
5
].
center
(
5
)
for
k
in
users
])
+
'
|
'
...
...
@@ -112,17 +112,17 @@ def print_node_loading(users, host, nodes, nodesload):
except
KeyError
:
printlist
=
status
.
center
(
5
)
print
node
,
printlist
print
(
node
,
printlist
)
# print a header
if
hostname
==
'
gorm
'
:
print
'
-
'
*
79
print
'
id
'
.
center
(
5
),
header
print
'
-
'
*
79
print
(
'
-
'
*
79
)
print
(
'
id
'
.
center
(
5
),
header
)
print
(
'
-
'
*
79
)
elif
hostname
==
'
jess
'
:
print
'
-
'
*
126
print
'
id
'
.
center
(
5
),
header
print
'
-
'
*
126
print
(
'
-
'
*
126
)
print
(
'
id
'
.
center
(
5
),
header
)
print
(
'
-
'
*
126
)
#print
...
...
@@ -176,7 +176,7 @@ def parse_pbsnode_lall(output):
return
pbsnodes
,
nodes
def
parse_qstat_n1
(
output
):
def
parse_qstat_n1
(
output
,
hostname
=
None
):
"""
Parse the output of qstat -n1
"""
...
...
@@ -188,26 +188,22 @@ def parse_qstat_n1(output):
host
=
{}
users
=
{}
# get the hostname
hostname
=
socket
.
gethostname
()
if
hostname
[:
5
]
==
'
g-000
'
:
host
[
'
name
'
]
=
'
gorm
'
host
[
'
cpu_per_node
'
]
=
12
else
:
# 272 nodes are 2 x 10 core (twenty) processors
if
hostname
is
None
:
hostname
=
socket
.
gethostname
()
if
'
jess
'
in
hostname
:
host
[
'
name
'
]
=
'
jess
'
#total_nodes = 80
host
[
'
cpu_per_node
'
]
=
20
else
:
host
[
'
name
'
]
=
'
gorm
'
host
[
'
cpu_per_node
'
]
=
12
# take the available nodes in nr_nodes: it excludes the ones
# who are down
#queue['_total_cpu_'] = cpu_node*nr_nodes
ii
=
0
for
line
in
output
:
# first 5 are not relevant
if
ii
<
5
:
ii
+=
1
for
line
in
output
[
5
:]:
if
len
(
line
.
strip
())
==
0
:
continue
items
=
line
.
split
()
queue
=
items
[
2
]
...
...
@@ -268,8 +264,6 @@ def parse_qstat_n1(output):
except
KeyError
:
nodesload
[
node
]
=
[
userid
]
ii
+=
1
return
users
,
host
,
nodesload
# FIXME: counts diffferent compared to launch.py....
...
...
@@ -296,8 +290,8 @@ def count_cpus(users, host, pbsnodes):
+
users
[
uid
][
'
O
'
]
+
users
[
uid
][
'
C
'
]
# free cpus
down_cpu
=
host
[
'
cpu_per_node
'
]
*
pbsnodes
[
'
down
'
]
total_cpu
=
host
[
'
cpu_per_node
'
]
*
pbsnodes
[
'
nr_nodes
'
]
down_cpu
=
host
[
'
cpu_per_node
'
]
*
pbsnodes
[
'
down
'
]
total_cpu
=
host
[
'
cpu_per_node
'
]
*
pbsnodes
[
'
nr_nodes
'
]
cpu_free
=
total_cpu
-
down_cpu
-
nodeSum
[
'
used_cpu
'
]
return
cpu_free
,
nodeSum
...
...
@@ -334,9 +328,9 @@ def create_input(walltime='00:59:59', queue='xpresq', pbs_in='pbs_in/', ppn=1,
Create a PBS script for a command. Optionally, define a python environment.
"""
pbs_err_file
=
os
.
path
.
join
(
pbs_out
,
jobname
+
'
.err
'
)
pbs_out_file
=
os
.
path
.
join
(
pbs_out
,
jobname
+
'
.out
'
)
pbs_in_file
=
os
.
path
.
join
(
pbs_in
,
jobname
+
'
.pbswrap
'
)
pbs_err_file
=
os
.
path
.
join
(
pbs_out
,
jobname
+
'
.err
'
)
pbs_out_file
=
os
.
path
.
join
(
pbs_out
,
jobname
+
'
.out
'
)
pbs_in_file
=
os
.
path
.
join
(
pbs_in
,
jobname
+
'
.pbswrap
'
)
pbs_script
=
PBS_TEMP
pbs_script
=
pbs_script
.
replace
(
'
[jobname]
'
,
jobname
)
...
...
@@ -348,8 +342,8 @@ def create_input(walltime='00:59:59', queue='xpresq', pbs_in='pbs_in/', ppn=1,
pbs_script
=
pbs_script
.
replace
(
'
[queue]
'
,
queue
)
pbs_script
=
pbs_script
.
replace
(
'
[commands]
'
,
commands
)
print
'
following commands will be executed on the cluster:
'
print
'
%s
'
%
(
commands
)
print
(
'
following commands will be executed on the cluster:
'
)
print
(
'
%s
'
%
(
commands
)
)
# make sure a pbs_in and pbs_out directory exists
if
not
os
.
path
.
exists
(
pbs_in
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment