监控 PBS 作业运行状况

# 监控内存使用情况

# Memory monitor for a single PBS job.
# Usage: bash <this-script> [job_id] [workdir] [poll_seconds]
# All arguments are optional; the defaults are the values that used to be
# hard-coded, so running the script without arguments behaves as before.
job_id=${1:-163997}
workdir=${2:-/share_bio/}
poll_seconds=${3:-60}

#######################################
# Poll `qstat -f` for one job and append each reported
# `resources_used.mem` line to a log file until the job is gone or done.
# Arguments:
#   $1 - job id handed to `qstat -f`
#   $2 - log file to append to (its directory must already exist)
#   $3 - seconds to sleep between polls (default: 60)
# Outputs:
#   Appends a header line and one line per poll to the log file;
#   diagnostics go to stderr.
# Returns:
#   0 once the job is no longer reported (or is reported as completed),
#   1 if the log file cannot be written.
#######################################
monitor_job_mem() {
  local id=$1
  local log_file=$2
  local interval=${3:-60}
  local record

  # NOTE(review): this header text is kept byte-for-byte from the original
  # script; it looks unrelated to a memory log -- confirm it is intended.
  if ! printf '%s\n' "population_sizes" >> "$log_file"; then
    printf 'monitor_job_mem: cannot write to %s\n' "$log_file" >&2
    return 1
  fi

  while :; do
    # Query once per poll and reuse the answer. Calling qstat a second time
    # for the grep would race with the job disappearing in between and
    # doubles the load on the PBS server.
    record=$(qstat -f "$id") || break

    # Heuristic kept from the original: a reply shorter than 100 characters
    # is not a full job record, so the job is treated as gone.
    if (( ${#record} < 100 )); then
      break
    fi

    # No match is fine here: a job that has not started yet simply has no
    # resources_used.* attributes, so nothing is logged for this poll.
    grep -F -- 'resources_used.mem' <<<"$record" >> "$log_file"

    # A finished job can stay visible in qstat for a while in state C; its
    # numbers no longer change, so stop after logging the final value.
    if grep -Eq '^[[:space:]]*job_state = C[[:space:]]*$' <<<"$record"; then
      break
    fi

    sleep "$interval"
  done
  return 0
}

# ${workdir%/} drops one trailing slash so the path has no doubled "//".
monitor_job_mem "$job_id" \
  "${workdir%/}/pbs/pbs.job.${job_id}.mem_used.logs" \
  "$poll_seconds"

 

# 监控 CPU 使用情况

作业结束后,可以用 tracejob 查看该作业的历史记录,其中 resources_used.cput 即为作业消耗的 CPU 时间:

# Print the logged history of job 2222. Per the tracejob man page, -n sets
# how many days back to search the logs (365 here) -- confirm against the
# installed version.
tracejob -n 365 2222

Job: 2222.centos64

07/30/2014 18:50:48  S    enqueuing into batch, state 1 hop 1
07/30/2014 18:50:48  S    Job Modified at request of root@centos64
07/30/2014 18:50:48  L    Job Run
07/30/2014 18:50:48  S    Job Run at request of root@centos64
07/30/2014 18:50:48  S    Not sending email: User does not want mail of this type.
07/30/2014 18:50:48  A    queue=batch
07/30/2014 18:50:48  A    user=aimin group=aimin jobname=cow5utr01 queue=batch ctime=1406717448 qtime=1406717448 etime=1406717448 start=1406717448 owner=aimin@centos64 exec_host=centos64/1 Resource_List.neednodes=1:ppn=1
                          Resource_List.nodect=1 Resource_List.nodes=1:ppn=1 Resource_List.walltime=1440:00:00
07/30/2014 19:15:14  M    scan_for_terminated: job 2222.centos64 task 1 terminated, sid=3480
07/30/2014 19:15:14  M    job was terminated
07/30/2014 19:15:15  S    Exit_status=0 resources_used.cput=00:24:21 resources_used.mem=9708kb resources_used.vmem=254684kb resources_used.walltime=00:24:26
07/30/2014 19:15:15  S    Not sending email: User does not want mail of this type.
07/30/2014 19:15:15  M    obit sent to server
07/30/2014 19:15:15  S    on_job_exit valid pjob: 2222.centos64 (substate=50)
07/30/2014 19:15:15  A    user=aimin group=aimin jobname=cow5utr01 queue=batch ctime=1406717448 qtime=1406717448 etime=1406717448 start=1406717448 owner=aimin@centos64 exec_host=centos64/1 Resource_List.neednodes=1:ppn=1
                          Resource_List.nodect=1 Resource_List.nodes=1:ppn=1 Resource_List.walltime=1440:00:00 session=3480 end=1406718915 Exit_status=0 resources_used.cput=00:24:21 resources_used.mem=9708kb resources_used.vmem=254684kb
                          resources_used.walltime=00:24:26
07/30/2014 19:15:20  M    removed job script
07/30/2014 19:20:23  S    dequeuing from batch, state COMPLETE

 

你可能感兴趣的:(监控)