#!/usr/bin/perl
#
# Background: while reviewing queue usage we found that jobs submitted to the
# SGE (Sun Grid Engine) cluster were exceeding the memory they requested.
# This program gives a cluster-wide overview of the jobs in one queue so that
# users can check the state of their own jobs before those jobs get killed.
# This tool belongs to BGI. Happy working!

=head1 Name

sge_cluster_queue.pl  --  choose the queue observation

=head1 Description

This program can choose the bmk queue observation

  1) look for the queue jobs operation and who is working state
  2) find the jobs number total test number, and the user over mem jobs,
     Jobs-ID, detailed mem
  3) statistics total use mem

=head1 Version

  Author: Li linji, [email protected]
  Version: 1.0,  Date: 2012-7-30

=head1 Usage

  --queue   set you want look up queue (defined general.q)
  --mem     set Use full details queue Jobs and mem (check over_mem, and jobs-ID)
  --s       set Task sort by (name,job,vf,mem && must set --mem) ##defined name
  --t       set Task state information (defined run)
  --help    output help information to screen

=head1 Example

  ./sge_cluster_queue.pl
  perl sge_cluster_queue.pl
  perl sge_cluster_queue.pl -queue general.q
  perl sge_cluster_queue.pl -queue general.q -mem
  perl sge_cluster_queue.pl -queue general.q -mem -s vf

=cut

use strict;
use warnings;
use Getopt::Long;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
use Data::Dumper;

# Horizontal rule printed above and below the report.
my $RULE = "#################################################################################################################################";

# Run the report only when executed as a script, so that the helper subs
# (e.g. MEM_GMKT) can be loaded and exercised without calling qstat.
main() unless caller;

###############################################################################
# main: parse options, collect job data from qstat, print the per-user table.
#
# Data collected while reading qstat:
#   %out      per user: job counts, requested memory ('hard') and used
#             memory ('mem'), each broken down by job state and by queue,
#             plus the list of jobs that use more memory than requested
#   %statue   set of job states seen (table columns)
#   %Queue    set of queues seen (only the queue selected with --queue)
#   %Job      per job id: owner, state and (for running jobs) queue
#   %For_sort per user: the value the rows are sorted by
###############################################################################
sub main {
    my $Sort         = "name";
    my $St           = "r";
    my $queue_search = "general.q";
    my ( $help, $mem );

    GetOptions(
        "help"    => \$help,
        "queue=s" => \$queue_search,
        "mem"     => \$mem,
        "s=s"     => \$Sort,
        "t=s"     => \$St,
    );
    die _capture( 'pod2text', $0 ) if $help;

    # -s accepts either the sort key itself or its number.
    my %sort_alias = ( 1 => "name", 2 => "job", 3 => "vf", 4 => "mem" );
    $Sort = $sort_alias{$Sort} if exists $sort_alias{$Sort};

    # Usage errors go to STDERR; the exit status stays 0 as it always was.
    if ( !grep { $Sort eq $_ } qw(name job vf mem) ) {
        print STDERR <<'SORT';
    -s :
        1 or name : sort by name (default)
        2 or job : sort by jobs number
        3 or vf : sort by vf (need -m )
        4 or mem : sort by mem (need -m )
SORT
        exit 0;
    }

    # Sorting by memory only makes sense when memory details are collected.
    if ( !defined $mem && ( $Sort eq "mem" || $Sort eq "vf" ) ) {
        print STDERR <<'SORT';
    -mem : get mem info
    -s :
        3 or vf : sort by vf (need -m )
        4 or mem : sort by mem (need -m )
SORT
        exit 0;
    }
    my $with_mem = defined $mem;

    ########## users to look up (may need changing per site) ##########
    my @users = _home_users();

    print "the (公司) information list<<<<<<<<<<<<<<<( . o . )\n";
    print "User: @users\n";
    print "Please Look Queue information:>>>>>>>>O(._.)O~\n";

    ########## job list of all those users ##########
    my $listinfo = _capture( 'qstat', '-u', @users );
    my @task = split /\n/, $listinfo;
    splice @task, 0, 2;    # drop the column header and the separator line

    my ( %out, %statue, %Queue, %Job, %For_sort );
    my @job_ids;

    for my $line (@task) {
        $line =~ s/^\s+//;

        # $tab[0] = job id, $tab[3] = user, $tab[4] = state,
        # $tab[7] = queue@host for running jobs (slot count otherwise)
        my @tab = split /\s+/, $line;
        next if @tab < 5;    # blank or malformed line
        my ( $job_id, $user, $state ) = @tab[ 0, 3, 4 ];

        if ( defined $tab[7] && $tab[7] =~ /(\S+)\@(compute-\d+-\d+)\./ ) {
            my $queue = $1;

            # Jobs running in any other queue are ignored completely.
            next if $queue ne $queue_search;
            $Queue{$queue}         = 1;
            $Job{$job_id}{'queue'} = $queue;
            $out{$user}{'queue'}{$queue}++;
        }

        $statue{$state}         = 1;
        $Job{$job_id}{'status'} = $state;
        $Job{$job_id}{'user'}   = $user;

        if ( $Sort eq "name" ) {
            $For_sort{$user} = $user;
        }
        elsif ( $Sort eq "job" && $state eq $St ) {
            $For_sort{$user}++;
        }

        push @job_ids, $job_id;
        $out{$user}{'status'}{$state}++;
    }

    ########## per-job memory: requested (virtual_free) vs used (vmem) ##########
    my $overload = 0;
    if ( $with_mem && @job_ids ) {
        my $detail = _capture( 'qstat', '-j', join( ',', @job_ids ) );

        # qstat -j separates the individual job reports with a line of '='.
        for my $info ( split /={2,}/, $detail ) {
            next unless $info =~ /job_number:\s+(\d+)/;
            my $jobnum = $1;
            next unless exists $Job{$jobnum};
            my $user   = $Job{$jobnum}{'user'};
            my $status = $Job{$jobnum}{'status'};

            # Only jobs that request virtual_free are accounted; the request
            # may appear anywhere in the comma-separated resource list.
            next
              unless $info =~
              /hard resource_list:[^\n]*?\bvirtual_free=([^,\s]+)/;
            my $hard_mem = MEM_GMKT($1);

            $For_sort{$user} += $hard_mem
              if $Sort eq "vf" && $St eq $status;
            $out{$user}{'hard'}{'status'}{$status} += $hard_mem;

            # Used memory is only tracked for jobs placed in the queue.
            next unless exists $Job{$jobnum}{'queue'};
            my $queue = $Job{$jobnum}{'queue'};
            $out{$user}{'hard'}{'queue'}{$queue} += $hard_mem;

            # One "usage" line per task; add up the vmem of all of them.
            my $vmem = 0;
            while ( $info =~ /vmem=(\S+), maxvmem=\S+/g ) {
                my $used = $1;
                $vmem += MEM_GMKT($used) if $used ne 'N/A';
            }

            $For_sort{$user} += $vmem
              if $Sort eq "mem" && $St eq $status;

            my $over = $vmem - $hard_mem;
            if ( $over > 0 ) {
                push @{ $out{$user}{'overid'} },  $jobnum;
                push @{ $out{$user}{'overmem'} }, _round1($over);
                $out{$user}{'overnum'}++;
                $out{$user}{'totalover'} += $over;
                $overload++;
            }

            $out{$user}{'mem'}{'queue'}{$queue}   += $vmem;
            $out{$user}{'mem'}{'status'}{$status} += $vmem;
        }
    }

    ########## row order ##########
    my @sort_name;
    if ( $Sort eq "name" ) {
        @sort_name = sort keys %For_sort;
    }
    else {
        # Users without a value for the chosen key sort as 0.
        for my $name ( keys %out ) {
            $For_sort{$name} = 0 if !exists $For_sort{$name};
        }
        @sort_name =
          sort { $For_sort{$a} <=> $For_sort{$b} || $a cmp $b }
          keys %For_sort;
    }

    # Fixed column order so repeated runs print the same layout.
    my @states = sort keys %statue;
    my @queues = sort keys %Queue;

    ########## header ##########
    if ($with_mem) {
        print "\n";
        print "$RULE\n";
        printf "%11s", 'user';
        print "\t" . 'number of jobs , vf (G) , vmem (G) ' . "\n";
    }
    printf "%11s", 'user';

    my %total;
    for my $state (@states) {
        printf " %12s", $state;
        $total{'status'}{$state} = 0;
        if ($with_mem) {
            $total{'hard'}{'status'}{$state} = 0;
            $total{'mem'}{'status'}{$state}  = 0;
        }
    }
    for my $queue (@queues) {
        printf " %12s", $queue_search;
        $total{'queue'}{$queue} = 0;
        if ($with_mem) {
            $total{'hard'}{'queue'}{$queue} = 0;
            $total{'mem'}{'queue'}{$queue}  = 0;
        }
    }
    if ( $overload > 0 ) {
        print "\tover_jobs\tover_mem\toverload_id's\toverload_mem(G)";
    }
    print "\n";

    ########## one row per user ##########
    for my $user (@sort_name) {
        my $rec = $out{$user} || {};
        printf "%12s", $user;
        printf " %12s", _cell( $rec, \%total, 'status', $_, $with_mem )
          for @states;
        printf " %12s", _cell( $rec, \%total, 'queue', $_, $with_mem )
          for @queues;

        if ( $overload > 0 ) {
            if ( exists $rec->{'overid'} ) {
                print "\t"
                  . join( "\t",
                    $rec->{'overnum'},
                    _round1( $rec->{'totalover'} ),
                    join( ',', @{ $rec->{'overid'} } ),
                    join( ',', @{ $rec->{'overmem'} } ) );
            }
            else {
                print "\t---\t---\t---\t---";
            }
        }
        print "\n";
    }

    ########## totals ##########
    printf "%11s", 'total';
    printf " %12s", _total_cell( \%total, 'status', $_, $with_mem )
      for @states;
    printf " %12s", _total_cell( \%total, 'queue', $_, $with_mem )
      for @queues;
    print "\n";
    print "$RULE\n";
    print "\n";
    return;
}

# Run a command without going through the shell and return everything it
# wrote to STDOUT ('' when the command cannot be started). The exit status
# is ignored, exactly as backticks would.
sub _capture {
    my @cmd = @_;
    my $pid = open( my $pipe, '-|', @cmd );
    if ( !$pid ) {
        warn "cannot run '$cmd[0]': $!\n";
        return '';
    }
    my $output = do { local $/; <$pipe> };
    close $pipe;
    return defined $output ? $output : '';
}

# Login names of all /etc/passwd entries whose line mentions 'home'
# (same selection as: grep 'home' /etc/passwd | cut -f 1 -d :).
sub _home_users {
    my @names;
    open( my $passwd, '<', '/etc/passwd' ) or do {
        warn "cannot read /etc/passwd: $!\n";
        return;
    };
    while ( my $entry = <$passwd> ) {
        next unless $entry =~ /home/;
        chomp $entry;
        my ($name) = split /:/, $entry, 2;
        push @names, $name if defined $name && length $name;
    }
    close $passwd;
    return @names;
}

# Round a non-negative number to one decimal place.
sub _round1 {
    my ($value) = @_;
    return ( int( 10 * $value + 0.5 ) ) / 10;
}

# Build one table cell for a user and add its figures to the running totals.
#   $rec      the user's record from %out
#   $total    reference to the totals hash (updated in place)
#   $dim      'status' or 'queue'
#   $key      the state or queue name of the column
#   $with_mem true when vf and vmem are shown next to the job count
# Returns "jobs" or "jobs,vf,vmem"; '-' stands for "no data".
sub _cell {
    my ( $rec, $total, $dim, $key, $with_mem ) = @_;

    if ( !exists $rec->{$dim}{$key} ) {
        return $with_mem ? '-,-,-' : '-';
    }

    my $cell = $rec->{$dim}{$key};
    $total->{$dim}{$key} += $rec->{$dim}{$key};
    return $cell unless $with_mem;

    for my $kind ( 'hard', 'mem' ) {
        if ( exists $rec->{$kind}{$dim}{$key} ) {
            $cell .= ',' . _round1( $rec->{$kind}{$dim}{$key} );
            $total->{$kind}{$dim}{$key} += $rec->{$kind}{$dim}{$key};
        }
        else {
            $cell .= ',-';
        }
    }
    return $cell;
}

# Build one cell of the 'total' row from the accumulated totals.
sub _total_cell {
    my ( $total, $dim, $key, $with_mem ) = @_;
    my $cell = $total->{$dim}{$key};
    if ($with_mem) {
        $cell .= ',' . _round1( $total->{'hard'}{$dim}{$key} );
        $cell .= ',' . _round1( $total->{'mem'}{$dim}{$key} );
    }
    return $cell;
}

# Convert an SGE memory value such as "5G", "512.000M", "2048K", "1T" or a
# plain byte count into gigabytes.
#   - The unit letter is case-insensitive and always 1024-based.
#   - A number without a unit is taken as bytes (SGE memory_specifier rule).
#   - Anything after the first number and unit is ignored, so
#     "5G,h_vmem=6G" yields 5.
#   - Values that do not start with a number (e.g. "N/A") yield 0.
sub MEM_GMKT {
    my ($num) = @_;
    return 0 unless defined $num;

    my %gb_per_unit = (
        ''  => 1 / 1024**3,
        'k' => 1 / 1048576,    # 1048576 = 1024 * 1024
        'm' => 1 / 1024,
        'g' => 1,
        't' => 1024,
    );

    if ( $num =~ /\A\s*(\d+(?:\.\d*)?|\.\d+)\s*([kmgt]?)/i ) {
        return $1 * $gb_per_unit{ lc $2 };
    }
    return 0;
}

__END__
#################################################################################################################################
       user	number of jobs , vf (G) , vmem (G)
       user             r          Eqw           qw    general.q	over_jobs	over_mem	overload_id's	overload_mem(G)
      dengdj    5,15,21.9        -,-,-        -,-,-    5,15,21.9	3	7.7	7120606,7120607,7120609	0.9,1,5.8
         heh  35,105,22.1        -,-,-        -,-,-  35,105,22.1	---	---	---	---
      huangl      1,5,0.3        -,-,-        -,-,-      1,5,0.3	---	---	---	---
    jiangchb   49,98,19.3        -,-,-        1,2,-   49,98,19.3	---	---	---	---
       linfq        -,-,-        3,6,-        -,-,-        -,-,-	---	---	---	---
       sunhy  50,300,74.6        -,-,-        -,-,-  50,300,74.6	---	---	---	---
      wangml   15,45,37.6        -,-,-        -,-,-   15,45,37.6	7	10.1	7120577,7120586,7120594,7120708,7120869,7120898,7120907	1.6,1.6,0.8,1.7,1.1,1.6,1.6
      zenghp        -,-,-        -,-,-        1,5,-        -,-,-	---	---	---	---
    zhangjie        -,-,-        2,4,-        -,-,-        -,-,-	---	---	---	---
      total 155,568,175.7       5,10,0        2,7,0 155,568,175.7
#################################################################################################################################

README
user:             the group queue general.q owner
state(
   r:   "number of jobs , vf (G) , vmem (G)"  1:numbers job 2:vf= mem 3:Actual use
   Eqw: 1:numbers job 2:vf= mem 3:Actual use
   qw:  1:numbers job 2:vf= mem 3:Actual use
   ..
)
general.q:        The current query queue
over_jobs:        over_jobs.number
over_mem:         over_mem.size
overload_id's:    the job-ID
overload_mem(G)   the job-ID's overload_mem
# Source: the "坚持就是胜利" ("Persistence is victory") blog. Please contact the author before reposting.