最近发生了一次因为 HDFS 坏块导致的 Hive 计算问题,因此写了一个监控脚本,用来监控 HDFS 的坏块,脚本如下:

#!/usr/bin/python
# -*- coding: utf8 -*-
# Author: ericni
# Date: 2014-07-24
# Purpose: monitor HDFS for corrupt blocks and mail an alert (Python 2 script).
import sys
import property   # project-local module -- presumably cluster/mail config; TODO confirm
import sendmail   # project-local helper used to deliver the alert mail
import re,os      # NOTE(review): `re` is rebound to a pipe object in __main__ below
# Python-2-only hack: re-expose setdefaultencoding (hidden by site.py) so the
# UTF-8 Chinese text in the mail body can mix with byte strings without
# raising UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf-8')
if __name__ == "__main__":
        # Lines describing corrupt blocks, accumulated for the alert mail.
        corruptlist = []
        # Ask the NameNode for the current list of corrupt file blocks.
        cmd = "hadoop fsck -list-corruptfileblocks"
        # NOTE(review): this rebinds the imported `re` module to the popen pipe
        # handle; it happens to be harmless only because `re` is never used as
        # the regex module afterwards -- renaming the variable would be safer.
        re = os.popen(cmd)
        result = re.readlines()
        print result
        for line in result:
                # Keep only lines that reference an actual block id; skip paths
                # under .Trash (presumably files already deleted by users, not
                # worth alerting on -- TODO confirm).
                if "blk_" in line and ".Trash" not in line:
                #if "blk_" in line:
                        corruptlist.append(line)
        # Only build and send the alert mail when corrupt blocks were found.
        if len(corruptlist) != 0:
                mailcontent = """
                        
                        Hadoop集群坏块监控
                          
                           body { font-size: 14px; color: #333;background-color: #fff;}
                           td { border: 1px solid #C1DAD7;padding:"4px 10px"; line-height: 24px;}       
                           table {border-collapse: collapse; width: 96%s;}
                           .divtd {color:#E28E40;}
                           .divcss5{ color:#F00;}
                           """%("%")
                mailcontent += """
        坏块数量 %s,具体信息如下:

        
                                                                                                                                                                                   序号
                                                        块号
                                                        文件信息