Python 操作 Hadoop 脚本


#!/usr/bin/env python
#-*- coding: utf-8 -*-

import os
import sys

# Command (or path) of the hadoop CLI. It is interpolated as the first word of
# every `... fs ...` shell command built by the hdfs* helpers below. It is empty
# by default, so it has to be assigned before those helpers are used.
hadoop_binary = ''
# Not referenced anywhere in the visible part of this file.
# NOTE(review): presumably the directory holding the hadoop binaries -- confirm
# against callers or remove.
hdfs_bin_dir = ''

def hdfsListDirs(hdfs_dir):
    """Return the paths of the sub-directories listed under ``hdfs_dir``.

    Runs ``<hadoop_binary> fs -ls <hdfs_dir>`` through the shell and parses
    its standard output.

    Args:
        hdfs_dir: HDFS path to list. It is interpolated into the shell
            command unquoted.

    Returns:
        A list with the path column of every entry whose permission string
        starts with ``d``. An empty list is returned when the output does not
        begin with the ``Found`` header (for example when the command fails).
    """
    cmd = '%s fs -ls %s' % (hadoop_binary, hdfs_dir)
    pipe = os.popen(cmd)
    try:
        result = pipe.read().strip()
    finally:
        # Close explicitly instead of relying on garbage collection.
        pipe.close()
    if not result.startswith('Found'):
        return []
    dirs = []
    for one_result in result.split('\n'):
        one_result = one_result.strip()
        # Skip the header and any blank line (a blank line would otherwise
        # raise IndexError on the field lookup below).
        if not one_result or one_result.startswith('Found'):
            continue
        # An `fs -ls` entry has 8 columns with the path last; capping the
        # split at 7 keeps a path that contains spaces in one piece. Shorter
        # lines still yield their last token, as before.
        result_seg = one_result.split(None, 7)
        if result_seg[0].startswith('d'):
            dirs.append(result_seg[-1])
    return dirs

def hdfsListFiles(hdfs_dir):
    """Return the paths of the non-directory entries listed under ``hdfs_dir``.

    Runs ``<hadoop_binary> fs -ls <hdfs_dir>`` through the shell, echoing the
    command line to stdout first, and parses the command's standard output.

    Args:
        hdfs_dir: HDFS path to list. It is interpolated into the shell
            command unquoted.

    Returns:
        A list with the path column of every entry whose permission string
        does not start with ``d``. An empty list is returned when the output
        does not begin with the ``Found`` header (for example when the
        command fails).
    """
    cmd = '%s fs -ls %s' % (hadoop_binary, hdfs_dir)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    pipe = os.popen(cmd)
    try:
        result = pipe.read().strip()
    finally:
        # Close explicitly instead of relying on garbage collection.
        pipe.close()
    if not result.startswith('Found'):
        return []
    files = []
    for one_result in result.split('\n'):
        one_result = one_result.strip()
        # Skip the header and any blank line (a blank line would otherwise
        # raise IndexError on the field lookup below).
        if not one_result or one_result.startswith('Found'):
            continue
        # An `fs -ls` entry has 8 columns with the path last; capping the
        # split at 7 keeps a path that contains spaces in one piece. Shorter
        # lines still yield their last token, as before.
        result_seg = one_result.split(None, 7)
        if not result_seg[0].startswith('d'):
            files.append(result_seg[-1])
    return files
def hdfsMv(src, dest):
    """Move ``src`` to ``dest`` on HDFS with ``<hadoop_binary> fs -mv``.

    The command line is echoed to stdout before it runs. Both paths are
    interpolated into the shell command unquoted.

    Args:
        src: Source HDFS path.
        dest: Destination HDFS path.

    Returns:
        The status returned by ``os.system`` (0 when the command succeeded),
        so callers can detect a failed move.
    """
    cmd = '%s fs -mv %s %s' % (hadoop_binary, src, dest)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    return os.system(cmd)

def hdfsRmr(src):
    """Recursively remove ``src`` on HDFS with ``<hadoop_binary> fs -rmr``.

    The command line is echoed to stdout before it runs. ``src`` is
    interpolated into the shell command unquoted.

    Args:
        src: HDFS path to remove.

    Returns:
        The status returned by ``os.system`` (0 when the command succeeded),
        so callers can detect a failed removal.
    """
    # NOTE(review): newer Hadoop releases document `-rmr` as deprecated in
    # favour of `-rm -r`; kept unchanged for compatibility -- confirm against
    # the deployed Hadoop version.
    cmd = '%s fs -rmr %s' % (hadoop_binary, src)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    return os.system(cmd)

def hdfsCopyToLocal(src, dest):
    """Copy ``src`` from HDFS to the local path ``dest``.

    Runs ``<hadoop_binary> fs -copyToLocal <src> <dest>`` through the shell
    after echoing the command line to stdout. Both paths are interpolated
    into the shell command unquoted.

    Args:
        src: Source HDFS path.
        dest: Local destination path.

    Returns:
        The status returned by ``os.system`` (0 when the command succeeded),
        so callers can detect a failed copy.
    """
    cmd = '%s fs -copyToLocal %s %s' % (hadoop_binary, src, dest)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    return os.system(cmd)

def localRmr(src):
    """Recursively delete the local path ``src`` with ``rm -rf``.

    The command line is echoed to stdout before it runs. ``src`` is
    interpolated into the shell command unquoted.

    Args:
        src: Local path to delete.

    Returns:
        The status returned by ``os.system`` (0 when the command succeeded),
        so callers can detect a failed removal.
    """
    cmd = 'rm -rf %s' % (src,)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    return os.system(cmd)

def localMv(src, dest):
    """Move the local path ``src`` to ``dest`` with ``mv``.

    The command line is echoed to stdout before it runs. Both paths are
    interpolated into the shell command unquoted.

    Args:
        src: Local source path.
        dest: Local destination path.

    Returns:
        The status returned by ``os.system`` (0 when the command succeeded),
        so callers can detect a failed move.
    """
    cmd = 'mv %s %s' % (src, dest)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    return os.system(cmd)

def localBak(src):
    """Back up the local path ``src`` by renaming it to ``src + '.bak'``.

    Whatever already sits at the backup name is removed first through
    ``localRmr``; the rename itself is done by ``localMv``.
    """
    backup_path = src + '.bak'
    localRmr(backup_path)
    localMv(src, backup_path)

def localMkdir(path):
    """Create the local directory ``path`` with ``mkdir -p``.

    The command line is echoed to stdout before it runs. ``path`` is
    interpolated into the shell command unquoted.

    Args:
        path: Local directory to create.

    Returns:
        The status returned by ``os.system`` (0 when the command succeeded),
        so callers can detect a failed creation.
    """
    cmd = 'mkdir -p %s' % (path,)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(cmd)
    return os.system(cmd)

def baseNameInPath(path):
    """Return the final component of ``path`` (empty if it ends in a separator)."""
    _head, tail = os.path.split(path)
    return tail

def fullPath(file_dir, file_name):
    """Join ``file_dir`` and ``file_name`` into one path via ``os.path.join``."""
    joined = os.path.join(file_dir, file_name)
    return joined

def dirInPath(path):
    """Return the directory part of ``path`` (everything before the last component)."""
    head, _tail = os.path.split(path)
    return head

def listFiles(path, recursive=False):
    """List the files found under the local directory ``path``.

    Args:
        path: Local directory to scan.
        recursive: When true, descend into sub-directories with ``os.walk``
            and collect every file name it reports. Otherwise only the direct
            children of ``path`` that pass ``os.path.isfile`` are returned.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
        If ``path`` is not an existing directory, ``<path> not exist`` is
        printed and an empty list is returned.
    """
    # isdir() is already False for a missing path, so exists() is not needed.
    if not os.path.isdir(path):
        # Single pre-formatted argument prints the same on Python 2 and 3.
        print('%s not exist' % (path,))
        return []
    if recursive:
        all_files = []
        for dir_path, _dir_names, file_names in os.walk(path):
            all_files.extend(os.path.join(dir_path, file_name)
                             for file_name in file_names)
        return all_files
    candidates = [os.path.join(path, item) for item in os.listdir(path)]
    return [candidate for candidate in candidates if os.path.isfile(candidate)]

 

你可能感兴趣的:(python)