#!/usr/bin/env python
#-*- coding: utf-8 -*-
import os
import re
import sys
# Command used to invoke Hadoop. It is interpolated as the leading part of
# every "<hadoop_binary> fs ..." shell command built by the hdfs* helpers in
# this section. It is empty here, so it has to be assigned before those
# helpers can run a real command.
hadoop_binary = ''
# Not referenced anywhere in this section of the file.
# NOTE(review): presumably a directory related to the HDFS binaries --
# confirm against the callers that set or read it.
hdfs_bin_dir = ''
def hdfsListDirs(hdfs_dir):
    """Return the directory entries reported by ``hadoop fs -ls``.

    Runs ``<hadoop_binary> fs -ls <hdfs_dir>`` through the shell and parses
    its standard output.

    Args:
        hdfs_dir: HDFS path to list. It is interpolated into the shell
            command unquoted, so it must not contain characters the shell
            would interpret.

    Returns:
        A list with the path of every listed entry whose permission string
        starts with ``d``. An empty list when the command output does not
        start with ``Found``.
    """
    cmd = str('%s fs -ls %s' % (hadoop_binary, hdfs_dir))
    # Close the pipe deterministically instead of relying on garbage
    # collection of the popen object.
    with os.popen(cmd) as pipe:
        result = pipe.read().strip()
    if not result.startswith('Found'):
        return []
    # The path is everything after the "YYYY-MM-DD HH:MM" column. Anchoring
    # on the timestamp keeps paths that contain spaces intact, which taking
    # the last whitespace-separated token does not.
    timestamp_re = re.compile(r'\s\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s(.+)$')
    dirs = []
    for line in result.split('\n'):
        if line.startswith('Found'):
            continue
        fields = line.split()
        if not fields:
            # A blank line carries no entry; indexing it would raise.
            continue
        if not fields[0].startswith('d'):
            continue
        match = timestamp_re.search(line)
        # Fall back to the last token when the line has no timestamp column.
        dirs.append(match.group(1) if match else fields[-1])
    return dirs
def hdfsListFiles(hdfs_dir):
    """Return the non-directory entries reported by ``hadoop fs -ls``.

    Runs ``<hadoop_binary> fs -ls <hdfs_dir>`` through the shell, echoing the
    command line to stdout first, and parses the command's standard output.

    Args:
        hdfs_dir: HDFS path to list. It is interpolated into the shell
            command unquoted, so it must not contain characters the shell
            would interpret.

    Returns:
        A list with the path of every listed entry whose permission string
        does not start with ``d``. An empty list when the command output
        does not start with ``Found``.
    """
    cmd = str('%s fs -ls %s' % (hadoop_binary, hdfs_dir))
    print(cmd)
    # Close the pipe deterministically instead of relying on garbage
    # collection of the popen object.
    with os.popen(cmd) as pipe:
        result = pipe.read().strip()
    if not result.startswith('Found'):
        return []
    # The path is everything after the "YYYY-MM-DD HH:MM" column. Anchoring
    # on the timestamp keeps paths that contain spaces intact, which taking
    # the last whitespace-separated token does not.
    timestamp_re = re.compile(r'\s\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s(.+)$')
    files = []
    for line in result.split('\n'):
        if line.startswith('Found'):
            continue
        fields = line.split()
        if not fields:
            # A blank line carries no entry; indexing it would raise.
            continue
        if fields[0].startswith('d'):
            continue
        match = timestamp_re.search(line)
        # Fall back to the last token when the line has no timestamp column.
        files.append(match.group(1) if match else fields[-1])
    return files
def hdfsMv(src, dest):
    """Move ``src`` to ``dest`` on HDFS via ``<hadoop_binary> fs -mv``.

    The command line is echoed to stdout before it is executed.

    Args:
        src: Source HDFS path.
        dest: Destination HDFS path.

        Both values are interpolated into the shell command unquoted, so
        they must not contain characters the shell would interpret.

    Returns:
        The status returned by ``os.system``: 0 when the command succeeded,
        non-zero otherwise. Returned so callers can detect a failed move.
    """
    cmd = str('%s fs -mv %s %s' % (hadoop_binary, src, dest))
    print(cmd)
    return os.system(cmd)
def hdfsRmr(src):
    """Recursively remove ``src`` on HDFS via ``<hadoop_binary> fs -rmr``.

    The command line is echoed to stdout before it is executed.

    Args:
        src: HDFS path to delete. It is interpolated into the shell command
            unquoted, so it must not contain characters the shell would
            interpret.

    Returns:
        The status returned by ``os.system``: 0 when the command succeeded,
        non-zero otherwise. Returned so callers can detect a failed delete.
    """
    # NOTE(review): newer Hadoop releases document `-rmr` as deprecated in
    # favour of `-rm -r`; the flag is kept as-is for compatibility with the
    # cluster this was written for -- confirm the target Hadoop version.
    cmd = str('%s fs -rmr %s' % (hadoop_binary, src))
    print(cmd)
    return os.system(cmd)
def hdfsCopyToLocal(src, dest):
    """Copy ``src`` from HDFS to the local path ``dest``.

    Runs ``<hadoop_binary> fs -copyToLocal <src> <dest>`` through the shell,
    echoing the command line to stdout first.

    Args:
        src: Source HDFS path.
        dest: Local destination path.

        Both values are interpolated into the shell command unquoted, so
        they must not contain characters the shell would interpret.

    Returns:
        The status returned by ``os.system``: 0 when the command succeeded,
        non-zero otherwise. Returned so callers can detect a failed copy.
    """
    cmd = str('%s fs -copyToLocal %s %s' % (hadoop_binary, src, dest))
    print(cmd)
    return os.system(cmd)
def localRmr(src):
    """Delete ``src`` on the local filesystem with ``rm -rf``.

    The command line is echoed to stdout before it is executed.

    Args:
        src: Local path to delete. It is interpolated into the shell command
            unquoted, so whitespace or shell metacharacters in it change
            what gets deleted.

    Returns:
        The status returned by ``os.system``: 0 when the command succeeded,
        non-zero otherwise. Returned so callers can detect a failed delete.
    """
    cmd = str('rm -rf %s' % (src))
    print(cmd)
    return os.system(cmd)
def localMv(src, dest):
    """Move ``src`` to ``dest`` on the local filesystem with ``mv``.

    The command line is echoed to stdout before it is executed.

    Args:
        src: Local source path.
        dest: Local destination path.

        Both values are interpolated into the shell command unquoted, so
        they must not contain characters the shell would interpret.

    Returns:
        The status returned by ``os.system``: 0 when the command succeeded,
        non-zero otherwise. Returned so callers can detect a failed move.
    """
    cmd = str('mv %s %s' % (src, dest))
    print(cmd)
    return os.system(cmd)
def localBak(src):
    """Back up ``src`` by renaming it to ``src + '.bak'``.

    Any existing ``src + '.bak'`` is removed first via ``localRmr`` and then
    ``src`` is moved onto that name via ``localMv``.

    Args:
        src: Local path to back up.

    Returns:
        ``None`` when ``src`` does not exist (nothing is touched in that
        case); otherwise whatever ``localMv`` returns.
    """
    if not os.path.lexists(src):
        # Without this guard the previous backup would be deleted and then
        # the move would fail, leaving no backup at all.
        print('%s not exist, backup skipped' % src)
        return None
    bak = src + '.bak'
    localRmr(bak)
    return localMv(src, bak)
def localMkdir(path):
    """Create the local directory ``path`` with ``mkdir -p``.

    The command line is echoed to stdout before it is executed.

    Args:
        path: Local directory to create. It is interpolated into the shell
            command unquoted, so it must not contain characters the shell
            would interpret.

    Returns:
        The status returned by ``os.system``: 0 when the command succeeded,
        non-zero otherwise. Returned so callers can detect a failed mkdir.
    """
    cmd = str('mkdir -p %s' % (path))
    print(cmd)
    return os.system(cmd)
def baseNameInPath(path):
    """Return the final component of ``path`` (empty if it ends with a separator)."""
    # The tail of os.path.split() is the basename by definition.
    _head, tail = os.path.split(path)
    return tail
def fullPath(file_dir, file_name):
    """Join ``file_dir`` and ``file_name`` into one path with ``os.path.join``."""
    joined = os.path.join(file_dir, file_name)
    return joined
def dirInPath(path):
    """Return the directory portion of ``path`` (everything before the final component)."""
    # The head of os.path.split() is the dirname by definition.
    head, _tail = os.path.split(path)
    return head
def listFiles(path, recursive=False):
    """List the regular files found under the local directory ``path``.

    Args:
        path: Local directory to scan.
        recursive: When true, walk the whole tree below ``path`` with
            ``os.walk``; otherwise only look at the direct children.

    Returns:
        A list of file paths, each prefixed with the directory it was found
        in. An empty list, after printing a message, when ``path`` is not an
        existing directory.
    """
    if not (os.path.exists(path) and os.path.isdir(path)):
        print('%s not exist' % path)
        return []

    found = []
    if recursive:
        for root, _subdirs, names in os.walk(path):
            found.extend(os.path.join(root, name) for name in names)
        return found

    for entry in os.listdir(path):
        if os.path.isfile(path + os.sep + entry):
            found.append(os.path.join(path, entry))
    return found