//注意Linux主机
//删除指定目录中文件名以 .tmp 为后缀的文件
#!/usr/bin/python
"""Delete the files in a directory whose names end with ``.tmp``."""
import glob
import os


def remove_tmp_files(dirname='/tmp', suffix='.tmp'):
    """Delete entries directly inside *dirname* whose names end with *suffix*.

    Only the top level of *dirname* is scanned (no recursion), and the ``*``
    glob pattern does not match names that start with a dot.

    Entries that cannot be removed (for example a directory, or a file the
    process may not delete) are skipped so the remaining files are still
    processed.

    Returns the list of paths that were actually removed.
    """
    removed = []
    for filename in glob.glob(os.path.join(dirname, '*')):
        # Compare against the full '.tmp' extension; testing for a bare
        # 'tmp' would also delete unrelated names such as 'footmp'.
        if not filename.endswith(suffix):
            continue
        try:
            os.remove(filename)
        except OSError:
            # Best effort: leave what cannot be deleted and carry on.
            continue
        removed.append(filename)
    return removed


if __name__ == '__main__':
    remove_tmp_files('/tmp')
//输出指定文件夹(包括子文件夹)中最大的两个文件
#!/usr/bin/python
"""Print the two largest files found under a directory tree."""
import heapq
import os


def largest_files(dirname='/tmp', count=2):
    """Return the *count* largest files under *dirname*, subfolders included.

    The result is a list of ``(size_in_bytes, full_path)`` tuples sorted in
    ascending order, so the largest file is the last item.

    Files whose size cannot be read (a dangling symlink, a file deleted
    while the tree is being walked) are skipped instead of aborting the scan.
    """
    sizes = []
    for root, _dirs, files in os.walk(dirname):
        for filename in files:
            fullname = os.path.join(root, filename)
            try:
                filesize = os.path.getsize(fullname)
            except OSError:
                continue
            sizes.append((filesize, fullname))
    # nlargest avoids sorting the whole list; re-sorting its small result
    # keeps the ascending order of the original ``allsize[-2:]`` output.
    return sorted(heapq.nlargest(count, sizes))


if __name__ == '__main__':
    print(largest_files('/tmp'))
//删除指定文件夹(包括子文件夹)中的所有文件及文件夹
"""Delete everything inside a directory tree, keeping the top directory."""
import os


def remove_tree_contents(top):
    """Delete every file and subdirectory below *top*; *top* itself is kept.

    CAUTION: this is destructive. Whatever lives under *top* is removed
    without confirmation, so double-check the path before calling.

    Raises OSError if an entry cannot be removed.
    """
    # Walk bottom-up (topdown=False) so that each directory is already
    # empty by the time os.rmdir() reaches it.
    for root, dirs, files in os.walk(top, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            path = os.path.join(root, name)
            if os.path.islink(path):
                # os.walk reports symlinks to directories under ``dirs``,
                # but os.rmdir rejects them; unlink the link itself and
                # leave its target untouched.
                os.remove(path)
            else:
                os.rmdir(path)


if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: %s DIRECTORY' % sys.argv[0])
    remove_tree_contents(sys.argv[1])
//删除指定文件夹中的文件序列
#!/usr/bin/python
"""Delete a numbered file sequence such as ``shipA####.tif``."""
import errno
import os
import re

# Splits a file name into: text before the frame number, the run of '#'
# that stands for the frame number, and the text after it.  When the name
# holds several runs of '#', the last one is taken as the frame number.
_FRAME_PATTERN = re.compile(r'^(.*?)(#+)([^#]*)$')


def expand_sequence(sequence, start, end):
    """Return the file paths for frames *start* to *end*, both inclusive.

    *sequence* is a path whose file name contains a run of ``#``
    characters; each frame number is zero-padded to the length of that run
    (longer numbers are written out in full).  Leading and trailing
    whitespace around *sequence* is ignored.

    Raises ValueError when the file name contains no ``#``.
    """
    sequence = sequence.strip()
    directory, name = os.path.split(sequence)
    match = _FRAME_PATTERN.match(name)
    if match is None:
        raise ValueError("no '#' frame placeholder in %r" % sequence)
    prefix, hashes, suffix = match.groups()
    width = len(hashes)
    return [
        os.path.join(directory, '%s%0*d%s' % (prefix, width, index, suffix))
        for index in range(start, end + 1)
    ]


def remove_sequence(sequence, start, end):
    """Delete frames *start* to *end* of *sequence*.

    A frame that does not exist is recorded and skipped so that one gap in
    the sequence does not leave the later frames undeleted.  Any other
    OSError (for example permission denied) is re-raised.

    Returns a ``(removed, missing)`` pair of path lists.
    """
    removed = []
    missing = []
    for file_path in expand_sequence(sequence, start, end):
        try:
            os.remove(file_path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            missing.append(file_path)
        else:
            removed.append(file_path)
    return removed, missing


if __name__ == '__main__':
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    sequence = read_line('Sequence:')
    sequence_start = int(read_line('Start_Frame:'))
    sequence_end = int(read_line('End_Frame:'))
    _removed, not_found = remove_sequence(sequence, sequence_start, sequence_end)
    for file_path in not_found:
        print('missing, skipped: %s' % file_path)
运行后分别输入:
/srv/scratch/houdini_zpeng/Render/ship/shipA####.tif
101
300
会把序列文件shipA####.tif 从101-300帧删除
//找到文件的绝对路径
>>> os.path.abspath('memo.txt')
'/Users/csev/memo.txt'
// 判断给定路径是否是文件夹(os.path.isdir 只对已存在的文件夹返回 True;若只需判断路径是否存在,请使用 os.path.exists)
>>> os.path.isdir('memo.txt')
False
>>> os.path.isdir('music')
True
//返回给定目录地址的文件夹和文件列表:
>>> os.listdir(cwd)
['music', 'photos', 'memo.txt']
//得到文件夹(包含子文件夹)中txt文件的数量
"""Count the ``.txt`` files in the current directory tree."""
import os


def count_files(top='.', suffix='.txt'):
    """Return how many files under *top* have names ending with *suffix*.

    os.walk descends into every subfolder of *top*, so files at any depth
    are counted.
    """
    count = 0
    for _dirname, _dirs, files in os.walk(top):
        count += sum(1 for filename in files if filename.endswith(suffix))
    return count


if __name__ == '__main__':
    print('Files: %d' % count_files('.'))
OUTPUT:
python txtcount.py
Files: 1917
os.walk 会递归的进入所有的文件夹和子文件夹,'.'字符表示当前文件夹。
//从命令行读取参数
"""Show the command-line arguments received by the script."""
import sys


def describe_arguments(argv):
    """Return the report lines for *argv*: its length, its type, each item.

    The first line is ``Count: <n>``, the second ``Type: <type of argv>``,
    followed by one ``Argument: <value>`` line per element.
    """
    lines = ['Count: %d' % len(argv), 'Type: %s' % (type(argv),)]
    lines.extend('Argument: %s' % (arg,) for arg in argv)
    return lines


if __name__ == '__main__':
    for line in describe_arguments(sys.argv):
        print(line)
sys.argv 内容是字符串类型的列表,列表的第一个是程序的名字,剩下的是在command line中程序名字后面的参数
//程序argtest从command line 接受多个参数
python argtest.py hello there
OUTPUT:
Count: 3
Type: <type 'list'>
Argument: argtest.py
Argument: hello
Argument: there
//sys.argv的另一个例子
"""Report the size in bytes of the file named on the command line."""
import sys


def describe_file(name):
    """Return the message ``'<name> is <n> bytes'`` for the file *name*.

    The file is opened in binary mode so that the length really is a byte
    count (no newline translation or text decoding), and the ``with`` block
    closes the handle even if reading fails.

    Raises IOError/OSError when the file cannot be opened or read.
    """
    with open(name, 'rb') as handle:
        data = handle.read()
    return '%s is %d bytes' % (name, len(data))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: %s FILE' % sys.argv[0])
    print(describe_file(sys.argv[1]))
OUTPUT:
python argfile.py mbox-short.txt
mbox-short.txt is 94626 bytes
//PIPE
//任何从Shell启动的程序(process),你都可以利用Python的“Pipe"来启动,A pipe is an object that represents a running process.
//利用 os.popen 启动 ls:
>>> cmd = 'ls -l'
>>> fp = os.popen(cmd)
os.popen的参数是一个字符串,包含一个命令,返回值是一个 file pointer, 类似 open file. 你可以利用readline(每次读一行)或者read(一下全部读完),来读取ls -l 进程(process)的执行结果。
>>> res = fp.read()
//完成操作后,你可以类似file的关闭,关闭pipe
>>> stat = fp.close()
>>> print stat
None
fp.close()返回ls 进程(process)的执行状态,None 表示 正常关闭(没有错误)
//抓取一个网页,分析数据,其基本数据形式如下:
<tr><td>Modu</td><td><span class="comments">90</span></td></tr>
<tr><td>Kenzie</td><td><span class="comments">88</span></td></tr>
<tr><td>Hubert</td><td><span class="comments">87</span></td></tr>
//抓取span标签中的数字,把个数和总和计算出来
"""Fetch a web page and total the integers found in its <span> tags."""


def summarize_spans(spans):
    """Return ``(count, total)`` for the integers held by *spans*.

    Each item must expose a ``contents`` list the way a BeautifulSoup tag
    does; its first element is taken as the value inside the tag.  Spans
    that are empty, or whose first element is not an integer, are skipped
    instead of aborting the whole run.
    """
    count = 0
    total = 0  # deliberately not named ``sum``: that would hide the builtin
    for span in spans:
        # span.contents[0] is the value inside the tag.  span.attrs gives
        # all attributes of the tag; span.get('class', None) reads a single
        # attribute and returns None when the attribute does not exist.
        if not span.contents:
            continue
        value = span.contents[0]
        if not value:
            continue
        try:
            number = int(value)
        except (TypeError, ValueError):
            continue
        count += 1
        total += number
    return count, total


def main():
    """Prompt for a URL, download it and print the span count and sum."""
    # Imported here so that summarize_spans() can be used without the
    # network stack or the third-party BeautifulSoup package being present.
    # This part keeps the Python 2 APIs used by the original listing.
    import urllib
    from BeautifulSoup import BeautifulSoup

    url = raw_input("Enter -")
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html)
    count, total = summarize_spans(soup("span"))
    print("Count %s" % count)
    print("Sum %s" % total)


if __name__ == '__main__':
    main()
//输入网址:http://python-data.dr-chuck.net/comments_217954.html,得到如下数据
Enter - http://python-data.dr-chuck.net/comments_217954.html
Count 50
Sum 2591
//newList = [do something for variable in old list if condition]
# Source list mixing integers and strings.
mixedList = [1, 2, 3, 'a', 'b', 'c']

# Keep only the str elements and upper-case each of them; the integers are
# filtered out by the isinstance() condition.
upperCaseList = [
    element.upper()
    for element in mixedList
    if isinstance(element, str)
]

# A bare name: in an interactive session this echoes the list,
# in a script it has no effect.
upperCaseList