采用python获得并修改文件编码(原创)

windows和linux采用了不同的编码,这让很多人伤透了脑经,这里我采用了Python的chardet库获得代码的编码,然后修改编码。

1、首先需要安装chardet库,有很多方式,我才用的是比较笨的方式:sudo pip install chardet

2、废话不多说,直接上代码,同样废话一句,小弟是初手,仅供参考,请大家多多指教。

python版本:

 

#!/usr/bin/env python
# coding: UTF-8
import sys
import os
import chardet
 
 
def print_usage ():
  print '''usage:
  change_charset [file|directory] [charset] [output file] \n
  for example:
    change 1.txt utf-8 n1.txt
    change 1.txt utf-8
    change . utf-8
    change 1.txt
'''
 
 
def get_charset ( s ):
  return chardet . detect ( s )[ 'encoding' ]
 
 
def remove ( file_name ):
  os . remove ( file_name )
 
 
def change_file_charset ( file_name , output_file_name , charset ):
  f = open ( file_name )
  s = f . read ()
  f . close ()
 
  if file_name == output_file_name or output_file_name == "" :
    remove ( file_name )
 
  old_charset = get_charset ( s )
  u = s . decode ( old_charset )
 
  if output_file_name == "" :
    output_file_name = file_name
  f = open ( output_file_name , 'w' )
  s = u . encode ( charset )
  f . write ( s )
  f . close ()
 
 
def do ( file_name , output_file_name , charset ):
  if os . path . isdir ( file_name ):
    for item in os . listdir ( file_name ):
      try :
        if os . path . isdir ( file_name + "/" + item ):
          do ( file_name + "/" + item , "" , charset )
        else :
          change_file_charset ( file_name + "/" + item , "" , charset )
      except OSError , e :
        print e
  else :
    change_file_charset ( file_name , output_file_name , charset )
 
 
if __name__ == '__main__' :
  length = len ( sys . argv )
 
  if length == 1 :
    print_usage ()
  elif length == 2 :
    do ( sys . argv [ 1 ], "" , "utf-8" )
  elif length == 3 :
    do ( sys . argv [ 1 ], "" , sys . argv [ 2 ])
  elif length == 4 :
    do ( sys . argv [ 1 ], sys . argv [ 3 ], sys . argv [ 2 ])
  else :
    print_usage ()

你可能感兴趣的:(python)