# Python code:
'''
Created on 2012-11-23

@author: User
'''
- import os
- import stat
- import shutil
- import traceback
- from chardet.universaldetector import UniversalDetector
# Not referenced by any function visible in this file; purpose unknown -- TODO confirm.
SPACE = 5
# When truthy, logPrint() writes its diagnostics to stdout.
DEBUG = True
def main():
    """Print the detected encoding of every ``.java`` file under the working directory.

    Sub-directories are scanned as well. The commented-out calls below are
    alternative batch jobs left in place for manual switching.
    """
    #convertMatchedFilesEncoding(os.getcwd(), "GBK", "UTF-8", [".py"], True, True)
    #countMatchedFilesLines(os.getcwd(),[".py"], True)
    #deleteFiles(os.getcwd(), [".bak"], True)
    #autoConvertFilesEncoding(os.getcwd(), "UTF-8", [".java"], True)
    for encodingInfo in getFilesEncoding(os.getcwd(), [".java"], True):
        print(encodingInfo)
def logPrint(value, *args):
    """Forward ``value`` and ``args`` to ``print`` when the module flag ``DEBUG`` is truthy."""
    if not DEBUG:
        return
    print(value, *args)
def getFileEncoding(path):
    """Detect the character encoding of the file at ``path`` using chardet.

    The file is read in binary mode and fed to the detector line by line,
    stopping as soon as the detector reports it is done, so large files are
    not loaded into memory in full.

    Args:
        path: Path of the file to inspect.

    Returns:
        The detector's result dict, or ``None`` when the file could not be
        read or detection raised; the failure is reported through
        ``logPrint``.
    """
    try:
        detector = UniversalDetector()
        with open(path, "rb") as sourceFile:
            for line in sourceFile:
                detector.feed(line)
                if detector.done:
                    break
        # close() finalises the prediction; it is required even after an early exit.
        detector.close()
        return detector.result
    except Exception as err:
        logPrint("detect file encoding failed,file:%s.\ncause:%s"%(path, err))
        return None
def getFilesEncoding(path, fileTypes, subFile=False):
    """Collect the detected encoding of every matching file under ``path``.

    Args:
        path: Directory to scan.
        fileTypes: Collection of extensions (with the leading dot) to include.
        subFile: When true, descend into sub-directories as well.

    Returns:
        A list of dicts, one per matching file, in directory-listing order.
        Each dict is a copy of the detection result with an extra ``"path"``
        key. A file whose detection failed is still listed, with
        ``"encoding"`` set to ``None`` and ``"confidence"`` set to ``0.0``.
    """
    result = []

    def collect(directory):
        for name in os.listdir(directory):
            entry = os.path.join(directory, name)
            if os.path.isfile(entry) and isFileMatched(entry, fileTypes):
                detected = getFileEncoding(entry)
                # getFileEncoding yields None on failure; indexing it would
                # raise TypeError and abort the whole scan.
                if detected:
                    info = dict(detected)
                else:
                    info = {"encoding": None, "confidence": 0.0}
                info["path"] = entry
                result.append(info)
            elif subFile and os.path.isdir(entry):
                collect(entry)

    collect(path)
    return result
def autoConvertFileEncoding(path, targetEncoding, backup=False):
    """Detect the encoding of ``path`` and convert the file to ``targetEncoding``.

    Args:
        path: File to convert; anything that is not a regular file is ignored.
        targetEncoding: Encoding the file is rewritten in.
        backup: Passed through to ``convertFileEncoding``.

    The file is left untouched, and the problem is reported through
    ``logPrint``, when the source encoding cannot be determined.
    """
    if not os.path.isfile(path):
        return
    fileEncodingInfo = getFileEncoding(path)
    # Detection may fail outright (None) or finish without a verdict
    # (encoding is None); neither case gives a usable source encoding.
    encoding = fileEncodingInfo.get("encoding") if fileEncodingInfo else None
    if not encoding:
        logPrint("convert file encoding failed,file:%s.\ncause:%s"%(path, "source encoding could not be detected"))
        return
    convertFileEncoding(path, encoding, targetEncoding, backup)
def autoConvertFilesEncoding(path, targetEncoding, fileTypes, subFile=False, backup=False):
    """Auto-detect and convert every matching file in directory ``path``.

    Files whose extension is in ``fileTypes`` are handed to
    ``autoConvertFileEncoding``; sub-directories are processed recursively
    only when ``subFile`` is true.
    """
    for name in os.listdir(path):
        candidate = os.path.join(path, name)
        if os.path.isfile(candidate):
            if isFileMatched(candidate, fileTypes):
                autoConvertFileEncoding(candidate, targetEncoding, backup)
            continue
        if os.path.isdir(candidate) and subFile:
            autoConvertFilesEncoding(candidate, targetEncoding, fileTypes, subFile, backup)
def convertFileEncoding(path, sourceEncoding, targetEncoding, backup=False):
    """Rewrite the file at ``path`` from ``sourceEncoding`` to ``targetEncoding``.

    The converted text is written to ``path + ".temp"`` and moved over the
    original only after the write succeeded, so a failed conversion leaves
    the original file intact. Line endings are preserved as-is, and a leading
    BOM is dropped when the source encoding is UTF-8.

    Args:
        path: File to convert.
        sourceEncoding: Encoding the file is currently stored in.
        targetEncoding: Encoding to write.
        backup: When true, copy the original to ``path + ".bak"`` first.

    Errors are not raised; they are reported through ``logPrint``.
    """
    tempPath = path + ".temp"
    tempCreated = False
    try:
        if backup:
            shutil.copyfile(path, path + ".bak")
        # newline="" turns off newline translation so that converting the
        # encoding does not also rewrite the file's line endings.
        with open(path, "r", encoding=sourceEncoding, newline="") as sourceFile:
            lines = sourceFile.readlines()
        # An empty file has no first line to inspect.
        if lines and sourceEncoding.upper() == "UTF-8" and lines[0].startswith("\ufeff"):
            lines[0] = lines[0][1:]
        with open(tempPath, "w", encoding=targetEncoding, newline="") as targetFile:
            tempCreated = True
            targetFile.writelines(lines)
        # Clear a read-only flag before replacing, as deleteFile() does.
        os.chmod(path, stat.S_IWRITE)
        os.replace(tempPath, path)
    except Exception as err:
        #logPrint(traceback.format_exc())
        logPrint("convert file encoding failed,file:%s.\ncause:%s"%(path, err))
        # Discard only the temp file this call created; never promote a
        # partial result and never touch a pre-existing ".temp" file.
        if tempCreated and os.path.exists(tempPath):
            try:
                os.remove(tempPath)
            except OSError:
                pass
def convertFilesEncoding(path, sourceEncoding, targetEncoding, fileTypes, subFile=False, backup=False):
    """Convert every matching file in directory ``path`` between two fixed encodings.

    Files whose extension is in ``fileTypes`` are handed to
    ``convertFileEncoding``; sub-directories are processed recursively only
    when ``subFile`` is true.
    """
    for name in os.listdir(path):
        candidate = os.path.join(path, name)
        if os.path.isfile(candidate):
            if isFileMatched(candidate, fileTypes):
                convertFileEncoding(candidate, sourceEncoding, targetEncoding, backup)
            continue
        if os.path.isdir(candidate) and subFile:
            convertFilesEncoding(candidate, sourceEncoding, targetEncoding, fileTypes, subFile, backup)
def countFileLines(path):
    """Count the non-blank lines of the file at ``path``.

    The file is read in binary mode, so no decoding is attempted; a line is
    blank when it contains only whitespace bytes.

    Args:
        path: File to inspect.

    Returns:
        The number of non-blank lines, or ``0`` when ``path`` is not a
        regular file.
    """
    if not os.path.isfile(path):
        return 0
    # The context manager closes the handle; iterating the file streams it
    # rather than loading every line at once.
    with open(path, "rb") as sourceFile:
        return sum(1 for line in sourceFile if line.strip())
def countFilesLines(path, fileTypes, subFile=False):
    """Count non-blank lines for every matching file under directory ``path``.

    Returns a list of ``{"path": ..., "lines": ...}`` dicts in
    directory-listing order; sub-directories are visited only when
    ``subFile`` is true.
    """
    collected = []

    def walk(directory):
        for name in os.listdir(directory):
            entry = os.path.join(directory, name)
            if os.path.isfile(entry):
                if isFileMatched(entry, fileTypes):
                    collected.append({"path":entry, "lines":countFileLines(entry)})
                continue
            if os.path.isdir(entry) and subFile:
                walk(entry)

    walk(path)
    return collected
def getFileType(path):
    """Return the extension of the existing file ``path``, including the dot.

    The extension is everything from the last ``.`` of the base name onward.
    ``None`` is returned when ``path`` is not a regular file or its base name
    contains no dot.
    """
    if not os.path.isfile(path):
        return None
    _, dot, suffix = os.path.basename(path).rpartition(".")
    if not dot:
        return None
    return dot + suffix
def isFileMatched(path, fileTypes):
    """Tell whether the extension of the file at ``path`` is one of ``fileTypes``.

    Args:
        path: File to test; its extension is obtained from ``getFileType``.
        fileTypes: A list, tuple, set or frozenset of extensions with the
            leading dot (e.g. ``[".py"]``). A single string is treated as a
            one-element collection.

    Returns:
        ``True`` on an exact extension match, otherwise ``False`` (also for
        an empty or unsupported ``fileTypes``).
    """
    # Validate the cheap argument first so the filesystem is only touched
    # when a match is possible.
    if not fileTypes:
        return False
    if isinstance(fileTypes, str):
        # Wrap a bare string; `in` on a str would do substring matching.
        fileTypes = (fileTypes,)
    elif not isinstance(fileTypes, (list, tuple, set, frozenset)):
        return False
    fileType = getFileType(path)
    return bool(fileType) and fileType in fileTypes
def deleteFile(path):
    """Remove the regular file at ``path``, making it writable first.

    Paths that are not regular files are ignored.
    """
    if not os.path.isfile(path):
        return
    os.chmod(path, stat.S_IWRITE)
    os.remove(path)
def deleteFiles(path, fileTypes, subFile=False):
    """Delete every file in directory ``path`` whose extension is in ``fileTypes``.

    Args:
        path: Directory to clean.
        fileTypes: Collection of extensions (with the leading dot) to delete.
        subFile: When true, descend into sub-directories as well.
            Directories themselves are never removed.
    """
    for name in os.listdir(path):
        entry = os.path.join(path, name)
        if os.path.isfile(entry) and isFileMatched(entry, fileTypes):
            # deleteFile takes the path only; also passing fileTypes raised
            # TypeError on the first matching file.
            deleteFile(entry)
        elif subFile and os.path.isdir(entry):
            deleteFiles(entry, fileTypes, subFile)
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()