Python文件名排序:“字符串+数字”形式的文件名排序脚本

使用 os.listdir(rootDir) 获取某个文件夹下的所有文件名时,返回列表的顺序是任意的(并不保证按名称排序);而在大数据处理过程中,我们往往希望按顺序把样本输入系统,因此需要对这个文件名列表进行排序。

下面是对“字符串+数字”形式的文件名称进行排序的脚本,参考了网络上诸多写法。保存仅供学习交流!

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 28 20:01:00 2019
基于字符串数字混合排序的Python脚本
@author: youxinlin
"""

def is_number(s):
    """Return True when ``s`` can be interpreted as a number, else False.

    Two interpretations are attempted in order:

    1. ``float(s)`` -- only ``ValueError`` is handled here, so any other
       exception raised by ``float`` propagates to the caller.
    2. ``unicodedata.numeric(s)`` -- ``TypeError`` and ``ValueError`` are
       both treated as "not a number".
    """
    try:
        float(s)
    except ValueError:
        # float() rejected the value; give the Unicode database a chance.
        import unicodedata

        try:
            unicodedata.numeric(s)
        except (TypeError, ValueError):
            return False
    return True


def find_continuous_num(astr, c):
    """Return the first run of decimal digits at or after index ``c`` as an int.

    Non-digit characters starting at ``astr[c]`` are skipped, then the
    consecutive decimal digits that follow are collected and converted with
    ``int``.

    Args:
        astr: The string to scan.
        c: Index at which scanning starts.

    Returns:
        The integer value of the digit run, or ``None`` when no decimal digit
        is found from ``c`` onwards (including when ``c`` is past the end).
    """
    length = len(astr)

    # The bounds check comes first so the scan ends cleanly at the end of the
    # string instead of relying on a swallowed IndexError.
    while c < length and not astr[c].isdecimal():
        c += 1

    # str.isdecimal() matches exactly the characters int() can parse, so the
    # conversion below cannot fail on characters such as a vulgar fraction.
    digits = []
    while c < length and astr[c].isdecimal():
        digits.append(astr[c])
        c += 1

    if not digits:
        return None
    return int(''.join(digits))


def _leading_int(text, start):
    """Parse the run of decimal digits beginning at ``text[start]``.

    The caller must ensure ``text[start]`` is a decimal digit.

    Returns:
        A ``(value, end)`` tuple: the integer value of the run and the index
        of the first character after it.
    """
    end = start
    while end < len(text) and text[end].isdecimal():
        end += 1
    return int(text[start:end]), end


def comp2filename(file1, file2):
    """Return True when ``file1`` sorts strictly before ``file2``.

    The names are compared left to right in "natural" order:

    * a run of decimal digits is compared by its integer value, so ``'a2'``
      comes before ``'a10'`` and leading zeros are ignored;
    * a digit sorts before any non-digit character;
    * two non-digit characters are compared with ``<``;
    * if one name is a prefix of the other, the shorter name comes first.

    Comparison continues past equal numeric runs, so ``'a1b'`` sorts before
    ``'a1c'``. A bool is always returned, including for empty names.

    Args:
        file1: First file name.
        file2: Second file name.

    Returns:
        ``True`` if ``file1`` is strictly smaller than ``file2``, ``False``
        otherwise (including when the two names compare equal).
    """
    len1, len2 = len(file1), len(file2)
    pos1 = pos2 = 0

    while pos1 < len1 and pos2 < len2:
        ch1, ch2 = file1[pos1], file2[pos2]
        digit1, digit2 = ch1.isdecimal(), ch2.isdecimal()

        if digit1 and digit2:
            num1, end1 = _leading_int(file1, pos1)
            num2, end2 = _leading_int(file2, pos2)
            if num1 != num2:
                return num1 < num2
            # Equal values: skip both runs (their lengths may differ because
            # of leading zeros) and keep comparing what follows.
            pos1, pos2 = end1, end2
            continue

        if digit1 != digit2:
            # Exactly one side is a digit; the digit side sorts first.
            return digit1

        if ch1 != ch2:
            return ch1 < ch2

        pos1 += 1
        pos2 += 1

    # At least one name is exhausted: file1 is smaller only if it ran out
    # while file2 still has characters left.
    return pos1 == len1 and pos2 < len2


def sort_insert(lst):
    """Sort ``lst`` in place in ascending order using insertion sort.

    Elements are compared with ``>``; an element is only moved past
    neighbours that are strictly greater, so equal elements keep their
    relative order.

    Args:
        lst: A mutable sequence of mutually comparable items.

    Returns:
        The same ``lst`` object, now sorted.
    """
    for boundary in range(1, len(lst)):
        pending = lst[boundary]
        slot = boundary
        # Walk left through the sorted prefix, shifting larger items right.
        for left in range(boundary - 1, -1, -1):
            if not lst[left] > pending:
                break
            lst[left + 1] = lst[left]
            slot = left
        lst[slot] = pending
    return lst


def sort_insert_filename(lst):
    """Sort a list of file names in place using ``comp2filename`` ordering.

    ``comp2filename(a, b)`` is treated as the "a sorts before b" predicate.
    The built-in stable ``list.sort`` is used instead of a hand-written
    insertion sort, so names that compare equal keep their relative order
    while large listings no longer cost a quadratic number of comparisons.

    Args:
        lst: List of file-name strings; it is reordered in place.

    Returns:
        The same ``lst`` object, now sorted.
    """
    from functools import cmp_to_key

    def _three_way(left, right):
        # Adapt the boolean "less than" predicate to the -1/0/1 protocol
        # expected by cmp_to_key.
        if comp2filename(left, right):
            return -1
        if comp2filename(right, left):
            return 1
        return 0

    lst.sort(key=cmp_to_key(_three_way))
    return lst


def file_name_sort(all_file_list):
    """Return a new list with the given file names sorted by name.

    The ordering is the one implemented by ``sort_insert_filename``. The
    input is copied first, so ``all_file_list`` itself is left unmodified.

    NOTE(review): the previous body was an unfinished stub that ignored its
    argument and always returned an empty list; confirm no caller relied on
    that.

    Args:
        all_file_list: Iterable of file-name strings.

    Returns:
        A new sorted list of the file names.
    """
    return sort_insert_filename(list(all_file_list))

#测试数据:
#print(sort_insert_filename(['a09', 'a2', 'b2', 'a10','a100', 'a01', 'a010', '_a3', 'a893', 'a90']))

 

你可能感兴趣的:(Python)