最近拿到一个上游的xlsx, 需要自己加工处理取到自己想要的值,需要注意的知识点做个记录:
以下内容基于以下假设:
1, 文件名字为Data_A.xlsx
和Data_B.xlsx
, 其内容格式为:
A | B | C | D E | F |
---|---|---|---|---|
0x101 | 10 | 1 | 1024 | |
0x201 | 11 | 1 | 117 | |
0x301 | 12 | 3 | 31 | |
0x401 | 13 | 2 | 41 | |
0x501 | 14 | 1 | 126 | |
0x601 | 15 | 4 | 13 |
def clean():
current_path = os.getcwd()
for infile in glob.glob( os.path.join(current_path, '*.csv') ):
os.remove(infile)
if __name__ == '__main__':
clean()
sys.exit()
def parse_input(argv):
inputfile = ''
try:
opts, args = getopt.getopt(sys.argv[1:],"hi:",["help","ifile="])
except getopt.GetoptError:
sys.exit(2)
for opt, arg in opts:
if opt in ("-h","--help"):
print('python3 parse_xlsx.py -i ' )
sys.exit()
elif opt in ("-i", "--ifile"):
inputfile = arg
return inputfile
raw.csv
文件def parse_xlsx(file):
current_path = os.getcwd()
filepath = current_path+'\\new.csv'
if (os.path.exists(filepath)) :
os.remove(filepath)
rawdf = pd.read_excel(file, sheet_name=0)
DataFrame(rawdf).to_csv('raw.csv', index=False, header=0)
raw.csv
文件newdf = pd.read_csv('raw.csv', sep=',', header=0, names=None)
D E
列进行处理:以此列的值作为排序依据先进行排序newdf = pd.read_csv('raw.csv', sep=',', header=0, names=None)
newdf_temp = newdf
if any(newdf_temp['D E'] == 2):
newdf_order_1 = newdf_temp.loc[newdf_temp['D E'] == 1]
newdf_order_2 = newdf_temp.loc[newdf_temp['D E'] == 2]
newdf = newdf_order_1._append(newdf_order_2, ignore_index=True)
newdf_order_3 = newdf_temp.loc[newdf_temp['D E'] == 3]
newdf = newdf._append(newdf_order_3, ignore_index=True)
newdf_order_4 = newdf_temp.loc[newdf_temp['D E'] == 4]
newdf = newdf._append(newdf_order_4, ignore_index=True)
A
列进行处理:去掉0x
, 并按8位对齐newdf['a'] = newdf['A'].astype('str') # the value of A column will handled as string
newdf['a'] = newdf['a'].str.replace(r"0x", "")
newdf_a = newdf['a'].str.zfill(8)
B
列进行处理:转成十六进制数, 并按8位对齐newdf['b'] = newdf['B'].astype('int')
newdf['b'] = newdf['b'].apply( hex )
newdf['b'] = newdf['b'].str.replace(r"0x", "")
newdf_b = newdf['b'].str.zfill(8)
C
列进行处理:全部为NaN, 忽略F
列进行处理:转成十六进制数, 并按8位对齐 newdf['f'] = newdf['F'].astype('int')
newdf['f'] = newdf['f'].apply( hex )
newdf['f'] = newdf['f'].str.replace(r"0x", "")
newdf_f = newdf['f'].str.zfill(8)
A
、B
、F
列进行拼接,结果存进new.csv
此处要注意拼接的前后顺序,注意是列拼接
newdf = newdf_a + newdf_b + newdf_f
DataFrame(newdf).to_csv('new.csv', index=False, header=False)
A
和B
两个文件def distinguish(file):
if "a".upper() in file.upper():
bfile = "A.bin"
elif "b".upper() in file.upper():
bfile = "B.bin"
else:
print("Please confirm input is a or b")
def byteorder_convert(para):
little_hex = bytearray.fromhex(para)
i =0
tmp = bytearray()
result = bytearray()
while i < len(little_hex):
tmp = little_hex[i:i+4]
tmp.reverse()
result = result + tmp
# print(result)
i += 4
str_little = "".join(format(x, "02x") for x in result)
return str_little
bfile
中def binary(bfile):
with open('new.csv', 'r') as f:
reader = csv.reader(f)
# print(type(reader))
file = open(bfile, "wb")
for row in reader:
row = byteorder_convert(''.join(row))
# print(row)
result = ''
t = '\\x'
i = 0
if len(row)%2 == 1:
print('String length illegal')
while i < len(row):
result = result + t + row[i:i+2]
i = i+2
s = result
b = eval('b' + '\'' + s + '\'')
# print(result)
# print(b)
file.write(bytes(b))
file.close()
print("Done! Please check", bfile)
bfile
中def array(file, newdf_a, newdf_b, newdf_f):
if "a".upper() in file.upper():
afile = "new_a.c"
elif "b".upper() in file.upper():
afile = "new_b.c"
else:
print("Please confirm input is a or b")
newdf = "{" + ".a = "+ newdf_a + ", .b = " + newdf_b + ", .f = " + newdf_f + "}"
DataFrame(newdf).to_csv('new.csv', index=False, header=False)
newdf = pd.read_csv('new.csv', sep=',', header=0, names=None)
# print(newdf)
DataFrame(newdf).to_csv(afile, sep=' ', index=False, header=False, quoting=csv.QUOTE_NONE, escapechar=' ')
print("Done! Please check", afile)
csv
中间文件def clean():
current_path = os.getcwd()
for infile in glob.glob( os.path.join(current_path, '*.csv') ):
os.remove(infile)