博客太久没更新了,是该拔拔草了。最近刚好在比较深入地学习字符串正则表达式匹配的内容,顺便完成了一个小作品,就是解析CPM报文的功能(CPM is short for Container Pallet Message)。
一、好了,照样话不多说,贴上代码
# -*- coding=utf-8 -*-
# using python 3
# Author: Sysuzzd
# Under Project: HS ON ML WITH SCIKIT AND TENSORFLOW
# @Time : 2019-6-29 22:35
# ******************************************* Purpose & Illustration ***********************************************
# 解析CPM报,并生成标准格式CPM
# **********************************************************************************************************************
import sys, os
import re
def cpm_nor(cpm_file_route=None):
    """Parse a CPM text file and print it in the normalized CPM layout.

    The message is cut into its main-deck left/right, lower-deck and bulk
    sections with regular expressions, each section is handed to the
    ULD/BULK parsers, and the total weight, the lateral imbalance and the
    normalized message are printed.

    Args:
        cpm_file_route: Path of the CPM text file. When omitted, the
            original hard-coded sample location is used.

    Raises:
        IndexError: If the flight line or one of the deck sections cannot
            be located in the file.
    """
    if cpm_file_route is None:
        # Historical default location of the sample message.
        cpm_file_route = r'D:\ZeDongPythonLearn\HS ON ML WITH SCIKIT AND TENSORFLOW\Hands-On.Machine.Learning.with.Scikit-Learn.and.TensorFlow.2017\handson-ml-master\CPM4.txt'
    with open(cpm_file_route, 'r') as cpm_file:
        cpm_str = cpm_file.read()

    # Flight line, e.g. "CA162/01JUN18.B2099.AMS-PEK"; [:-1] drops the
    # newline that the pattern consumes.
    flight_pattern = r'\w{2}\d{3,4}\/\d{2}\w{3}\d{2}\.\w\d{4}.+\w{3}\n'
    flight_info = re.findall(flight_pattern, cpm_str)[0][:-1]

    # re.S lets "." span newlines so one group captures a whole section.
    maindeck_left_pattern = r'(MAIN\sDECK\sLEFT\sSIDE.*)\n.+DECK\sRIGHT'
    maindeck_left_str = re.findall(maindeck_left_pattern, cpm_str, re.S)[0]

    # "\nLOWER" (rather than "\n.+LOWER") keeps the last right-side
    # position inside the captured section.
    maindeck_right_pattern = r'(MAIN\sDECK\sRIGHT\sSIDE.*)\nLOWER\sDECK'
    maindeck_right_str = re.findall(maindeck_right_pattern, cpm_str, re.S)[0]

    # Non-greedy: stop at the first line that starts with BULK.
    lowerdeck_pattern = r'(LOWER\sDECK.*?)\nBULK'
    lowerdeck_str = re.findall(lowerdeck_pattern, cpm_str, re.S)[0]

    # Collect every line starting with BULK, so a message with a single
    # bulk line is accepted as well as one with several.
    bulk_str = '\n'.join(re.findall(r'^BULK.*$', cpm_str, re.M))

    main_left, main_right, main_center = check_main_center(maindeck_left_str, maindeck_right_str)
    lower, lower_left, lower_right = check_lower_sides(lowerdeck_str)

    flight_cpm = ULDs_777(main_left, main_right, main_center, lower, lower_left, lower_right, bulk_str)
    print('Total weight of this flight is:', flight_cpm.total_weight(), 'KG')
    print('The later imbalance weight of this flight is:', flight_cpm.imbalance_weight(), 'KG')

    # Emit the message in the standard layout.
    cpm_nor_txt = cpm_nor_output(flight_cpm, flight_info)
    print(cpm_nor_txt)
    return
def check_main_center(left, right):
    """Separate centre-loaded positions from the main-deck left section.

    Entries whose position is "-R" or "-<letter>LR" are listed in the left
    section of the message; they are moved into their own centre string.

    Args:
        left: Main-deck left-side section text (header line first).
        right: Main-deck right-side section text; returned unchanged.

    Returns:
        Tuple ``(left_without_center, right, center)``. ``center`` holds
        each centre entry prefixed by a newline, or ``''`` when the left
        section contains no centre entry.
    """
    center_pattern = r'-R.*|-[A-Z]LR.*'
    # The removal pattern also swallows the preceding newline so that no
    # empty line is left behind in the left section.
    center_sub_pattern = r'\n-R.*|\n-[A-Z]LR.*'

    # Always run the extraction: without any centre entry this simply
    # yields an empty centre instead of leaving the results unbound.
    center = ''.join('\n' + entry for entry in re.findall(center_pattern, left))
    left1 = re.sub(center_sub_pattern, '', left)
    return left1, right, center
def check_lower_sides(lower):
    """Split paired left/right entries out of the lower-deck section.

    A lower-deck line may carry two entries, "-NNL/..." followed by
    "-NNR/...". Those are collected into separate left and right strings
    and the paired lines are removed from the remaining text.

    Args:
        lower: Lower-deck section text (header line first).

    Returns:
        Tuple ``(lower_without_pairs, lower_left, lower_right)``. The left
        and right strings hold each entry prefixed by a newline; when there
        is no entry the string is just ``'\\n'``.
    """
    # (?!R) keeps "-NNLR" positions out of the left list without rejecting
    # entries that merely contain an "R" further on (e.g. destination FRA).
    lower_left_pattern = r'(-\d{2}L(?!R)\S*)\s+'
    # Accept the right entry at the very end of the text as well.
    lower_right_pattern = r'(-\d{2}R.*)(?:\n|\Z)'
    pair_line_pattern = r'-\d{2}L.*-\d{2}R'

    lower_left_list = re.findall(lower_left_pattern, lower)
    lower_right_list = re.findall(lower_right_pattern, lower)

    # Drop whole lines that carry a left/right pair; working line by line
    # also covers a pair sitting on the last line.
    lower1 = '\n'.join(
        line for line in lower.split('\n')
        if not re.search(pair_line_pattern, line)
    )

    lower_left = '\n' + '\n'.join(lower_left_list)
    lower_right = '\n' + '\n'.join(lower_right_list)
    return lower1, lower_left, lower_right
class ULDs_777(object):
    """Container for every parsed load section of one flight."""

    def __init__(self, main_left, main_right, main_center, lower, lower_left, lower_right, bulk_str):
        """Parse each section text into its ULD (or BULK) representation."""
        # Main deck
        self.main_left = ULD(main_left)
        self.main_right = ULD(main_right)
        self.main_center = ULD(main_center)
        # Lower deck
        self.lower = ULD(lower)
        self.lower_left = ULD(lower_left)
        self.lower_right = ULD(lower_right)
        # Bulk lines
        self.bulk = BULK(bulk_str)

    def __str__(self):
        return 'This is a ULD-777F class'

    def total_weight(self):
        """Return the sum of the weights of all sections."""
        sections = (
            self.main_left, self.main_right, self.main_center,
            self.lower_left, self.lower_right, self.lower,
            self.bulk,
        )
        return sum(section.weights for section in sections)

    def imbalance_weight(self):
        """Return the absolute right-minus-left weight difference."""
        right_side = self.main_right.weights + self.lower_right.weights
        left_side = self.main_left.weights + self.lower_left.weights
        return abs(right_side - left_side)
class ULD(object):
    """Parsed entries of one deck section.

    Attributes set by the constructor:
        uld_list: The non-blank entry lines of the section.
        weights: Sum of the entry weights (NIL entries count as 0).
        uldstrs: Normalized entries, each one prefixed by a newline.
    """

    def __init__(self, cpm_str):
        """Split the section text into entry lines and normalize them.

        Args:
            cpm_str: Section text. Everything before the first newline (a
                header line, or nothing) is discarded.
        """
        # Blank lines (e.g. from a trailing newline) carry no entry; they
        # are skipped so the field lookups below cannot fail on them.
        self.uld_list = [
            line for line in re.split(r'\n', cpm_str)[1:] if line.strip()
        ]
        self.weights, self.uldstrs = self.uld_normalized()

    def uld_normalized(self):
        """Return ``(total_weight, normalized_text)`` for ``uld_list``.

        A normalized entry reads
        ``position/uld-id/weight/destination[/shape][/cargo-type]``,
        or ``position/NIL`` for an empty position.

        Raises:
            IndexError: If a non-NIL entry lacks a position, ULD id,
                weight or destination field.
        """
        weight_pattern = r'\/(\d{1,5})'  # weight of a single entry
        total_weight = 0
        entries = []
        for uld in self.uld_list:
            posi_str = re.findall(r'(-\w{1,4})\/', uld)[0]
            if 'NIL' in uld:
                # Empty position: nothing to weigh.
                entries.append(posi_str + '/NIL')
                continue

            weight_str = re.findall(weight_pattern, uld)[0]
            total_weight += int(weight_str)

            uld_str = '/' + re.findall(r'\/([A-Z]{3}\d{5}[A-Z]{2,3})\/', uld)[0]
            des_str = '/' + re.findall(r'\/([A-Z]{3})\/', uld)[0]
            # Shape code and cargo-type letter are optional fields.
            shapes = re.findall(r'[Q][456ML][RL]?', uld)
            shape_str = '/' + shapes[0] if shapes else ''
            cargo_types = re.findall(r'([CMXT])\.', uld)
            cargotype_str = '/' + cargo_types[0] if cargo_types else ''

            entries.append(posi_str + uld_str + '/' + weight_str
                           + des_str + shape_str + cargotype_str)
        total_str = ''.join('\n' + entry for entry in entries)
        return total_weight, total_str
class BULK(object):
    """Parsed bulk lines of a CPM message.

    Attributes set by the constructor:
        uld_list: The non-blank bulk lines.
        weights: Sum of the bulk weights (NIL lines count as 0).
        uldstrs: Normalized lines, each one prefixed by a newline.
    """

    def __init__(self, cpm_str):
        """Split the bulk text into lines and normalize them.

        Args:
            cpm_str: One or more newline-separated bulk lines.
        """
        # Skip blank lines so the field lookups below cannot fail on them.
        self.uld_list = [
            line for line in re.split(r'\n', cpm_str) if line.strip()
        ]
        self.weights, self.uldstrs = self.uld_normalized()

    def uld_normalized(self):
        """Return ``(total_weight, normalized_text)`` for ``uld_list``.

        A normalized line reads ``BLK/weight/destination[/cargo-type]``,
        or ``BLK/NIL`` for an empty bulk line.

        Raises:
            IndexError: If a non-NIL line lacks a weight or a destination.
        """
        # The weight is a whole "/digits" field; it may be followed by
        # another field or be the last field of the line.
        weight_pattern = r'\/(\d{1,5})(?=\/|$)'
        total_weight = 0
        entries = []
        for uld in self.uld_list:
            if 'NIL' in uld:
                entries.append('BLK' + '/NIL')
                continue

            weight_str = re.findall(weight_pattern, uld)[0]
            total_weight += int(weight_str)

            des_str = '/' + re.findall(r'\/([A-Z]{3})\/', uld)[0]
            # The cargo-type letter is optional.
            cargo_types = re.findall(r'([CMXT])\.', uld)
            cargotype_str = '/' + cargo_types[0] if cargo_types else ''

            entries.append('BLK' + '/' + weight_str + des_str + cargotype_str)
        total_str = ''.join('\n' + entry for entry in entries)
        return total_weight, total_str
def test_print_ulds_infos(flight_cpm):
    """Debug helper: print weight and normalized text of every section.

    Args:
        flight_cpm: Object exposing the sections ``main_left``,
            ``main_right``, ``main_center``, ``lower_left``,
            ``lower_right``, ``lower`` and ``bulk``, each with ``weights``
            and ``uldstrs`` attributes.
    """
    sections = (
        ('main left', flight_cpm.main_left),
        ('main right', flight_cpm.main_right),
        ('main center', flight_cpm.main_center),
        ('lower left', flight_cpm.lower_left),
        ('lower right', flight_cpm.lower_right),
        ('lower', flight_cpm.lower),
        ('bulk', flight_cpm.bulk),
    )
    for label, section in sections:
        print(label)
        print(section.weights)
        # uldstrs starts with a single newline; [1:] removes exactly that,
        # keeping the leading "-" of the first position, in line with how
        # cpm_nor_output slices the same strings.
        print(section.uldstrs[1:])
    return
def cpm_nor_output(flight_cpm, flight_info):
    """Assemble the normalized CPM message text.

    Args:
        flight_cpm: Object exposing the sections ``main_right``,
            ``main_left``, ``main_center``, ``lower_right``,
            ``lower_left``, ``lower`` and ``bulk``, each with a
            ``uldstrs`` string that starts with a newline when non-empty.
        flight_info: Flight line of the message; hyphens are removed.

    Returns:
        The message as a single string, from ``CPM`` to ``CPM END``.
    """
    def _body(section):
        # An empty section contributes nothing (no stray blank line);
        # otherwise drop the leading newline and terminate the block.
        return section.uldstrs[1:] + '\n' if section.uldstrs else ''

    flight_info = flight_info.replace('-', '')
    cpm_nor_txt = (
        'CPM\n'
        + flight_info + '\n'
        + r'M/D RIGHT SIDE' + '\n'
        + _body(flight_cpm.main_right)
        + r'M/D LEFT SIDE' + '\n'
        + _body(flight_cpm.main_left)
        + r'M/D CENTER' + '\n'
        + _body(flight_cpm.main_center)
        + r'L/D RIGHT SIDE' + '\n'
        + _body(flight_cpm.lower_right)
        + r'L/D LEFT SIDE' + '\n'
        + _body(flight_cpm.lower_left)
        + r'L/D CENTER' + '\n'
        + _body(flight_cpm.lower)
        + _body(flight_cpm.bulk)
        + 'CPM END'
    )
    return cpm_nor_txt
def main(argc, argv, envp):
    """Script entry point: run the CPM normalization.

    The three parameters are accepted but not used. Returns None.
    """
    cpm_nor()
    return None
if __name__ == '__main__':
    # Run only when executed as a script; main's return value becomes the
    # process exit status.
    exit_status = main(len(sys.argv), sys.argv, os.environ)
    sys.exit(exit_status)
二、以下是CPM报文的txt原文。将其复制到一个txt文件中,并把上面代码里的文件路径(cpm_file_route)改为该文件的路径,就可以验证了。
CPM
CA162/01JUN18.B2099.AMS-PEK
PEK
MAIN DECK LEFT SIDE
-AL/PMC43371CA/PEK/1910/T.Q4.TRANSIT
-BL/PMC44072CA/PEK/2010/T.Q5.RMD TRANSIT
-CL/PMC42424CA/PEK/2075/T.Q5.RMD TRANSIT
-DL/PMC45301CA/PEK/2095/T.Q5.RMD TRANSIT
-EL/PMC45873CA/PEK/2100/T.Q5.RMD TRANSIT
-FL/PMC33417CA/PEK/2150/T.Q5.RMD TRANSIT
-GHL/PGA70214CA/PEK/5320/T.Q5.TRANSIT
-JL/PMC42648CA/PEK/2180/T.Q5.RMD TRANSIT
-KL/PMC41763CA/PEK/4365/C.Q5.0
-LLR/PMC32898CA/PEK/1048/C.Q5.0
-ML/PMC45517CA/PEK/2200/T.Q5.RMD TRANSIT
-PL/PMC41874CA/PEK/2360/T.Q4.TRANSIT
-R/PMC43988CA/PEK/2445/C.Q6.0
MAIN DECK RIGHT SIDE
-AR/PMC31985CA/PEK/3460/C.Q4.0
-BR/PMC42256CA/PEK/967/C.Q5.0
-CR/PMC42614CA/PEK/1001/C.Q5.0
-DR/PMC41867CA/PEK/1652/C.Q5.0
-ER/PMC41703CA/PEK/1730/C.Q5.0
-FR/PMC43395CA/PEK/2070/C.Q5.0.OHG FWD 3.OHG AFT 3
-GHR/PGA70195CA/PEK/5760/C.Q5.0
-JR/PMC43872CA/PEK/2620/C.Q5.0.OHG FWD 4.OHG AFT 4
-KR/PMC44475CA/PEK/1865/C.Q5.0
-MR/PMC32909CA/PEK/2685/C.Q5.0.OHG FWD 4.OHG AFT 4
-PR/PMC44124CA/PEK/3515/C.Q4.0
LOWER DECK
-11P/PMC42943CA/PEK/1505/T.TRANSIT
-12P/PMC32590CA/PEK/1685/T.EX MR TRANSIT
-13P/PMC44202CA/PEK/3390/C.QM
-21P/PMC43672CA/PEK/3495/C.QM
-22P/PMC42421CA/PEK/3510/C.QM
-23P/PMC41028CA/PEK/3620/C.QM
-31P/PMC45480CA/PEK/3680/C.QM
-33L/AKE74669CA/PEK/345 -33R/AKE73341CA/PEK/495/M.MAIL
-41L/AKE74301CA/PEK/557/M.MAIL -41R/AKE73803CA/PEK/400/M.MAIL
-41P/PMC45916CA/PEK/4005/T.EX 41P PER TRANSIT
-42P/PMC42374CA/PEK/4275/C.QM
BULK/NIL
BULK/PEK/780/C.
SI/TOTAL LOAD 93835KG - UNDERLOAD 9367KG
SI/ALL WEIGHTS IN KG - ULD WEIGHTS INCLUDED - MEASUREMENTS IN CMS
SI/ZFW 234579 LIZFW 35 MACAFW 24,3
SI/TOW 338084 LITOW 33 MACTOW 25,5
SI/TO FUEL 103505 TRIP FUEL 90217
SI/STAB TRIM 6,0
SI/LATERAL IMBALANCE 15 KG
CPM END
三、改了一下代码,通过捕捉异常方式实现python3命令行窗口下多行输入,实现手机端termux运行,效果如下,美美哒。