Python list 在遍历的时候删除元素还真需要注意一下,今天帮同学处理数据,竟然傻逼了。
需求:
除了第一列,给每列加一个序号如:"1:0","2:0","3:20100307",然后删除冒号后为0的数据。
推荐做法:
# Recommended approach: build a new, filtered list instead of deleting
# elements from the list that is being iterated.
arrays = [
    ['5001', '0', '0', '20100307', '20150109', '2', '3', '75', '0', '0', '114', '13', '2', '0', '0'],
    ['10001', '0', '2', '20100318', '20150119', '2', '3', '58', '0', '0', '1', '5', '1', '0', '0'],
    ['15001', '0', '2', '20100329', '20150130', '9', '14', '70', '0', '0', '1', '31', '7', '11', '7'],
    ['20001', '0', '0', '20100401', '20150117', '3', '5', '71', '1', '75', '1', '1', '1', '0', '0'],
    ['22001', '0', '1', '20100401', '20150125', '7', '11', '58', '0', '0', '51', '40', '12', '2', '1'],
    ['24001', '0', '0', '20100401', '20141209', '1', '1', '138', '0', '0', '1', '1', '1', '0', '0'],
    ['44001', '1', '0', '20100428', '20141130', '1', '1', '85', '0', '0', '1', '0', '0', '0', '0'],
    ['45001', '1', '0', '20100429', '20141127', '1', '1', '128', '0', '0', '1', '24', '3', '0', '0'],
    ['50001', '0', '0', '20100506', '20141230', '1', '2', '67', '0', '0', '44', '6', '1', '4', '1'],
    ['69001', '0', '0', '20100524', '20150129', '16', '20', '90', '2', '220', '236', '44', '18', '3', '2'],
    ['71001', '0', '2', '20100526', '20141224', '9', '15', '64', '0', '0', '57', '30', '7', '11', '4'],
]

for every_list in arrays:
    # Prefix every column except the first one with its column index,
    # e.g. the value '0' at index 1 becomes '1:0'.
    for i in range(1, len(every_list)):
        every_list[i] = str(i) + ":" + every_list[i]
    # Keep only the entries whose text after the last colon is not '0'.
    # A list comprehension always produces a list; filter() would return a
    # lazy iterator on Python 3 and the values would not be printed.
    every_list = [e for e in every_list if e.split(":")[-1] != '0']
    print(every_list)
# Output:
['5001', '3:20100307', '4:20150109', '5:2', '6:3', '7:75', '10:114', '11:13', '12:2'] ['10001', '2:2', '3:20100318', '4:20150119', '5:2', '6:3', '7:58', '10:1', '11:5', '12:1'] ['15001', '2:2', '3:20100329', '4:20150130', '5:9', '6:14', '7:70', '10:1', '11:31', '12:7', '13:11', '14:7'] ['20001', '3:20100401', '4:20150117', '5:3', '6:5', '7:71', '8:1', '9:75', '10:1', '11:1', '12:1'] ['22001', '2:1', '3:20100401', '4:20150125', '5:7', '6:11', '7:58', '10:51', '11:40', '12:12', '13:2', '14:1'] ['24001', '3:20100401', '4:20141209', '5:1', '6:1', '7:138', '10:1', '11:1', '12:1'] ['44001', '1:1', '3:20100428', '4:20141130', '5:1', '6:1', '7:85', '10:1'] ['45001', '1:1', '3:20100429', '4:20141127', '5:1', '6:1', '7:128', '10:1', '11:24', '12:3'] ['50001', '3:20100506', '4:20141230', '5:1', '6:2', '7:67', '10:44', '11:6', '12:1', '13:4', '14:1'] ['69001', '3:20100524', '4:20150129', '5:16', '6:20', '7:90', '8:2', '9:220', '10:236', '11:44', '12:18', '13:3', '14:2'] ['71001', '2:2', '3:20100526', '4:20141224', '5:9', '6:15', '7:64', '10:57', '11:30', '12:7', '13:11', '14:4']
其实一开始傻逼的做法:
# First (incorrect) attempt, kept on purpose to demonstrate the pitfall:
# elements are removed from the very list that the for loop is iterating.
arrays = [
    ['5001', '0', '0', '20100307', '20150109', '2', '3', '75', '0', '0', '114', '13', '2', '0', '0'],
    ['10001', '0', '2', '20100318', '20150119', '2', '3', '58', '0', '0', '1', '5', '1', '0', '0'],
    ['15001', '0', '2', '20100329', '20150130', '9', '14', '70', '0', '0', '1', '31', '7', '11', '7'],
    ['20001', '0', '0', '20100401', '20150117', '3', '5', '71', '1', '75', '1', '1', '1', '0', '0'],
    ['22001', '0', '1', '20100401', '20150125', '7', '11', '58', '0', '0', '51', '40', '12', '2', '1'],
    ['24001', '0', '0', '20100401', '20141209', '1', '1', '138', '0', '0', '1', '1', '1', '0', '0'],
    ['44001', '1', '0', '20100428', '20141130', '1', '1', '85', '0', '0', '1', '0', '0', '0', '0'],
    ['45001', '1', '0', '20100429', '20141127', '1', '1', '128', '0', '0', '1', '24', '3', '0', '0'],
    ['50001', '0', '0', '20100506', '20141230', '1', '2', '67', '0', '0', '44', '6', '1', '4', '1'],
    ['69001', '0', '0', '20100524', '20150129', '16', '20', '90', '2', '220', '236', '44', '18', '3', '2'],
    ['71001', '0', '2', '20100526', '20141224', '9', '15', '64', '0', '0', '57', '30', '7', '11', '4'],
]

for every_list in arrays:
    # Prefix every column except the first one with its column index,
    # e.g. the value '0' at index 1 becomes '1:0'.
    for i in range(1, len(every_list)):
        every_list[i] = str(i) + ":" + every_list[i]
    # WRONG ON PURPOSE: every remove() shifts the following elements one
    # position to the left while the loop position still advances, so the
    # element right after each removed one is never examined.
    # (Values are compared as text; float(...) == 0 would be the numeric
    # alternative.)
    for j in every_list:
        if j.split(":")[-1] == '0':
            every_list.remove(j)
    print(every_list)
# Output (incorrect):
['5001', '2:0', '3:20100307', '4:20150109', '5:2', '6:3', '7:75', '9:0', '10:114', '11:13', '12:2', '14:0'] ['10001', '2:2', '3:20100318', '4:20150119', '5:2', '6:3', '7:58', '9:0', '10:1', '11:5', '12:1', '14:0'] ['15001', '2:2', '3:20100329', '4:20150130', '5:9', '6:14', '7:70', '9:0', '10:1', '11:31', '12:7', '13:11', '14:7'] ['20001', '2:0', '3:20100401', '4:20150117', '5:3', '6:5', '7:71', '8:1', '9:75', '10:1', '11:1', '12:1', '14:0'] ['22001', '2:1', '3:20100401', '4:20150125', '5:7', '6:11', '7:58', '9:0', '10:51', '11:40', '12:12', '13:2', '14:1'] ['24001', '2:0', '3:20100401', '4:20141209', '5:1', '6:1', '7:138', '9:0', '10:1', '11:1', '12:1', '14:0'] ['44001', '1:1', '3:20100428', '4:20141130', '5:1', '6:1', '7:85', '9:0', '10:1', '12:0', '14:0'] ['45001', '1:1', '3:20100429', '4:20141127', '5:1', '6:1', '7:128', '9:0', '10:1', '11:24', '12:3', '14:0'] ['50001', '2:0', '3:20100506', '4:20141230', '5:1', '6:2', '7:67', '9:0', '10:44', '11:6', '12:1', '13:4', '14:1'] ['69001', '2:0', '3:20100524', '4:20150129', '5:16', '6:20', '7:90', '8:2', '9:220', '10:236', '11:44', '12:18', '13:3', '14:2'] ['71001', '2:2', '3:20100526', '4:20141224', '5:9', '6:15', '7:64', '9:0', '10:57', '11:30', '12:7', '13:11', '14:4']分析:
原因:在遍历 Python list 的同时删除其中的元素,是不安全的。
下面这个小例子说明了为什么不好:
>>> s = [1,2,3,4,5]
>>> s
[1, 2, 3, 4, 5]
>>> for i in s:
...     s.remove(i)
...     print s
...
[2, 3, 4, 5]
[2, 4, 5]
[2, 4]
>>>
删除不干净:每删除一个元素,它后面的元素都会向前移动一位,而循环内部的下标照常加一,于是紧跟在被删除元素后面的那个元素就被跳过了。一种改法是遍历副本、删除原对象:
# Working variant of the in-place removal: iterate over a slice copy of the
# list and remove the matching elements from the original list.
arrays = [
    ['5001', '0', '0', '20100307', '20150109', '2', '3', '75', '0', '0', '114', '13', '2', '0', '0'],
    ['10001', '0', '2', '20100318', '20150119', '2', '3', '58', '0', '0', '1', '5', '1', '0', '0'],
    ['15001', '0', '2', '20100329', '20150130', '9', '14', '70', '0', '0', '1', '31', '7', '11', '7'],
    ['20001', '0', '0', '20100401', '20150117', '3', '5', '71', '1', '75', '1', '1', '1', '0', '0'],
    ['22001', '0', '1', '20100401', '20150125', '7', '11', '58', '0', '0', '51', '40', '12', '2', '1'],
    ['24001', '0', '0', '20100401', '20141209', '1', '1', '138', '0', '0', '1', '1', '1', '0', '0'],
    ['44001', '1', '0', '20100428', '20141130', '1', '1', '85', '0', '0', '1', '0', '0', '0', '0'],
    ['45001', '1', '0', '20100429', '20141127', '1', '1', '128', '0', '0', '1', '24', '3', '0', '0'],
    ['50001', '0', '0', '20100506', '20141230', '1', '2', '67', '0', '0', '44', '6', '1', '4', '1'],
    ['69001', '0', '0', '20100524', '20150129', '16', '20', '90', '2', '220', '236', '44', '18', '3', '2'],
    ['71001', '0', '2', '20100526', '20141224', '9', '15', '64', '0', '0', '57', '30', '7', '11', '4'],
]

for every_list in arrays:
    # Prefix every column except the first one with its column index,
    # e.g. the value '0' at index 1 becomes '1:0'.
    for i in range(1, len(every_list)):
        every_list[i] = str(i) + ":" + every_list[i]
    # every_list[1:] is a new list, so removing from every_list does not
    # disturb the iteration. The index prefix makes every element unique,
    # so remove() always deletes exactly the element being examined.
    # (Values are compared as text; float(...) == 0 would be the numeric
    # alternative.)
    for j in every_list[1:]:
        if j.split(":")[-1] == '0':
            every_list.remove(j)
    print(every_list)
因为切片 list[1:] 会产生一个新的副本,所以循环遍历的是副本,删除的是原对象,也就避开了对同一个 list 一边遍历、一边删除的尴尬。