有两份矢量要素类数据(几何类型为面),一新一旧。新数据在旧数据的基础上进行过一定的改动,对部分要素进行了增删改操作。现需对比两份数据,检测出其中差异的部分。
分析修改操作类型(数据差异性)
包括:新增要素、删除要素、修改要素(图形、属性单独或同时修改)
将修改操作的检测,转化为比较数据记录特征码的差异
使用MD5存储每条记录(每个要素)的特征码信息。
每条记录添加一个MD5码字段,并计算其值。
特征码应能体现几何、属性两个维度的特性
要素MD5码分为几何MD5码、属性MD5码两个部分。
几何MD5码通过求取面的中心点X、Y坐标(保留两位小数)构造而成的字符串计算。
属性MD5码通过拼接多个属性字段构造成的字符串计算生成。
要素的最终特征码,通过计算几何MD5码、属性MD5码拼接而成的字符串的MD5获得。
过滤一致未修改的要素记录,对比剩余差异记录
通过表的关联(特征码匹配),求取一致并去除,得到两份数据互为差异的记录。
# -*-coding:gbk-*-
import arcpy
import os
import time
import sys
old_gdb_path = sys.argv[1] # path of the first (old) GDB
new_gdb_path = sys.argv[2] # path of the second (new) GDB
feature_class_name = sys.argv[3] # name of the feature class to compare
compare_fields = sys.argv[4] # fields to compare (comma-separated, half-width commas; may be empty)
temp_gdb_path = sys.argv[5] # path of the temporary working GDB
# old_gdb_path = "E:\Temp\SD_Result.gdb"
# new_gdb_path = "E:\Temp\SD_Target.gdb"
# feature_class_name = "DLTB"
# compare_fields = "DLBM,KCDLBM,GDDB,GDLX,GDPDJB,TBXHDM,GDZZSXDM"
# temp_gdb_path = "E:\Temp\SD_Temp.gdb"
# 说明:传入新旧两个gdb路径、待检测更新要素类名、判断更新的字段集合(可为空,多个时以逗号分隔),
# 自动进行检测分析。结果要素类分别为OLD_CHANGE、NEW_CHANGE,其OID2对应原要素类的OBJECTID,
# 结果要素类为点,亦可用于精确定位原要素类
# Source of the helper handed to CalculateField as its code block.
# generate_md5(text) returns the upper-case hex MD5 digest of `text`:
#   * empty or NULL (None) input is hashed as the placeholder 'abc', so a row
#     with no attribute text still gets a well-defined signature;
#   * text that is not already a byte string is encoded as UTF-8 first, because
#     hashlib only accepts bytes (Python 3) and would otherwise attempt an
#     implicit ASCII encode (Python 2) that fails on non-ASCII attribute values.
# For pure-ASCII input the digest is identical to hashing the raw string.
code_block = """
def generate_md5(text):
    import hashlib
    if not text:
        text = 'abc'
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    return hashlib.md5(text).hexdigest().upper()"""
# Build the field-calculator expression used to concatenate the compared fields
def concat_field_expression(field_list):
    """Return a PYTHON_9.3 calculator expression concatenating the given fields.

    Each non-blank field name becomes ``(!NAME! if !NAME! else '')`` and the
    terms are joined with ``+``. Surrounding whitespace is stripped from every
    name so that input such as ``"A, B"`` does not produce the invalid field
    reference ``! B!``; blank entries are skipped.

    :param field_list: iterable of field names; may be None or empty.
    :return: the expression string, or ``""`` when there is no usable field.
    """
    if not field_list:
        return ""
    names = (field.strip() for field in field_list)
    # NOTE(review): terms are concatenated without a delimiter, so different
    # field values can yield the same combined string - confirm this is acceptable.
    return "+".join("(!{0}! if !{0}! else '')".format(name) for name in names if name)
def _import_feature_class(source_gdb, target_name):
    """Copy feature_class_name from source_gdb into the temporary GDB as target_name.

    Raises Exception when the feature class does not exist in source_gdb.
    """
    source_path = os.path.join(source_gdb, feature_class_name)
    if not arcpy.Exists(source_path):
        raise Exception("Not Exists FeatureClass {0} in {1}".format(feature_class_name, source_gdb))
    print("Copy {0} from {1} to {2}".format(feature_class_name, source_gdb, temp_gdb_path))
    arcpy.Copy_management(source_path, os.path.join(temp_gdb_path, target_name))


def _add_signature_fields(fc_name, suffix, attr_expression):
    """Add and populate the OID backup and MD5 signature fields on fc_name.

    suffix is "OLD" or "NEW" and is appended to every field name so the two
    data sets stay distinguishable after they are joined. attr_expression is
    the calculator expression concatenating the compared attribute fields; it
    may be empty when no fields were requested.
    """
    oid_field = "OID_{0}".format(suffix)
    attr_md5_field = "MD_ATTR_{0}".format(suffix)
    attr_tmp_field = "TMP_ATTR_{0}".format(suffix)
    xy_md5_field = "MD_XY_{0}".format(suffix)
    md5_field = "MD_{0}".format(suffix)

    # Adds the INSIDE_X / INSIDE_Y fields referenced by the geometry signature below.
    print("AddGeometryAttributes_management to {0}".format(fc_name))
    arcpy.AddGeometryAttributes_management(fc_name, "CENTROID_INSIDE")

    new_fields = (
        (oid_field, "LONG"),
        (attr_md5_field, "TEXT"),
        (attr_tmp_field, "TEXT"),
        (xy_md5_field, "TEXT"),
        (md5_field, "TEXT"),
    )
    for field_name, field_type in new_fields:
        print("AddField {0} to {1}".format(field_name, fc_name))
        arcpy.AddField_management(fc_name, field_name, field_type)

    # Keep the original OBJECTID: it is renumbered once features are exported.
    print("CalculateField {0} to {1}".format(oid_field, fc_name))
    arcpy.CalculateField_management(fc_name, oid_field, "!OBJECTID!", "PYTHON_9.3")

    if attr_expression:
        print("CalculateField {0} to {1}".format(attr_tmp_field, fc_name))
        arcpy.CalculateField_management(fc_name, attr_tmp_field, attr_expression, "PYTHON_9.3")
        attr_source = "!{0}!".format(attr_tmp_field)
    else:
        # No compare fields were given: an empty expression cannot be passed
        # to CalculateField, so hash the empty string directly instead.
        attr_source = "''"

    print("CalculateField {0} to {1}".format(attr_md5_field, fc_name))
    arcpy.CalculateField_management(fc_name, attr_md5_field,
                                    "generate_md5({0})".format(attr_source), "PYTHON_9.3",
                                    code_block)
    print("CalculateField {0} to {1}".format(xy_md5_field, fc_name))
    arcpy.CalculateField_management(fc_name, xy_md5_field,
                                    "generate_md5(('%.2f' % !INSIDE_X!)+('%.2f' % !INSIDE_Y!))", "PYTHON_9.3",
                                    code_block)
    print("CalculateField {0} to {1}".format(md5_field, fc_name))
    arcpy.CalculateField_management(fc_name, md5_field,
                                    "generate_md5(!{0}!+ !{1}!)".format(attr_md5_field, xy_md5_field),
                                    "PYTHON_9.3", code_block)


def _export_unmatched(point_fc, layer_name, key_field, join_fc, join_key_field, out_name):
    """Export to out_name the rows of point_fc whose signature has no match in join_fc."""
    print("MakeFeatureLayer_management from {0} to {1}".format(point_fc, layer_name))
    arcpy.MakeFeatureLayer_management(point_fc, layer_name)
    print("AddJoin_management...")
    arcpy.AddJoin_management(layer_name, key_field, join_fc, join_key_field, "KEEP_ALL")
    print("FeatureClassToFeatureClass_conversion to {0}".format(out_name))
    # With KEEP_ALL, rows without a join partner have a NULL key on the joined side.
    arcpy.FeatureClassToFeatureClass_conversion(layer_name, temp_gdb_path, out_name,
                                                "{0}.{1} IS NULL".format(join_fc, join_key_field))
    print("RemoveJoin_management...")
    arcpy.RemoveJoin_management(layer_name, join_fc)


def _reduce_to_oid2(change_fc, oid_token):
    """Leave only OID2 (the source OBJECTID backup) among the editable fields of change_fc.

    oid_token is the upper-case name fragment ("OID_OLD" / "OID_NEW") used to
    locate the backup field, whose exported name may carry a table prefix.
    Raises Exception when no such field exists.
    """
    source_field = ""
    for field in arcpy.ListFields(change_fc):
        if oid_token in field.name.upper():
            source_field = field.name
            print("Find Field {0}".format(field.name))
            break
    if not source_field:
        # Fail with a clear message instead of calculating the expression "!!".
        raise Exception("Not Exists Field {0} in {1}".format(oid_token, change_fc))

    print("Add Field OID2 to {0}...".format(change_fc))
    arcpy.AddField_management(change_fc, "OID2", "LONG")
    print("Calculate Field OID2 to {0}...".format(change_fc))
    arcpy.CalculateField_management(change_fc, "OID2", "!{0}!".format(source_field), "PYTHON_9.3")

    for field in arcpy.ListFields(change_fc):
        upper_name = field.name.upper()
        if not field.editable or "SHAPE" in upper_name or upper_name == "OID2":
            continue
        print("Delete Field {0}...".format(field.name))
        arcpy.DeleteField_management(change_fc, field.name)


if __name__ == '__main__':
    # Workflow: copy both feature classes into the temporary GDB, compute an MD5
    # signature per feature (attributes + inside-centroid XY), convert to
    # points, and export the points whose signature has no match on the other
    # side as OLD_CHANGE / NEW_CHANGE. Prints "1" as the last line on success.
    try:
        time_begin = time.time()
        old_name = "{0}_OLD".format(feature_class_name)
        new_name = "{0}_NEW".format(feature_class_name)
        old_name_point = "{0}_OLD_POINT".format(feature_class_name)
        new_name_point = "{0}_NEW_POINT".format(feature_class_name)
        old_name_change = "OLD_CHANGE"
        new_name_change = "NEW_CHANGE"
        old_name_join = "{0}_OLD_JOIN".format(feature_class_name)
        new_name_join = "{0}_NEW_JOIN".format(feature_class_name)

        # Remove leftovers of a previous run from the temporary GDB.
        arcpy.env.workspace = temp_gdb_path
        name_array = [old_name, new_name, old_name_point, new_name_point, old_name_change, new_name_change,
                      old_name_join, new_name_join]
        print(name_array)
        for name in name_array:
            print(name)
            if arcpy.Exists(name):
                print("Delete {0}".format(name))
                arcpy.Delete_management(name)

        # Import both versions of the feature class into the temporary GDB.
        _import_feature_class(old_gdb_path, old_name)
        _import_feature_class(new_gdb_path, new_name)

        # Expression concatenating the attribute fields that take part in the comparison.
        field_expression = concat_field_expression(compare_fields.split(','))
        print(field_expression)

        # Compute the per-feature signatures on both sides.
        _add_signature_fields(old_name, "OLD", field_expression)
        _add_signature_fields(new_name, "NEW", field_expression)

        # Polygon -> point.
        print("FeatureToPoint_management from {0} to {1}".format(old_name, old_name_point))
        arcpy.FeatureToPoint_management(old_name, old_name_point)
        print("FeatureToPoint_management from {0} to {1}".format(new_name, new_name_point))
        arcpy.FeatureToPoint_management(new_name, new_name_point)

        # Separate copies serve as the join tables.
        print("Copy_management from {0} to {1}".format(old_name_point, old_name_join))
        arcpy.Copy_management(old_name_point, old_name_join)
        print("Copy_management from {0} to {1}".format(new_name_point, new_name_join))
        arcpy.Copy_management(new_name_point, new_name_join)

        # Join on the signature in both directions and export the unmatched rows.
        _export_unmatched(old_name_point, "LAYER_OLD", "MD_OLD", new_name_join, "MD_NEW", old_name_change)
        _export_unmatched(new_name_point, "LAYER_NEW", "MD_NEW", old_name_join, "MD_OLD", new_name_change)

        # Drop the intermediate feature classes.
        name_array = [old_name, new_name, old_name_point, new_name_point, old_name_join, new_name_join]
        print(name_array)
        for name in name_array:
            print("Delete {0}".format(name))
            arcpy.Delete_management(name)

        # Strip the result feature classes down to geometry + OID2.
        _reduce_to_oid2(old_name_change, "OID_OLD")
        _reduce_to_oid2(new_name_change, "OID_NEW")

        print("Total Time {0}".format(time.time() - time_begin))
        print("1")
    except Exception as e:
        # e.message is empty for exceptions built from several arguments (and
        # absent on Python 3); fall back to str(e) so the failure is reported.
        # NOTE(review): the process still exits with status 0 on failure, as
        # before - confirm whether callers would prefer a non-zero exit code.
        print(getattr(e, "message", "") or str(e))