基于ArcPy的矢量数据变化检测思路与方法

文章目录

  • 背景
  • 思路
  • 实现

背景

有两份矢量要素类数据(几何类型为面),一新一旧。新数据在旧数据的基础上进行过一定的改动,对部分要素进行了增删改操作。现需对比两份数据,检测出其中差异的部分。

思路

  1. 分析修改操作类型(数据差异性)
    包括:新增要素、删除要素、修改要素(图形、属性单独或同时修改)

  2. 将修改操作的检测,转化为比较数据记录特征码的差异
    使用MD5存储每条记录(每个要素)的特征码信息。

    每条记录添加一个MD5码字段,并计算其值。

  3. 特征码应能体现几何、属性两个维度的特性
    要素MD5码分为几何MD5码、属性MD5码两个部分。
    几何MD5码通过求取面的中心点X、Y坐标(保留两位小数)构造而成的字符串计算。
    属性MD5码通过拼接多个属性字段构造成的字符串计算生成。
    要素的最终特征码,通过计算属性MD5码与几何MD5码拼接而成的字符串的MD5获得。

  4. 过滤一致未修改的要素记录,对比剩余差异记录
    通过表的关联(特征码匹配),求取一致并去除,得到两份数据互为差异的记录。

实现

# -*-coding:gbk-*-
import arcpy
import os
import time
import sys

old_gdb_path = sys.argv[1]  # path of the first (old) GDB
new_gdb_path = sys.argv[2]  # path of the second (new) GDB
feature_class_name = sys.argv[3]  # name of the feature class to compare
compare_fields = sys.argv[4]  # fields to compare (separated by ASCII commas; may be empty)
temp_gdb_path = sys.argv[5]  # path of the scratch (temporary) GDB

# Example values:
# old_gdb_path = "E:\Temp\SD_Result.gdb"
# new_gdb_path =  "E:\Temp\SD_Target.gdb"
# feature_class_name =  "DLTB"
# compare_fields = "DLBM,KCDLBM,GDDB,GDLX,GDPDJB,TBXHDM,GDZZSXDM"
# temp_gdb_path = "E:\Temp\SD_Temp.gdb"

# Usage: pass in the old and new GDB paths, the name of the feature class to
# check for updates, and the set of fields used to decide whether a feature
# changed (may be empty; comma-separated when there are several). The
# detection then runs automatically. The result feature classes are
# OLD_CHANGE and NEW_CHANGE; their OID2 field corresponds to the OBJECTID of
# the original feature class. The results are point feature classes, which
# can also be used to locate the original features precisely.
# Source of the helper handed to CalculateField as its code block.
# generate_md5(text) returns the upper-case hexadecimal MD5 digest of `text`.
#   * NULL (None) or empty input is replaced by the fixed placeholder 'abc',
#     so such rows still receive a well-defined digest.
#   * Text that is not already a byte string is encoded as UTF-8 before
#     hashing; hashlib only hashes bytes, and the implicit ASCII conversion
#     Python 2 would otherwise attempt fails on non-ASCII attribute values.
code_block = """
def generate_md5(text):
    import hashlib
    if not text:
        text = 'abc'
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    return hashlib.md5(text).hexdigest().upper()"""


# Build the CalculateField expression that concatenates the compared fields
def concat_field_expression(field_list):
    """Return a PYTHON_9.3 expression concatenating the given fields.

    Each usable field name contributes the term
    ``(!NAME! if !NAME! else '')`` so that a falsy value (e.g. NULL)
    contributes an empty string; the terms are joined with ``+``.

    Surrounding whitespace is stripped from every name, and empty or
    whitespace-only names are skipped, so input such as
    ``"A, B,".split(",")`` yields valid field tokens.

    NOTE: the terms are combined with ``+``, so the expression presumably
    expects text fields -- confirm before using it with numeric fields.

    :param field_list: iterable of field names; may be None or empty.
    :return: the expression string, or ``""`` when no usable name is given.
    """
    if not field_list:
        return ""
    names = (field.strip() for field in field_list)
    return "+".join("(!{0}! if !{0}! else '')".format(name)
                    for name in names if name)


# ---------------------------------------------------------------------------
# Private helpers for the change-detection pipeline run under __main__.
# All feature class names are resolved against arcpy.env.workspace, which the
# entry point sets to the scratch GDB. print() is always called with a single
# argument so the statements behave the same on Python 2 and Python 3.
# ---------------------------------------------------------------------------

def _copy_to_temp(source_gdb, fc_name, temp_gdb, target_name):
    """Copy feature class ``fc_name`` from ``source_gdb`` into ``temp_gdb``.

    :param source_gdb: path of the GDB holding the source feature class.
    :param fc_name: name of the feature class inside ``source_gdb``.
    :param temp_gdb: path of the scratch GDB receiving the copy.
    :param target_name: name of the copy inside ``temp_gdb``.
    :raises Exception: when the source feature class does not exist.
    """
    source_path = os.path.join(source_gdb, fc_name)
    if not arcpy.Exists(source_path):
        raise Exception("Not Exists FeatureClass {0} in {1}".format(fc_name, source_gdb))
    print("Copy {0} from {1} to {2}".format(fc_name, source_gdb, temp_gdb))
    arcpy.Copy_management(source_path, os.path.join(temp_gdb, target_name))


def _add_signature_fields(fc_name, suffix, attr_expression):
    """Add and populate the OID backup and MD5 signature fields of ``fc_name``.

    Fields created (``<S>`` is ``suffix``): OID_<S>, MD_ATTR_<S>,
    TMP_ATTR_<S>, MD_XY_<S> and MD_<S>. MD_<S> is the digest of the attribute
    digest concatenated with the digest of the inside-centroid coordinates
    formatted with two decimals.

    :param fc_name: feature class to modify in place.
    :param suffix: ``"OLD"`` or ``"NEW"``; appended to every field name.
    :param attr_expression: PYTHON_9.3 expression producing the concatenated
        attribute text of a row.
    """
    oid_copy = "OID_{0}".format(suffix)
    attr_md5 = "MD_ATTR_{0}".format(suffix)
    attr_text = "TMP_ATTR_{0}".format(suffix)
    xy_md5 = "MD_XY_{0}".format(suffix)
    feature_md5 = "MD_{0}".format(suffix)

    # Adds the INSIDE_X / INSIDE_Y fields used for the geometry signature.
    print("AddGeometryAttributes_management to {0}".format(fc_name))
    arcpy.AddGeometryAttributes_management(fc_name, "CENTROID_INSIDE")

    # NOTE(review): the TEXT fields use the default length; presumably a very
    # long concatenation of compared fields could exceed it -- confirm.
    new_fields = ((oid_copy, "LONG"), (attr_md5, "TEXT"), (attr_text, "TEXT"),
                  (xy_md5, "TEXT"), (feature_md5, "TEXT"))
    for field_name, field_type in new_fields:
        print("AddField {0} to {1}".format(field_name, fc_name))
        arcpy.AddField_management(fc_name, field_name, field_type)

    # Ask the dataset for its OID field instead of assuming "OBJECTID".
    oid_source = arcpy.Describe(fc_name).OIDFieldName

    # (target field, expression, needs generate_md5 code block); the order
    # matters because later digests read the fields calculated before them.
    calculations = (
        (oid_copy, "!{0}!".format(oid_source), False),
        (attr_text, attr_expression, False),
        # "or ''" keeps a NULL attribute text from reaching generate_md5.
        (attr_md5, "generate_md5(!{0}! or '')".format(attr_text), True),
        (xy_md5, "generate_md5(('%.2f' % !INSIDE_X!)+('%.2f' % !INSIDE_Y!))", True),
        (feature_md5, "generate_md5(!{0}!+ !{1}!)".format(attr_md5, xy_md5), True),
    )
    for field_name, expression, uses_md5 in calculations:
        print("CalculateField {0} to {1}".format(field_name, fc_name))
        if uses_md5:
            arcpy.CalculateField_management(fc_name, field_name, expression, "PYTHON_9.3",
                                            code_block)
        else:
            arcpy.CalculateField_management(fc_name, field_name, expression, "PYTHON_9.3")


def _export_unmatched(point_fc, layer_name, key_field, join_fc, join_key, out_gdb, out_name):
    """Export the rows of ``point_fc`` whose signature has no match in ``join_fc``.

    A layer over ``point_fc`` is joined (KEEP_ALL) to ``join_fc`` on the
    signature fields; rows whose joined key is NULL are written to
    ``out_name`` in ``out_gdb``, then the join is removed.
    """
    print("MakeFeatureLayer_management from {0} to {1}".format(point_fc, layer_name))
    arcpy.MakeFeatureLayer_management(point_fc, layer_name)
    print("AddJoin_management...")
    arcpy.AddJoin_management(layer_name, key_field, join_fc, join_key, "KEEP_ALL")
    print("FeatureClassToFeatureClass_conversion to {0}".format(out_name))
    arcpy.FeatureClassToFeatureClass_conversion(layer_name, out_gdb, out_name,
                                                "{0}.{1} IS NULL".format(join_fc, join_key))
    print("RemoveJoin_management...")
    arcpy.RemoveJoin_management(layer_name, join_fc)


def _reduce_to_oid2(change_fc, oid_marker):
    """Leave only OID2 (copied from the OID backup field) on ``change_fc``.

    The backup field is located by substring because the exported field may
    carry a prefix in front of ``oid_marker``. Every other editable field
    whose name does not contain "SHAPE" is deleted afterwards.

    :raises Exception: when no field containing ``oid_marker`` exists.
    """
    backup_field = ""
    for field in arcpy.ListFields(change_fc):
        if oid_marker in field.name.upper():
            backup_field = field.name
            print("Find Field {0}".format(field.name))
            break
    if not backup_field:
        # Fail with a clear message instead of calculating the expression "!!".
        raise Exception("Not Exists Field {0} in {1}".format(oid_marker, change_fc))

    print("Add Field OID2 to {0}...".format(change_fc))
    arcpy.AddField_management(change_fc, "OID2", "LONG")
    print("Calculate Field OID2 to {0}...".format(change_fc))
    arcpy.CalculateField_management(change_fc, "OID2", "!{0}!".format(backup_field), "PYTHON_9.3")

    for field in arcpy.ListFields(change_fc):
        name_upper = field.name.upper()
        if not field.editable or "SHAPE" in name_upper or name_upper == "OID2":
            continue
        print("Delete Field {0}...".format(field.name))
        arcpy.DeleteField_management(change_fc, field.name)


if __name__ == '__main__':
    # Pipeline: copy both feature classes into the scratch GDB, compute an MD5
    # signature per feature, convert to points, and export the points whose
    # signature has no counterpart in the other data set as OLD_CHANGE /
    # NEW_CHANGE. Prints "1" as the last line on success.
    try:
        time_begin = time.time()

        old_name = "{0}_OLD".format(feature_class_name)
        new_name = "{0}_NEW".format(feature_class_name)

        old_name_point = "{0}_OLD_POINT".format(feature_class_name)
        new_name_point = "{0}_NEW_POINT".format(feature_class_name)

        old_name_change = "OLD_CHANGE"
        new_name_change = "NEW_CHANGE"

        old_name_join = "{0}_OLD_JOIN".format(feature_class_name)
        new_name_join = "{0}_NEW_JOIN".format(feature_class_name)

        # Work inside the scratch GDB; remove leftovers of a previous run.
        arcpy.env.workspace = temp_gdb_path
        name_array = [old_name, new_name, old_name_point, new_name_point, old_name_change, new_name_change,
                      old_name_join, new_name_join]
        print(name_array)
        for name in name_array:
            print(name)
            if arcpy.Exists(name):
                print("Delete {0}".format(name))
                arcpy.Delete_management(name)

        # Copy the feature class of each source GDB into the scratch GDB.
        _copy_to_temp(old_gdb_path, feature_class_name, temp_gdb_path, old_name)
        _copy_to_temp(new_gdb_path, feature_class_name, temp_gdb_path, new_name)

        # Expression concatenating the compared fields. With no compared
        # fields fall back to an empty-string literal, because an empty
        # expression is not a usable CalculateField expression.
        field_expression = concat_field_expression(compare_fields.split(',')) or "''"
        print(field_expression)

        # Add centroid XY, OID backup and MD5 signature fields to both copies.
        _add_signature_fields(old_name, "OLD", field_expression)
        _add_signature_fields(new_name, "NEW", field_expression)

        # Polygons to points.
        print("FeatureToPoint_management from {0} to {1}".format(old_name, old_name_point))
        arcpy.FeatureToPoint_management(old_name, old_name_point)
        print("FeatureToPoint_management from {0} to {1}".format(new_name, new_name_point))
        arcpy.FeatureToPoint_management(new_name, new_name_point)

        # Separate copies that serve as join tables.
        print("Copy_management from {0} to {1}".format(old_name_point, old_name_join))
        arcpy.Copy_management(old_name_point, old_name_join)
        print("Copy_management from {0} to {1}".format(new_name_point, new_name_join))
        arcpy.Copy_management(new_name_point, new_name_join)

        # Join on the signature and export the features without a match.
        _export_unmatched(old_name_point, "LAYER_OLD", "MD_OLD", new_name_join, "MD_NEW",
                          temp_gdb_path, old_name_change)
        _export_unmatched(new_name_point, "LAYER_NEW", "MD_NEW", old_name_join, "MD_OLD",
                          temp_gdb_path, new_name_change)

        # Delete the intermediate feature classes.
        name_array = [old_name, new_name, old_name_point, new_name_point, old_name_join, new_name_join]
        print(name_array)
        for name in name_array:
            print("Delete {0}".format(name))
            arcpy.Delete_management(name)

        # Strip the result feature classes down to geometry + OID2.
        _reduce_to_oid2(old_name_change, "OID_OLD")
        _reduce_to_oid2(new_name_change, "OID_NEW")

        print("Total Time {0}".format(time.time() - time_begin))
        print("1")
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status instead of ending as if the run had succeeded.
        # e.message only exists on Python 2; fall back to the exception itself.
        print(getattr(e, "message", None) or e)
        sys.exit(1)

你可能感兴趣的:(ArcPy,INSIDE_X,AddJoin,CalculateField)