<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Python hashlib to compare shapefiles\feature classes in Python Questions</title>
    <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254524#M19577</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I've been trying for a while to come up with something similar and even converted Bruce's code to use multiprocessing. Below is the code that shows the most promise; it will process 67000+ features and compare against 67000+ features in about a minute.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE class="lia-code-sample line-numbers language-none"&gt;#Script to compare 2 feature classes
#Author Wes Miller


import arcpy
import numpy as np
import datetime


#Print start time and date 
dt = datetime.datetime.now()
print dt


#The Feature Class to Check
originFC = arcpy.GetParameterAsText(0)
#The feature class with updates needed for feature class above
changeFC = arcpy.GetParameterAsText(1)


origDesc = arcpy.Describe(originFC)
oidName = origDesc.oidFieldName


field_names = arcpy.GetParameterAsText(2)#Example: "Shape","TMS","OWNERNAME","TOTBDGVAL","DISTRICT","ADD1","ADD2","ADD3","MAP","BLOCK","PARCEL","CALCULATED_ACREAGE","ADDR_SITE"


originFCarr = arcpy.da.FeatureClassToNumPyArray(originFC,(field_names),null_value=-9999)
originFCarrID = arcpy.da.FeatureClassToNumPyArray(originFC,(oidName))
changeFCarr = arcpy.da.FeatureClassToNumPyArray(changeFC,(field_names),null_value=-9999)
changeFCarrID = arcpy.da.FeatureClassToNumPyArray(changeFC,(oidName))


print "Arrays Complete"
count = 0
deletesarr = np.where(np.invert(np.in1d(originFCarr,changeFCarr)))
addsarr = np.where(np.invert(np.in1d(changeFCarr,originFCarr)))
adds = []
for each in&amp;nbsp; addsarr[0]:
&amp;nbsp;&amp;nbsp;&amp;nbsp; adds.append(changeFCarrID[each][0])
deletes = []
for each in deletesarr[0]:
&amp;nbsp;&amp;nbsp;&amp;nbsp; deletes.append(originFCarrID[each][0])


#Current Outputs are printed here for testing purposes
print adds
print 
print deletes


ndt = datetime.datetime.now()
print ndt&lt;/PRE&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Sat, 11 Dec 2021 12:34:35 GMT</pubDate>
    <dc:creator>WesMiller</dc:creator>
    <dc:date>2021-12-11T12:34:35Z</dc:date>
    <item>
      <title>Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254515#M19568</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I'd like to find out if anyone within the community has used Python hashlib to compare shapefiles or feature classes. Would you need to compare the geometry and records to figure out if the shapefiles or feature classes are the same? Any advice on how I can use Python hashes to compare shapefiles or feature classes will be appreciated.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 22 Jul 2015 05:49:43 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254515#M19568</guid>
      <dc:creator>PeterWilson</dc:creator>
      <dc:date>2015-07-22T05:49:43Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254516#M19569</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Why not just use the Feature Compare tool in Data Management/Data Comparison?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 22 Jul 2015 06:57:19 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254516#M19569</guid>
      <dc:creator>NeilAyres</dc:creator>
      <dc:date>2015-07-22T06:57:19Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254517#M19570</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;closure on this Peter?&lt;/P&gt;&lt;P&gt;​&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 11 Aug 2015 22:46:02 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254517#M19570</guid>
      <dc:creator>DanPatterson_Retired</dc:creator>
      <dc:date>2015-08-11T22:46:02Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254518#M19571</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Unfortunately, the Feature Compare tool is not the smartest tool there is... If I compare two files which are the same, then the result is as expected:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE class="lia-code-sample line-numbers language-none"&gt;FeatureClass: Shape types are the same.
FeatureClass: Feature types are the same.
Table: Table row counts are the same.
FeatureClass: Feature class extents are the same.
GeometryDef: GeometryDefs are the same.
Field: Field properties are the same.
Table: Table row counts are the same.
SpatialReference: Spatial references are the same.
Table: Rows are the same.&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;However if I would delete feature 2 from the featureclass, like this:&lt;/P&gt;&lt;P&gt;&lt;IMG class="image-1 jive-image" src="https://community.esri.com/legacyfs/online/121758_pastedImage_0.png" style="max-width: 1200px; max-height: 900px;" /&gt;&lt;/P&gt;&lt;P&gt;... and run the tool sorting on the common field (myField) you will notice that it compares based on the row number, not on the value in the sort field. It just sorts and compares row 2 from the base features to row 2 from the test features and will find that everything is different starting from record 2...&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;So hashing might be a solution...&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 11 Dec 2021 12:34:30 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254518#M19571</guid>
      <dc:creator>XanderBakker</dc:creator>
      <dc:date>2021-12-11T12:34:30Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254519#M19572</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;now... if we would hash it like this and compare both featureclasses...&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE class="lia-code-sample line-numbers language-none"&gt;import arcpy
def main():
&amp;nbsp;&amp;nbsp;&amp;nbsp; fc1 = r"C:\GeoNet\FeatureCompare\test.gdb\polygons1"
&amp;nbsp;&amp;nbsp;&amp;nbsp; fc2 = r"C:\GeoNet\FeatureCompare\test.gdb\polygons2"

&amp;nbsp;&amp;nbsp;&amp;nbsp; # create dictionary for each featureclass
&amp;nbsp;&amp;nbsp;&amp;nbsp; flds1 = getFields(fc1)
&amp;nbsp;&amp;nbsp;&amp;nbsp; dct_fc1 = {getHashFeature(r, r[1], flds1):r[0] for r in arcpy.da.SearchCursor(fc1, flds1)}
&amp;nbsp;&amp;nbsp;&amp;nbsp; flds2 = getFields(fc2)
&amp;nbsp;&amp;nbsp;&amp;nbsp; dct_fc2 = {getHashFeature(r, r[1], flds2):r[0] for r in arcpy.da.SearchCursor(fc2, flds2)}

&amp;nbsp;&amp;nbsp;&amp;nbsp; for h2, oid2 in dct_fc2.items():
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if h2 in dct_fc1:
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; print "OID {0} from fc2 corresponds to OID {1} in fc1".format(oid2, dct_fc1[h2])
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else:
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; print "OID {0} from fc2 has no match with features in fc1".format(oid2)

&amp;nbsp;&amp;nbsp;&amp;nbsp; for h1, oid1 in dct_fc1.items():
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if h1 in dct_fc2:
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; print "OID {0} from fc1 corresponds to OID {1} in fc2".format(oid1, dct_fc2[h1])
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else:
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; print "OID {0} from fc1 has no match with features in fc2".format(oid1)

def getFields(fc):
&amp;nbsp;&amp;nbsp;&amp;nbsp; """Get all fields without fields to exclude like OID, Geometry, """
&amp;nbsp;&amp;nbsp;&amp;nbsp; flds_exclude = getFieldsExclude(fc)
&amp;nbsp;&amp;nbsp;&amp;nbsp; flds = [fld.name for fld in arcpy.ListFields(fc) if fld.name not in flds_exclude]
&amp;nbsp;&amp;nbsp;&amp;nbsp; flds.insert(0,"SHAPE@")
&amp;nbsp;&amp;nbsp;&amp;nbsp; flds.insert(0,"OID@")
&amp;nbsp;&amp;nbsp;&amp;nbsp; return flds

def getFieldsExclude(fc):
&amp;nbsp;&amp;nbsp;&amp;nbsp; """List of fields to exclude"""
&amp;nbsp;&amp;nbsp;&amp;nbsp; lst = []
&amp;nbsp;&amp;nbsp;&amp;nbsp; props = ["OIDFieldName", "shapeFieldName", "areaFieldName", "lengthFieldName"]
&amp;nbsp;&amp;nbsp;&amp;nbsp; desc = arcpy.Describe(fc)
&amp;nbsp;&amp;nbsp;&amp;nbsp; for prop in props:
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if hasattr(desc, prop):
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; lst.append(eval("desc.{0}".format(prop)))
&amp;nbsp;&amp;nbsp;&amp;nbsp; return lst

def getHashFeature(row, geom, flds):
&amp;nbsp;&amp;nbsp;&amp;nbsp; """Hash feature (without OID, length and area fields)"""
&amp;nbsp;&amp;nbsp;&amp;nbsp; # row: OBJECTID, SHAPE, other flds
&amp;nbsp;&amp;nbsp;&amp;nbsp; # md5, sha1, sha224, sha256, sha384, sha512
&amp;nbsp;&amp;nbsp;&amp;nbsp; import hashlib
&amp;nbsp;&amp;nbsp;&amp;nbsp; dct = {}
&amp;nbsp;&amp;nbsp;&amp;nbsp; for fld in flds[2:]:
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; dct[fld] = row[flds.index(fld)]
&amp;nbsp;&amp;nbsp;&amp;nbsp; dct[flds[1]] = geom.JSON
&amp;nbsp;&amp;nbsp;&amp;nbsp; h = hashlib.sha512(str(dct))
&amp;nbsp;&amp;nbsp;&amp;nbsp; return h.hexdigest()

if __name__ == '__main__':
&amp;nbsp;&amp;nbsp;&amp;nbsp; main()&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It returns this:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE class="lia-code-sample line-numbers language-none"&gt;OID 3 from fc2 corresponds to OID 3 in fc1
OID 5 from fc2 corresponds to OID 5 in fc1
OID 4 from fc2 corresponds to OID 4 in fc1
OID 1 from fc2 corresponds to OID 1 in fc1
OID 3 from fc1 corresponds to OID 3 in fc2
OID 5 from fc1 corresponds to OID 5 in fc2
OID 4 from fc1 corresponds to OID 4 in fc2
OID 2 from fc1 has no match with features in fc2
OID 1 from fc1 corresponds to OID 1 in fc2&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;...it matches the features based on the hash (sha512) generated. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;However, don't get your hopes up too high since the code is terribly slow...&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 11 Dec 2021 12:34:32 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254519#M19572</guid>
      <dc:creator>XanderBakker</dc:creator>
      <dc:date>2021-12-11T12:34:32Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254520#M19573</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I attempted this a few years back; the code from my tool is attached.&lt;/P&gt;&lt;P&gt;Really I was wanting to mimic the ChangeDetector transformer in Data Interoperability/FME.&lt;/P&gt;&lt;P&gt;If you have real work to do, buy Data Interoperability or FME, or if you are working with polyline features, the new Detect Feature Changes tool (Advanced) might do the trick.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Aug 2015 15:09:47 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254520#M19573</guid>
      <dc:creator>BruceHarold</dc:creator>
      <dc:date>2015-08-12T15:09:47Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254521#M19574</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi &lt;A href="https://community.esri.com/migrated-users/3241"&gt;Bruce Harold&lt;/A&gt;, looks promising. Will definitely have a look at it. Thanks for sharing!&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Aug 2015 19:33:17 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254521#M19574</guid>
      <dc:creator>XanderBakker</dc:creator>
      <dc:date>2015-08-12T19:33:17Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254522#M19575</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I wasn't able to generate my own comparison tool, so I ended up utilizing FME Software.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 12 Jan 2016 21:21:50 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254522#M19575</guid>
      <dc:creator>PeterWilson</dc:creator>
      <dc:date>2016-01-12T21:21:50Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254523#M19576</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Too bad to hear that you couldn't create the tool, but FME is a very good alternative. You may want to mark the answer by Harold as the correct answer.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 12 Jan 2016 21:29:45 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254523#M19576</guid>
      <dc:creator>XanderBakker</dc:creator>
      <dc:date>2016-01-12T21:29:45Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254524#M19577</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I've been trying for a while to come up with something similar and even converted Bruce's code to use multiprocessing. Below is the code that shows the most promise; it will process 67000+ features and compare against 67000+ features in about a minute.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE class="lia-code-sample line-numbers language-none"&gt;#Script to compare 2 feature classes
#Author Wes Miller


import arcpy
import numpy as np
import datetime


#Print start time and date 
dt = datetime.datetime.now()
print dt


#The Feature Class to Check
originFC = arcpy.GetParameterAsText(0)
#The feature class with updates needed for feature class above
changeFC = arcpy.GetParameterAsText(1)


origDesc = arcpy.Describe(originFC)
oidName = origDesc.oidFieldName


field_names = arcpy.GetParameterAsText(2)#Example: "Shape","TMS","OWNERNAME","TOTBDGVAL","DISTRICT","ADD1","ADD2","ADD3","MAP","BLOCK","PARCEL","CALCULATED_ACREAGE","ADDR_SITE"


originFCarr = arcpy.da.FeatureClassToNumPyArray(originFC,(field_names),null_value=-9999)
originFCarrID = arcpy.da.FeatureClassToNumPyArray(originFC,(oidName))
changeFCarr = arcpy.da.FeatureClassToNumPyArray(changeFC,(field_names),null_value=-9999)
changeFCarrID = arcpy.da.FeatureClassToNumPyArray(changeFC,(oidName))


print "Arrays Complete"
count = 0
deletesarr = np.where(np.invert(np.in1d(originFCarr,changeFCarr)))
addsarr = np.where(np.invert(np.in1d(changeFCarr,originFCarr)))
adds = []
for each in&amp;nbsp; addsarr[0]:
&amp;nbsp;&amp;nbsp;&amp;nbsp; adds.append(changeFCarrID[each][0])
deletes = []
for each in deletesarr[0]:
&amp;nbsp;&amp;nbsp;&amp;nbsp; deletes.append(originFCarrID[each][0])


#Current Outputs are printed here for testing purposes
print adds
print 
print deletes


ndt = datetime.datetime.now()
print ndt&lt;/PRE&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 11 Dec 2021 12:34:35 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254524#M19577</guid>
      <dc:creator>WesMiller</dc:creator>
      <dc:date>2021-12-11T12:34:35Z</dc:date>
    </item>
    <item>
      <title>Re: Python hashlib to compare shapefiles\feature classes</title>
      <link>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254525#M19578</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;nice to see you using numpy &lt;IMG src="https://community.esri.com/legacyfs/online/emoticons/happy.png" /&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 18 Feb 2016 21:09:36 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-hashlib-to-compare-shapefiles-feature/m-p/254525#M19578</guid>
      <dc:creator>DanPatterson_Retired</dc:creator>
      <dc:date>2016-02-18T21:09:36Z</dc:date>
    </item>
  </channel>
</rss>

