https://support.esri.com/en/technical-article/000023355
I want to improve on the above script from ESRI that finds duplicate values and writes the values to a new field in a table. It's using an old cursor and it is very slow with large datasets. But, I'm running into problems due to lack of experience.
Their code verbatim for my purposes:
import arcpy
'''
This script will count the number of occurences of a value in a field ("field_in") and write them to a new field ("field_out")
'''
#path to GDB goes here
arcpy.env.workspace = r"C:\pathto\DuplicateTesting.gdb\DuplicateTesting"
#name of feature class goes here
infeature = "backup_02232021"
field_in = "name"
field_out = "COUNT_"+field_in
arcpy.AddField_management(infeature, field_out,"SHORT")
lista= []
cursor1=arcpy.SearchCursor(infeature)
for row in cursor1:
i=row.getValue(field_in)
lista.append(i)
del cursor1, row
cursor2=arcpy.UpdateCursor(infeature)
for row in cursor2:
i=row.getValue(field_in)
occ=lista.count(i)
row.setValue(field_out, occ)
cursor2.updateRow(row)
del cursor2, row
The following is as far as I got with my improvement. It's throwing an error on the line with the i variable. How can I continue with the UpdateCursor method to put the count of each item in the fields list into its corresponding new field?
infeature = r"C:pathto\DuplicateTesting.gdb\DuplicateTesting
fields = ["name", "location_string_output", "email_address", "cell_phone_number",
"home_phone_number"]
for field_out in fields: #add new fields
arcpy.AddField_management(infeature, "COUNT_"+field_out,"SHORT")
list= []
with arcpy.da.SearchCursor(infeature, fields) as cursor1:
for row in cursor1:
list.append(row)
del cursor1, row
with arcpy.da.UpdateCursor(infeature, fields) as cursor2:
for row in cursor2:
i = row.getValue(row) #<-- throws AttributeError here
occ = list.count(i)
print(occ)
del cursor2, row
Error:
Traceback (most recent call last):
File "\\gisfile\GISstaff\Jared\Python Scripts\ArcGISPro\DuplicateFields_updated.py", line 21, in <module>
i = row.getValue(row)
AttributeError: 'list' object has no attribute 'getValue'
Solved! Go to Solution.
Instead of trying to do the whole list of fields at once, how about iterating over each field and passing it into the cursors?
edited for refractoring the cursor into list comprehension and removing the r in the string.
excluded = ['OBJECTID', 'Shape', 'Shape_Length', 'Shape_Area'] # exclude fields if you want
# set the fieldnames
infeatureflds = [f.name for f in arcpy.ListFields(infeature) if f.name not in excluded]
for fld in infeatureflds:
outfld = f"{fld}_cnts"
arcpy.AddField_management(infeature, outfld, "SHORT")
listValues = [row[0] for row in arcpy.da.SearchCursor(infeature, fld)]
with arcpy.da.UpdateCursor(infeature, [fld, outfld]) as uCur:
for row in uCur:
row[1] = listValues.count(row[0])
uCur.updateRow(row)
print("Done.")
try print(row)
and you will see that it isn't what getValue wants
Row—ArcGIS Pro | Documentation
fields = ["name", "location_string_output", "email_address", "cell_phone_number",
"home_phone_number"]
for field_out in fields: #add new fields
arcpy.AddField_management(infeature, "COUNT_"+field_out,"SHORT")
#list is reserved - use soemthing else
values_list= []
with arcpy.da.SearchCursor(infeature, fields) as cursor1:
#dont append the row to the list, append the values
#this isnt perfect either, throwing everything into one list,
#ie what if someone puts a home and mobile number as the same
#it gets counted as well, so this needs to be improved later
for row in cursor1:
#access each field value by the index i then append
for i in range(0,len(fields)):
#you access a value by a row index, this starts at 0 from the
#first field input - i.e. "name" field values are at row[0]
values_list.append(row[i])
#values_list is now a giant list of every value in all those fields
with arcpy.da.UpdateCursor(infeature, fields) as cursor2:
for row in cursor2:
#the same concept - loop over your fields - the original script was
#only designed for 1 field so you need these loops (maybe!)
for i in range(0,len(fields)):
#get the value of the rield in the row
value = row[i] #<-- throws AttributeError here
#count how often it appears in values_list
occ = values_list.count(i)
#you want to know the field it occurs in also!
#(i know you want to insert these values into the newly created
#fields, but it will be handy to know the field
#in the print statement for now
#get that field value by using i as an index to access fields list
#N.B. !!! this will print alot of values
print(fields[i] + " Count - " + str(occ))
David,
Thanks for the very detailed response! On the first go, it through the following error.
occ = list.count(i) #<-- line that threw the error
Error:
TypeError: descriptor 'count' requires a 'list' object but received a 'int'
ah right, I saw that when I posted it but thought I edited it out pretty fast. It must still be pending or something. Then I found another mistake... Try this. I've not tested it.
infeature = r"C:pathto\DuplicateTesting.gdb\DuplicateTesting
fields = ["name", "location_string_output", "email_address", "cell_phone_number",
"home_phone_number"]
for field_out in fields: #add new fields
arcpy.AddField_management(infeature, "COUNT_"+field_out,"SHORT")
#list is reserved - use soemthing else
values_list= []
with arcpy.da.SearchCursor(infeature, fields) as cursor1:
#dont append the row to the list, append the values
#this isnt perfect either, throwing everything into one list,
#ie what if someone puts a home and mobile number as the same
#it gets counted as well, so this needs to be improved later
for row in cursor1:
#access each field value by the index i then append
for i in range(0,len(fields)):
#you access a value by a row index, this starts at 0 from the
#first field input - i.e. "name" field values are at row[0]
values_list.append(row[i])
#values_list is now a giant list of every value in all those fields
with arcpy.da.UpdateCursor(infeature, fields) as cursor2:
for row in cursor2:
#the same concept - loop over your fields - the original script was
#only designed for 1 field so you need these loops (maybe!)
for i in range(0,len(fields)):
#get the value of the rield in the row
value = row[i] #<-- throws AttributeError here
#count how often it appears in values_list
occ = values_list.count(value)
#you want to know the field it occurs in also!
#(i know you want to insert these values into the newly created
#fields, but it will be handy to know the field
#in the print statement for now
#get that field value by using i as an index to access fields list
#N.B. !!! this will print alot of values
print(fields[i] + " Count - " + str(occ))
Yep, running now! The range() function was what I was missing (struggling to come up with).
Should have also printed the value for testing :
print(fields[i] + " - value - " + str(value) + " - Count - " + str(occ))
Would be good to see your final code when you get the appending sorted within that same update cursor.
Been spinning my wheels. I've tried this amongst other things:
with arcpy.da.UpdateCursor(infeature, fields) as cursor2:
for row in cursor2:
for i in range(0,len(fields)):
value = row[i]
occ = values_list.count(value)
for field in field_out:
cursor2.updateRow(field)
But it gives me this error:
File "pathto\DuplicateFields_updated.py", line 47, in <module>
cursor2.updateRow(field)
TypeError: sequence size must match size of the row
Share the whole script. field_out only exists within your initial for loop (I think, but you've not shared the full script). Also a cursor cannot update a field which is not supplied in the fields list when instantiated.
updateRow() method acts on a row - updateRow(row) this updates the row. assign a value to the field by row index then updateRow(row):
if first field supplied is 'name', to update this fields row in the iteration to 'apples':
row[0] = 'apples'
cursor.updateRow(row)
fields = ["name", "location_string_output", "email_address", "cell_phone_number",
"home_phone_number"]
#list to contain the new fields
new_fields = []
for field in fields:
field_out = "COUNT_" + field
#add the fields to all_fields
new_fields.append(field_out)
#list to contain all fields, this is used as fields list
#in the update cursor - to do this we just add the lists together!
all_fields = fields + new_fields
Sorry.. here's the whole script. I'm in the middle of digesting your latest post.
infeature = r"pathto\DuplicateTesting.gdb\backup_02232021_TESTDELETE"
fields = ["name", "location_string_output", "email_address", "cell_phone_number",
"home_phone_number"]
for field_out in fields: #add new fields
arcpy.AddField_management(infeature, "COUNT_"+field_out,"SHORT")
values_list= []
with arcpy.da.SearchCursor(infeature, fields) as cursor1:
#dont append the row to the list, append the values
#this isnt perfect either, throwing everything into one list,
#ie what if someone puts a home and mobile number as the same
#it gets counted as well, so this needs to be improved later
for row in cursor1:
#access each field value by the index i then append
for i in range(0,len(fields)):
#you access a value by a row index, this starts at 0 from the
#first field input - i.e. "name" field values are at row[0]
values_list.append(row[i])
#values_list is now a giant list of every value in all those fields
with arcpy.da.UpdateCursor(infeature, fields) as cursor2:
for row in cursor2:
#the same concept - loop over your fields - the original script was
#only designed for 1 field so you need these loops (maybe!)
for i in range(0,len(fields)):
#get the value of the field in the row
value = row[i]
#count how often it appears in values_list
occ = values_list.count(value)
#you want to know the field it occurs in also!
#(i know you want to insert these values into the newly created
#fields, but it will be handy to know the field
#in the print statement for now
#get that field value by using i as an index to access fields list
#N.B. !!! this will print alot of values
#print(fields[i] + " - value - " + str(value) + " - Count - " + str(occ))
field_out.append(occ)