Hi everyone,
I'm stuck on a situation that I feel I'm making more complicated than it needs to be.
Situation:
I have a database (700K records) that updates daily, I convert it to a spatial dataframe. I have a hosted feature layer (points) that updates daily. Right now I truncate and append the entire thing, but having it empty for a while isn't ideal.
I basically want to do this:
Where I am getting confused is, does the da.cursor just use field position to insert values? row[19] will update the 19th column. But what if someone inserts a new column in the database, would everything get screwed up? Can I instead just match the field names with a field map?
Fields are always added to the end.
If you want to get the field names prior to processing you can, and then produce an index to the name.
A simple example
# Report each field's positional index in a feature class before relying on
# positional access in a cursor (row[i] maps to cursor.fields[i]).
fc0 = r'C:\\arcpro_npg\\Project_npg\\tests.gdb\\sq2'

from arcpy.da import SearchCursor

with SearchCursor(fc0, "*") as cursor:
    # cursor.fields preserves the table's field order, so pairing each name
    # with its index shows exactly which position a row value comes from.
    fld_names = cursor.fields
    rpt = list(enumerate(fld_names))
    print(f"{rpt}")
    for row in cursor:
        # row[0] is the first field (OBJECTID in this example table)
        print(f'{row[0]}')
[(0, 'OBJECTID'), (1, 'Shape'), (2, 'ids'), (3, 'CENTROID_X'),
(4, 'CENTROID_Y'), (5, 'INSIDE_X'), (6, 'INSIDE_Y'),
(7, 'PART_COUNT'), (8, 'PNT_COUNT'), (9, 'Sort_'),
(10, 'Shape_Length'), (11, 'Shape_Area'), (12, 'Text')
, (13, 'Long_'), (14, 'Float_')]
1
2
3
4
5
6
9
There is limited information that you can get from the cursor
dir(cursor)
['__class__', '__delattr__', '__dir__', '__doc__', '__enter__', '__eq__',
'__esri_toolinfo__', '__exit__', '__format__', '__ge__', '__getattribute__',
'__getitem__', '__getstate__', '__gt__', '__hash__', '__init__',
'__init_subclass__', '__iter__', '__le__', '__lt__', '__ne__', '__new__',
'__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__',
'__sizeof__', '__str__', '__subclasshook__',
'_as_narray', '_dtype',
'fields', 'next', 'reset']
# -- example
cursor._dtype
dtype([('OBJECTID', '<i4'), ('Shape', '<f8', (2,)), ('ids', '<i4'),
('CENTROID_X', '<f8'), ('CENTROID_Y', '<f8'), ('INSIDE_X', '<f8'),
('INSIDE_Y', '<f8'), ('PART_COUNT', '<f8'), ('PNT_COUNT', '<f8'),
('Sort_', '<i4'), ('Shape_Length', '<f8'), ('Shape_Area', '<f8'),
('Text', '<U20'), ('Long_', '<i4'), ('Float_', '<i4')])
The `_as_narray` method can't be used if the fields include the geometry; in such cases, you can use arcpy's FeatureClassToNumPyArray or TableToNumPyArray.
@CW-GIS you can use the Append tool and perform an upsert by specifying match fields.
Here is something I use daily to insert missing features.
import arcpy

# Paths to your database and hosted feature layer
database_path = "database"
hosted_layer = "feature_layer"  # BUG FIX: was defined as hosted_layer_url but used as hosted_layer (NameError)

# Fields to read/write. 'unique_id' is the key used to match rows between
# the database and the hosted layer.
fields = ['unique_id', 'other_field', 'field1', 'field2', 'field3']  # Add all necessary fields here
id_index = fields.index('unique_id')  # compute once, not per row

# Count the number of features before the edits
initial_feature_count = int(arcpy.GetCount_management(hosted_layer).getOutput(0))
print(f"Initial number of features: {initial_feature_count}")

# Create a dictionary of database records for quick lookup by unique_id
database_records = {}
with arcpy.da.SearchCursor(database_path, fields) as search_cursor:
    for row in search_cursor:
        database_records[row[id_index]] = row

update_count = 0
insert_count = 0
hosted_ids = set()  # ids already present in the hosted layer

# Update hosted rows whose values differ from the database
with arcpy.da.UpdateCursor(hosted_layer, fields) as update_cursor:
    for row in update_cursor:
        unique_id = row[id_index]
        hosted_ids.add(unique_id)
        matching_row = database_records.get(unique_id)
        if matching_row is None:
            # Hosted-only row with no database counterpart; leave it untouched.
            continue
        # Check if any fields have different values
        if tuple(row) != tuple(matching_row):
            update_cursor.updateRow(list(matching_row))
            update_count += 1

# Insert database records that are missing from the hosted layer.
# BUG FIX: the original inserted database_records[unique_id] in the branch
# where unique_id was NOT in database_records (guaranteed KeyError), and it
# opened a fresh InsertCursor per row. The correct direction is: database
# ids absent from the hosted layer get inserted, using one InsertCursor.
missing_ids = set(database_records) - hosted_ids
if missing_ids:
    with arcpy.da.InsertCursor(hosted_layer, fields) as insert_cursor:
        for unique_id in missing_ids:
            insert_cursor.insertRow(database_records[unique_id])
            insert_count += 1

# Count the number of features after the edits
final_feature_count = int(arcpy.GetCount_management(hosted_layer).getOutput(0))
print(f"Final number of features: {final_feature_count}")
print(f"Number of features updated: {update_count}")
print(f"Number of features inserted: {insert_count}")
print("Update and insert operations completed.")
@TonyAlmeida Thank you! This works.
I'm confused though, I run it once and I see it changes the values. If I run it a second time, 10 minutes later, shouldn't there be no edits because everything has already been edited to match the database?
When I run it 10 minutes later, it still makes the same number of changes as the first run.
You're right, mine does the same. Try this.
Changes made are:
1- Trimmed whitespace and converted all field values to strings before comparison
2 - Prevents redundant inserts by comparing existing unique IDs in the hosted layer
import arcpy

# Paths to the local database and the hosted feature layer
database_path = "database"
hosted_layer = "feature_layer"

# Fields to check and update; 'unique_id' is the match key.
fields = ['unique_id', 'other_field', 'field1', 'field2', 'field3']  # Update this list as needed
id_index = fields.index('unique_id')  # hoisted: was recomputed for every row

# Get the initial count of features in the hosted layer
initial_feature_count = int(arcpy.GetCount_management(hosted_layer).getOutput(0))
print(f"Initial number of features: {initial_feature_count}")

# Create a dictionary to store records from the database for quick lookups
database_records = {}
with arcpy.da.SearchCursor(database_path, fields) as search_cursor:
    for row in search_cursor:
        database_records[row[id_index]] = row

update_count = 0
insert_count = 0

# Process rows in the hosted layer to update existing records
with arcpy.da.UpdateCursor(hosted_layer, fields) as update_cursor:
    for row in update_cursor:
        unique_id = row[id_index]
        if unique_id in database_records:
            matching_row = database_records[unique_id]
            updated = False
            # Compare as trimmed strings so type differences (e.g. numeric vs
            # text storage) don't flag every row as changed on each run.
            for i, field in enumerate(fields):
                if str(row[i]).strip() != str(matching_row[i]).strip():
                    row[i] = matching_row[i]
                    updated = True
            if updated:
                update_cursor.updateRow(row)
                update_count += 1

# Insert new rows for records in the database that are not in the hosted layer
with arcpy.da.SearchCursor(hosted_layer, ['unique_id']) as hosted_cursor:
    hosted_ids = {row[0] for row in hosted_cursor}

missing = [rec for uid, rec in database_records.items() if uid not in hosted_ids]
if missing:
    # One InsertCursor for all inserts — the original opened a new cursor
    # (a new edit operation against the service) for every single record.
    with arcpy.da.InsertCursor(hosted_layer, fields) as insert_cursor:
        for record in missing:
            insert_cursor.insertRow(record)
            insert_count += 1

# Get the final count of features in the hosted layer
final_feature_count = int(arcpy.GetCount_management(hosted_layer).getOutput(0))
print(f"Final number of features: {final_feature_count}")
print(f"Number of features updated: {update_count}")
print(f"Number of features inserted: {insert_count}")
print("Update and insert operations completed.")
Although Esri supports "*" for fields with ArcPy DA cursors, they did so reluctantly, and it is a bad practice. You should always explicitly list the fields in a cursor, then you always know the order regardless of the order of the fields in the table.