Unfortunately, I've had to reopen this issue. The error has reappeared, and no amount of geometry repair fixes the problem. There still appears to be an upload limit of roughly 410,000 vertices per chunk.
I'm guessing the problem stems from one of four sources: (1) there's some other major issue with the dataset that I'm overlooking; (2) the conversion from a selection of features to JSON and back to a FeatureSet introduces some kind of geometry issue; (3) there's something on the AGOL portal side I'm overlooking, like a setting; or (4) there is a bug in the ArcGIS API.
If anyone's interested, here's some code to use for testing...
import arcpy
import arcgis
from arcgis.gis import GIS
from arcgis.features import FeatureSet
import os, sys, time
def getChunks(srcLyr, oidLst):
    # FOR TESTING THE THRESHOLD IN VERTICES PER CHUNK
    chnkSize = 8  # Change this number to vary the chunk size (<=5 works, >5 does not)
    chnks = [oidLst[x:x + chnkSize] for x in range(0, len(oidLst), chnkSize)]
    ptCnt = [row[0].pointCount for row in arcpy.da.SearchCursor(srcLyr, ['SHAPE@'])]
    chnkVerts = [sum(ptCnt[i:i + chnkSize]) for i in range(0, len(ptCnt), chnkSize)]
    print(f'\nTesting with chunk size of {chnkSize} features')
    print(f'# Vertices per chunk:\n{chnkVerts}')
    return chnkSize, chnks, chnkVerts
def addData(srcLyr, updLyr, oid, chnks, chnkVerts):
    counter = 0
    tempJSON = os.path.join(sys.path[0], 'temp.json')  # Path to temp JSON file
    startUpload = time.perf_counter()  # Start a timer for the upload process
    for index, chunk in enumerate(chnks):
        try:
            print(f'Processing chunk #{index+1} with {len(chunk)} features')
            tic = time.perf_counter()
            counter += len(chunk)
            # Generate a query statement to isolate features represented by the chunk
            query = f'"{oid}" IN ({", ".join(map(str, chunk))})'
            # Select subset of data based on chunk index and chunk size
            selection = arcpy.SelectLayerByAttribute_management(srcLyr, "NEW_SELECTION", query)
            # Convert selected features to a JSON file, then import the JSON file as an ArcGIS API FeatureSet
            if os.path.exists(tempJSON):
                os.remove(tempJSON)
            arcpy.FeaturesToJSON_conversion(selection, tempJSON)
            with open(tempJSON) as f:
                fs = FeatureSet.from_json(f.read())
            try:
                # Add chunk of data to the AGOL dataset
                print(f'\tAdding {chnkVerts[index]} vertices to hosted feature layer')
                results = updLyr.edit_features(adds=fs)
                print(f'\tCompleted in {int(round(time.perf_counter() - tic, 0))} seconds')
            except:
                tb = sys.exc_info()
                print(f'>>>>>>>> Error adding chunk: {index+1}\n{tb}')
        except:
            tb = sys.exc_info()
            print(f'Error generating data in chunk {index+1} for the update\n{tb}')
    print(f'Dataset added in {round((time.perf_counter() - startUpload)/60, 2)} minutes')
    # Delete temporary data
    try:
        if os.path.exists(tempJSON):
            os.remove(tempJSON)
    except:
        pass
def main():
    arcpy.env.workspace = 'in_memory'
    arcpy.ClearWorkspaceCache_management()  # Remove lock files
    # Connect to AGOL portal
    portal = 'https://www.arcgis.com'
    username = 'username'
    password = 'password'
    print(f'Connecting to {portal}')
    gis = GIS(portal, username, password)
    # Path to test dataset
    dsPath = r'D:\Temp\SEED_PLAN_ZONE_POLY_MVW.gdb\WHSE_FOREST_VEGETATION_SEED_PLAN_ZONE_POLY_MVW'
    ### Also upload the test dataset to ArcGIS Online as a hosted feature layer ###
    repLayer = arcpy.RepairGeometry_management(dsPath)  # Attempt to repair geometry
    srcLayer = arcpy.MakeFeatureLayer_management(repLayer)  # Make feature layer from test data
    srcName = arcpy.Describe(srcLayer).baseName  # Get name of test dataset
    srcOID = arcpy.Describe(srcLayer).OIDFieldName  # Get name of OID field
    # Get list of hosted feature layers
    featLayers = gis.content.search('', 'Feature Layer')
    print(f'\nPROCESSING: [{srcName}]')
    startProcess = time.perf_counter()  # Start a timer for the entire update process
    # Find matching AGOL layer, delete existing features, then add new data
    for featLayer in featLayers:
        for lyr in featLayer.layers:
            if lyr.properties.name == srcName:
                try:
                    print(f'Deleting features from {srcName}')
                    lyr.delete_features(where=f"{srcOID}>0", return_delete_results=False)
                    print('Successfully deleted features')
                except:
                    print(f'Error deleting features from {srcName}')
                # Create a sorted list of ObjectIDs so the chunks line up with the SearchCursor order used in getChunks
                oidList = sorted(set(r[0] for r in arcpy.da.SearchCursor(srcLayer, [srcOID])))
                # Get chunks of data from source layer as list
                chunkSize, chunks, chunkVerts = getChunks(srcLayer, oidList)
                numChunks = -(-len(oidList) // chunkSize)  # Ceiling division
                print(f'\nNumber of features to add: {len(oidList)} ({numChunks} chunk(s))')
                addData(srcLayer, lyr, srcOID, chunks, chunkVerts)
                print(f'Dataset processed in {round((time.perf_counter() - startProcess)/60, 2)} minutes\n\n')
    # Delete temporary data
    del repLayer, srcLayer
    arcpy.Delete_management('in_memory')
    arcpy.ClearWorkspaceCache_management()  # Remove lock files

if __name__ == "__main__":
    main()
The dataset I originally posted should work for testing purposes. I've added the repair geometry function to the code, so there's no need to worry about that aspect.
For anyone running this code, note that setting the chunk size to anything greater than 5 produces at least one chunk that fails to be added to the AGOL dataset. I initially thought a single polygon feature might be corrupting an entire chunk. However, with chunk sizes of 8 and 10, two chunks fail in each case (chunks 4 & 10 and 3 & 8, respectively), which suggests the problem isn't a single feature. The common link between all failing chunks is a high vertex count (> ~410,000).
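If the vertex count really is the limiting factor, one possible workaround is to build the chunks by vertex budget rather than by a fixed feature count, so no single request exceeds the apparent ceiling. Here's a rough sketch of what I mean as a drop-in alternative to getChunks in the script above (untested against the service; the 400,000 budget is just an assumption based on the failures described):
def getChunksByVertexBudget(srcLyr, maxVerts=400000):
    # Group OIDs so that each chunk's combined vertex count stays under maxVerts
    # (maxVerts is an assumed ceiling based on the ~410,000-vertex failures)
    chunks, chunkVerts = [], []
    curIds, curVerts = [], 0
    with arcpy.da.SearchCursor(srcLyr, ['OID@', 'SHAPE@']) as cursor:
        for oid, shape in cursor:
            n = shape.pointCount if shape else 0
            if curIds and curVerts + n > maxVerts:
                chunks.append(curIds)
                chunkVerts.append(curVerts)
                curIds, curVerts = [], 0
            curIds.append(oid)
            curVerts += n
    if curIds:
        chunks.append(curIds)
        chunkVerts.append(curVerts)
    return chunks, chunkVerts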
Also note that each failing chunk comes back with Error Code 405 (that's what surfaces if the full system traceback isn't captured in the try/except block). I've noticed that people who have had trouble updating hosted feature layers through either the ArcGIS REST API or the Python API often report this same error code. Could this be related?
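One thing that might help narrow down the 405: when edit_features returns instead of raising, the response lists per-feature results, so you can see exactly which adds failed and what error the service attached. Something like this inside the inner try block (assuming results is the dict that edit_features returns):
results = updLyr.edit_features(adds=fs)
for r in results.get('addResults', []):
    if not r.get('success'):
        # Each failed add carries its own error object from the service
        print(f"\tAdd failed for OID {r.get('objectId')}: {r.get('error')}")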
I think my next step will be to check/repair the geometry of the FeatureSet created from the JSON file, which should rule out a conversion issue. Any other suggestions for troubleshooting?
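For that check, a quick sanity test would be to compare vertex counts before and after the JSON round trip for a failing chunk; if the totals differ, the conversion is the culprit. A rough sketch, reusing the selection and tempJSON variables from addData above (and assuming polygon geometry, so the Esri JSON uses rings):
import json
# Vertex count of the selected source features
srcVerts = sum(row[0].pointCount for row in arcpy.da.SearchCursor(selection, ['SHAPE@']))
# Vertex count of the features written to the temp JSON file
with open(tempJSON) as f:
    jsonVerts = sum(len(ring) for feat in json.load(f)['features']
                    for ring in feat['geometry'].get('rings', []))
print(f'Source vertices: {srcVerts}  |  JSON vertices: {jsonVerts}')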
Thanks!