Hi all,
I am trying to update the data in a hosted feature layer. I am using Python, and I have been experimenting with packages such as arcgis, requests, and json. Below is the response I get after adding a record to the hosted layer:
[{'success': True, 'globalId': '<globalid>', 'objectId': <objectId>}]
As can be seen, success is "True". Even so, I don't see the data being added to the hosted feature layer. Is there anything I am missing? This is my first data-engineering project in ArcGIS and I cannot figure out what is going wrong. I have tried various approaches, but I get the exact same result. Please let me know your thoughts; I will also post my code below.
Thank you
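For reference, one quick way to sanity-check whether the adds actually persist (a minimal sketch, assuming hosted_layer and batch_feature_set refer to the FeatureLayer and FeatureSet used in the script below) is to compare the layer's feature count before and after the edit and print the full addResults payload rather than only the success flags:

# Count features, apply the edit, then count again and inspect the raw results
before = hosted_layer.query(where="1=1", return_count_only=True)
result = hosted_layer.edit_features(adds=batch_feature_set)
after = hosted_layer.query(where="1=1", return_count_only=True)
print(f"Feature count before: {before}, after: {after}")
print(result['addResults'])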
import logging
import arcpy
import pandas as pd
from arcgis.gis import GIS
from arcgis.geometry import project
from arcgis.features import FeatureLayerCollection, FeatureSet

# Read the source table from the enterprise geodatabase (SDE) into a DataFrame
fc = r'path_to_data\abc.sde\table'
fields = ['*']
data = [row for row in arcpy.da.SearchCursor(fc, fields)]
columns = [field.name for field in arcpy.ListFields(fc) if field.name in fields or fields == ['*']]
df_sde = pd.DataFrame(data, columns = columns)
# Creating a GIS instance
gis = GIS('<gis_portal>', username, password)
service = '<service_layer>'
hosted = '<hosted_layer>'
# Connecting to the services
fabric_service = FeatureLayerCollection(service, gis)
fabric_hosted = FeatureLayerCollection(hosted, gis)
service_layer = fabric_service.layers[num1]
hosted_layer = fabric_hosted.layers[num2]
## Importing the records as dataframe
service_records = service_layer.query(where="<some condition>", as_df=True, return_geometry=True)
## Truncating all the data in the hosted layer
try:
    logging.info("Starting data truncation process for the hosted layer.")
    result = hosted_layer.delete_features(where="1=1")
    if result['deleteResults']:
        logging.info("Data truncated successfully.")
    else:
        logging.warning("No data was deleted.")
except Exception as e:
    logging.error("Error occurred while truncating data: " + str(e))
## Importing the hosted layer data just to get the column names and schema. This results in an empty dataframe with the column names and their corresponding data types
hosted_records = hosted_layer.query(where="1=1", as_df=True, return_geometry=True)
# Function to reproject the geometries in the geometry column of service_records to match the spatial reference required by the hosted layer
def project_in_batches(geometries, in_sr, out_sr, batch_size=5000):
    projected_geometries = []
    for i in range(0, len(geometries), batch_size):
        batch = geometries[i:i + batch_size]
        projected_batch = project(geometries=batch, in_sr=in_sr, out_sr=out_sr)
        projected_geometries.extend(projected_batch)
    return projected_geometries
# Reproject the geometries from service layer to match the hosted layer's spatial reference in batches
projected_geometries = project_in_batches(geometries=service_records[<geometry_column>].tolist(),
in_sr={'wkid':wkid1},
out_sr={'wkid':wkid2})
# Replace the original geometries with the projected ones
service_records[<geometry_column>] = projected_geometries
def merging_service_sde():
    '''
    This function introduces the columns that must be present in the hosted layer
    but have no corresponding column in either service_records or df_sde. Such
    columns are then populated with null values for the time being.
    '''
    cols_service = [<columns in service_records>]
    cols_sde = [<columns in df_sde>]
    cols_ignore = [<columns in hosted_records that have no corresponding column in either service_records or df_sde>]
    temp_df_record = pd.merge(service_records[cols_service], df_sde[cols_sde],
                              how='left', left_on=col1, right_on=col2)
    # Setting the columns in cols_ignore to null
    for col in cols_ignore:
        temp_df_record[col] = pd.NA
    return temp_df_record
merged = merging_service_sde()
mapping = {<mapping dictionary that maps the column names in such a way that it is identical to the column names present in hosted_records>}
# Mapping the column names
merged = merged.rename(columns = mapping)
# Extra step to align the columns according to the order in hosted_records for better readability
merged = merged[hosted_records.columns]
# Aligning the data types
for column in merged.columns:
    if column in hosted_records.columns:
        desired_type = hosted_records[column].dtype
        merged[column] = merged[column].astype(desired_type)
## Handling the missing values
object_cols = merged.select_dtypes(include = ['object', 'string']).columns
num_cols = merged.select_dtypes(include = ['float64', 'Float64', 'Int32', 'Int64']).columns
date_cols = merged.select_dtypes(include = ['datetime64[ns]']).columns
merged[object_cols] = merged[object_cols].fillna('N')
merged[num_cols] = merged[num_cols].fillna(0)
merged[date_cols] = merged[date_cols].fillna(pd.Timestamp('2000-01-01'))
merged_spatial = merged.spatial.to_featureset()
batch_size = 100
total_records = len(merged_spatial.features)
batches = [merged_spatial.features[i:i + batch_size] for i in range(0, total_records, batch_size)]
for i, batch in enumerate(batches):
    try:
        batch_feature_set = FeatureSet(features=batch, geometry_type=merged_spatial.geometry_type,
                                       spatial_reference=merged_spatial.spatial_reference)
        update_result = hosted_layer.edit_features(adds=batch_feature_set)
        success = list(set(r['success'] for r in update_result['addResults']))
        if success == [True]:
            logging.info(f"Batch {i+1}/{len(batches)} added successfully")
        else:
            logging.info(f"Batch {i+1}/{len(batches)} not added")
    except Exception as e:
        logging.error(f"Error adding batch {i+1}/{len(batches)}: {e}")
It finally worked. The issue was caused by the geometry column, which was getting altered when I merged the two datasets in pandas. That modification to the geometry column was preventing the data from being uploaded to the hosted layer.
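For anyone hitting the same problem, a minimal sketch of one way to keep the geometry intact is to merge on attribute columns only, re-attach the untouched geometry afterwards, and re-register it with the spatial accessor before calling to_featureset(). This is illustrative only; key_col and SHAPE are hypothetical names for the join key and geometry column.

import pandas as pd
from arcgis.features import GeoAccessor, GeoSeriesAccessor  # enables the DataFrame .spatial accessor

# Keep the geometry aside so the pandas merge cannot alter or coerce it
geometry_backup = service_records[['key_col', 'SHAPE']].copy()

# Merge attribute columns only
merged = pd.merge(service_records.drop(columns=['SHAPE']), df_sde, how='left', on='key_col')

# Re-attach the untouched geometry and tell the accessor which column holds it
merged = merged.merge(geometry_backup, how='left', on='key_col')
merged.spatial.set_geometry('SHAPE')

feature_set = merged.spatial.to_featureset()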