I have a service that I am scraping via Python, in chunks due to the size of the dataset... about 12,000 records.
During this scrape it writes the values to individual JSON files, 1000 records at a time. I can open these JSON files and see the first one runs from OBJECTID=1 to OBJECTID=1000, the next one from OBJECTID=1001 to OBJECTID=2000, etc.
I can copy the contents of one of those JSON files, paste it into the REST Services endpoint, and update from there with NO issues... so I know the format is correct.
I read somewhere that there is a limit on the number of characters json.load can handle, which I use in the second def below. My files are about 830 KB in size... it appears not all of them are getting in, as I only get about 4300 of the 12k records in the append process...
Good news is that it does NOT error out. It simply does not append all the records.
If there is a limitation on json.load, how do I get around it? I tried to decrease the 1000 record count in my code to make more (smaller) files, but that does not seem to help... I'm puzzled...
Thoughts?
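For reference, a quick sanity check that counts the features in each scraped file, to narrow down whether records are lost during the scrape or during the append (a minimal sketch; it assumes the output*.json naming and the path variable from the full script below):

import json
import os

total = 0
for name in sorted(os.listdir(path)):
    if name.startswith("output") and name.endswith(".json"):
        with open(os.path.join(path, name)) as fh:
            data = json.load(fh)
        count = len(data['features'])
        print('{}: {} features'.format(name, count))
        total += count
print('Total features across all files: {}'.format(total))

# If this prints ~12,000, json.load is reading the files fine and the
# loss is happening on the addFeatures side, not in the scrape.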
def scrapeData(out_json_path, out_fc_path):
    try:
        service_props = ds.getServiceProperties(ds.service_url)
        max_record_count = 1000
        layer_ids = ds.getLayerIds(ds.service_url)
        id_groups = list(ds.chunks(layer_ids, max_record_count))
        for i, id_group in enumerate(id_groups):
            print(' group {} of {}'.format(i + 1, len(id_groups)))
            layer_data = ds.getLayerDataByIds(ds.service_url, id_group)
            level = str(i)
            outjsonpath = outputVariable + level + ".json"
            if i == 0:  # If first iteration of id_groups
                layer_data_final = layer_data
                with open(outjsonpath, 'w') as out_json_file:
                    json.dump(layer_data_final, out_json_file)
            else:
                layer_data_final2 = layer_data
                with open(outjsonpath, 'w') as out_json_file:
                    json.dump(layer_data_final2, out_json_file)
    except Exception:
        raise  # full error handling shown in the complete script below
I then loop through the JSON files and append them to the Feature Service:
def addDEQData():
    for x in os.listdir(path):
        if x.startswith("output"):
            filetoImport = path + x
            f = open(filetoImport)
            data = json.load(f)
            featureAddingAdd = data['features']
            gis_payloadAdd = {
                'token': currenttoken,
                'f': 'json',
                'features': f'''{featureAddingAdd}'''
            }
            urlAdd = urlADDFeatures
            files = []
            headers = {}
            response = requests.request("POST", urlAdd, headers=headers, data=gis_payloadAdd, files=files)
Here is the full script:
import arcpy
import arcgis
from arcgis import GIS
import requests
import json
import os
import sys
import traceback
targetURL = "https://apps.deq.virginia.gov/arcgis/rest/services/public/EDMA/MapServer/102"
gis = GIS("https://xxxxxxx/portal", "username", "password")
currenttoken = gis._con.token
urlADDFeatures = r"https://xxxxxx/env/rest/services/DEV/VDEQ_Petro_Tank_Facility_DEV/FeatureServer/0/addFeatures"
urlDeleteFeatures = r"https://xxxxxx/env/rest/services/DEV/VDEQ_Petro_Tank_Facility_DEV/FeatureServer/0/deleteFeatures"
osPath = os.path.dirname(os.path.abspath(__file__))
outputVariable = osPath + "\\PetroleumTankFacilities\\output"
path = osPath + "\\PetroleumTankFacilities\\"
arcpy.env.overwriteOutput = True
class DataScraper():
    def __init__(self):
        # URL to map service you want to extract data from
        self.service_url = targetURL

    def getServiceProperties(self, url):
        URL = url
        PARAMS = {'f': 'json'}
        r = requests.get(url=URL, params=PARAMS)
        service_props = r.json()
        return service_props

    def getLayerIds(self, url, query=None):
        URL = url + '/query'
        PARAMS = {'f': 'json', 'returnIdsOnly': True, 'where': '1=1'}
        if query:
            PARAMS['where'] = "ST = '{}'".format(query)
        r = requests.get(url=URL, params=PARAMS)
        data = r.json()
        return data['objectIds']

    def getLayerDataByIds(self, url, ids):
        # ids parameter should be a list of object ids
        URL = url + '/query'
        field = 'OBJECTID'
        value = ', '.join([str(i) for i in ids])
        PARAMS = {'f': 'json', 'where': '{} IN ({})'.format(field, value), 'returnIdsOnly': False,
                  'returnCountOnly': False, 'returnGeometry': True, 'outFields': '*'}
        r = requests.post(url=URL, data=PARAMS)
        layer_data = r.json()
        return layer_data

    def chunks(self, lst, n):
        # Yield successive n-sized chunks from list
        for i in range(0, len(lst), n):
            yield lst[i:i + n]
#def scrapeData(out_json_path, out_fc_path):
def scrapeData():
    try:
        service_props = ds.getServiceProperties(ds.service_url)
        max_record_count = service_props['maxRecordCount']
        layer_ids = ds.getLayerIds(ds.service_url)
        # RETURN 1000 Records
        id_groups = list(ds.chunks(layer_ids, max_record_count))
        # RETURN 100 Records
        #id_groups = list(ds.chunks(layer_ids, 100))
        for i, id_group in enumerate(id_groups):
            print(' group {} of {}'.format(i + 1, len(id_groups)))
            layer_data = ds.getLayerDataByIds(ds.service_url, id_group)
            level = str(i)
            outjsonpath = outputVariable + level + ".json"
            if i == 0:  # If first iteration of id_groups
                print(outjsonpath)
                layer_data_final = layer_data
                print('Writing JSON file...')
                with open(outjsonpath, 'w') as out_json_file:
                    json.dump(layer_data_final, out_json_file)
            else:
                print(outjsonpath)
                layer_data_final2 = layer_data
                print('Writing JSON file...')
                with open(outjsonpath, 'w') as out_json_file:
                    json.dump(layer_data_final2, out_json_file)
    except Exception:
        # Handle errors accordingly...this is generic
        tb = sys.exc_info()[2]
        tb_info = traceback.format_tb(tb)[0]
        pymsg = f'PYTHON ERRORS:\n\tTraceback info:\t{tb_info}\n\tError Info:\t{sys.exc_info()[1]}\n'
        msgs = f'ArcPy ERRORS:\t{arcpy.GetMessages(2)}\n'
        print(pymsg)
        print(msgs)
def deleteDEQData():
    print("Starting to Delete")
    url = urlDeleteFeatures
    whereclause = '1=1'
    gis_payload = {
        'token': currenttoken,
        'f': 'json',
        'where': f'''{whereclause}'''
    }
    response = requests.request("POST", url=url, data=gis_payload)
    print("Done Deleting")
def addDEQData():
    print("Starting to Append")
    for x in os.listdir(path):
        if x.startswith("output"):
            filetoImport = path + x
            print("Appending: " + x)
            f = open(filetoImport)
            data = json.load(f)
            featureAddingAdd = data['features']
            print(featureAddingAdd)
            gis_payloadAdd = {
                'token': currenttoken,
                'f': 'json',
                'features': f'''{featureAddingAdd}'''
            }
            print("-------")
            print(gis_payloadAdd)
            urlAdd = urlADDFeatures
            files = []
            headers = {}
            #response = requests.request("POST", urlAdd, headers=headers, data=gis_payloadAdd, files=files)
            #response = requests.request("POST", urlAdd, data=gis_payloadAdd)
            #print(response)
            resp = requests.post(urlAdd, gis_payloadAdd)
            print(resp)
if __name__ == '__main__':
    # Instantiate DataScraper class
    ds = DataScraper()
    # Run the function to get your data
    scrapeData()
    deleteDEQData()
    addDEQData()
    print("Done Appending")
Okay, I think maybe I understand. You might be confusing ArcPy with the ArcGIS API for Python? With ArcPy you would most likely edit the data directly using an SDE connection file.
The ArcGIS API for Python, on the other hand, is just a wrapper for the REST API that makes it easier to use. I guess the only way you wouldn't be able to use it for this purpose is if you are working with a standalone ArcGIS Server. Otherwise, you would simply log into your portal, load the target Feature Layer, and use the FeatureLayer object's "edit_features" method to accomplish a similar result.
Not sure what you are getting at... is my approach doable? I mean, why does it add 19 of 100 records and 200 of 1000 records and then just stop appending?
From your sample, you're already using the API for login purposes. Why don't you give the edit_features method a shot with your JSON and see if that produces different results? I don't observe the same behavior using just the API, so I think at least part of the problem may be missing headers.
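If you do want to stick with the raw REST call for a moment, one thing worth checking (a hypothetical rework, not tested against your service): your payload builds the 'features' parameter with an f-string, which renders the Python list with single quotes, i.e. not valid JSON. Serializing with json.dumps is safer, and you can set the form Content-Type explicitly if you suspect headers (requests normally adds it for you when you pass data=):

# Hypothetical rework of the payload from addDEQData
gis_payloadAdd = {
    'token': currenttoken,
    'f': 'json',
    'features': json.dumps(featureAddingAdd)  # valid JSON, double quotes
}
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
response = requests.post(urlADDFeatures, data=gis_payloadAdd, headers=headers)
print(response.json())  # inspect the addResults / error for each batch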
Not sure how to do this... "Why don't you give the edit_features method a shot with your json"
Do you have an example... does this work without an SDE connection file?
What would be missing in the headers? Parameters? Not sure there.
There are some examples here: Editing Features | ArcGIS API for Python.
No SDE connection required. Your code would change to something like this:
from arcgis import GIS
from arcgis.features import FeatureLayer

gis = GIS("https://xxxxxxx/portal", "username", "password")
fl_url = "https://xxxxxx/env/rest/services/DEV/VDEQ_Petro_Tank_Facility_DEV/FeatureServer/0"
fl = FeatureLayer(fl_url, gis)

# ...other logic...

fl.edit_features(adds=YOUR_ADDS, deletes=YOUR_DELETES, updates=YOUR_UPDATES)
Where the adds, deletes, and updates are a FeatureSet object or a list of features in JSON form (I think you basically have this already with your JSON).
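For instance, something like this (a minimal sketch; feature_dicts stands in for the list you already pull out of data['features']):

# Hypothetical feature in the JSON form the REST API uses
feature_dicts = [
    {
        'attributes': {'FACILITY_NAME': 'Example'},
        'geometry': {'x': -77.43, 'y': 37.54}
    }
]
result = fl.edit_features(adds=feature_dicts)
print(result['addResults'])  # one success/failure entry per feature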
WORKED FANTASTIC... thank you very much @EarlMedina for steering me in the correct direction... and it's FAST!!!
portal_item = gis.content.get('6e91cfxxxxx1fc7294493dddd')
ports_layer = portal_item.layers[0]
for x in os.listdir(path):
    if x.startswith("output"):
        filetoImport = path + x
        with open(filetoImport) as f:
            data = json.load(f)
        featureAddingAdd = data['features']
        add_result = ports_layer.edit_features(adds=featureAddingAdd)
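To confirm every record lands, the return value of edit_features can be checked per feature (a small sketch using the add_result from the loop above):

# Each entry in addResults reports success or failure for one feature
failed = [r for r in add_result['addResults'] if not r.get('success')]
print('{} added, {} failed'.format(
    len(add_result['addResults']) - len(failed), len(failed)))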
Awesome, glad to help streamline things!