I have this URL: https://geohub-vadeq.hub.arcgis.com/datasets/57759688e4944bb987add68c4f0c5ada_104/explore?location=3...
or
https://geohub-vadeq.hub.arcgis.com/datasets/57759688e4944bb987add68c4f0c5ada_104/about
I want to use Python/arcpy to download all of its data. If I use the REST endpoint there is a limit of 1,000 records per request, so I can't seem to get all 50k.
Any ideas on how I can do this programmatically? I'm looking to get GeoJSON or JSON and write it to a JSON file locally...
I do this kind of thing all the time! Love data mining. You have to group your requests into batches of whatever the maximum number of records the service will return is, in this case 1,000. Here's a code snippet I modified from something I use regularly. You can set it up to run as a scheduled task to be fully automated. You'll have to put in your desired output paths, and then it should run.
import arcpy
import requests
import json
import sys
import traceback
from datetime import date

arcpy.env.overwriteOutput = True


class DataScraper():
    def __init__(self):
        # URL to the map service you want to extract data from
        self.service_url = 'https://apps.deq.virginia.gov/arcgis/rest/services/public/EDMA/MapServer/104'

    def getServiceProperties(self, url):
        # Fetch the service's metadata (including maxRecordCount)
        PARAMS = {'f': 'json'}
        r = requests.get(url=url, params=PARAMS)
        service_props = r.json()
        return service_props

    def getLayerIds(self, url, query=None):
        # Get every object ID in the layer (optionally filtered)
        URL = url + '/query'
        PARAMS = {'f': 'json', 'returnIdsOnly': True, 'where': '1=1'}
        if query:
            PARAMS['where'] = "ST = '{}'".format(query)
        r = requests.get(url=URL, params=PARAMS)
        data = r.json()
        return data['objectIds']

    def getLayerDataByIds(self, url, ids):
        # ids parameter should be a list of object IDs
        URL = url + '/query'
        field = 'OBJECTID'
        value = ', '.join([str(i) for i in ids])
        PARAMS = {'f': 'json', 'where': '{} IN ({})'.format(field, value),
                  'returnIdsOnly': False, 'returnCountOnly': False,
                  'returnGeometry': True, 'outFields': '*'}
        r = requests.post(url=URL, data=PARAMS)
        layer_data = r.json()
        return layer_data

    # UTILITIES
    def chunks(self, lst, n):
        # Yield successive n-sized chunks from a list
        for i in range(0, len(lst), n):
            yield lst[i:i + n]


if __name__ == '__main__':
    # Can use the date for naming iterative runs...
    todays_date = date.today().strftime("%Y_%m_%d")

    # Instantiate the DataScraper class
    ds = DataScraper()

    # Specify where you want to save the output JSON and feature class on your machine
    out_json_path = rf"e:/data{todays_date}.json"
    out_fc_path = rf"e:/data{todays_date}.shp"

    # Uses the DataScraper class to fetch all object IDs, chunk them up by the
    # service's max record count, and send batched requests to the service.
    # Results are compiled into a single JSON and then converted to a
    # shapefile/feature class at the end.
    def scrapeData(out_json_path, out_fc_path):
        try:
            service_props = ds.getServiceProperties(ds.service_url)
            max_record_count = service_props['maxRecordCount']
            layer_ids = ds.getLayerIds(ds.service_url)
            id_groups = list(ds.chunks(layer_ids, max_record_count))
            for i, id_group in enumerate(id_groups):
                print(' group {} of {}'.format(i + 1, len(id_groups)))
                layer_data = ds.getLayerDataByIds(ds.service_url, id_group)
                if i == 0:  # First iteration of id_groups
                    layer_data_final = layer_data
                else:
                    layer_data_final['features'].extend(layer_data['features'])
            print('Writing JSON file...')
            with open(out_json_path, 'w') as out_json_file:
                json.dump(layer_data_final, out_json_file)
            print('Converting JSON to features...')
            arcpy.conversion.JSONToFeatures(out_json_path, out_fc_path)
        except Exception:
            # Handle errors accordingly...this is generic
            tb = sys.exc_info()[2]
            tb_info = traceback.format_tb(tb)[0]
            pymsg = f'PYTHON ERRORS:\n\tTraceback info:\t{tb_info}\n\tError Info:\t{str(sys.exc_info()[1])}\n'
            msgs = f'ArcPy ERRORS:\t{arcpy.GetMessages(2)}\n'
            print(pymsg)
            print(msgs)

    # Run the function to get your data
    scrapeData(out_json_path, out_fc_path)
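Since you mentioned wanting GeoJSON specifically: the snippet above pulls Esri JSON (f='json'). Many ArcGIS REST services will also return GeoJSON directly if you pass f='geojson' to the query endpoint; check the layer's supportedQueryFormats on its REST page first. Here's a minimal sketch of a drop-in variation for the DataScraper class, under that assumption:

    def getLayerGeoJsonByIds(self, url, ids):
        # Same batched query as getLayerDataByIds, but asks the service for
        # GeoJSON instead of Esri JSON. Assumes the layer advertises geoJSON
        # in its supportedQueryFormats.
        URL = url + '/query'
        value = ', '.join(str(i) for i in ids)
        PARAMS = {'f': 'geojson',
                  'where': 'OBJECTID IN ({})'.format(value),
                  'returnGeometry': True,
                  'outFields': '*'}
        r = requests.post(url=URL, data=PARAMS)
        return r.json()

The merging logic in scrapeData() should work unchanged, since a GeoJSON FeatureCollection also keeps its records in a 'features' list. In ArcGIS Pro, arcpy.conversion.JSONToFeatures accepts GeoJSON input as well; if your version doesn't, just keep the merged JSON file and skip the conversion step.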
I can manually do it here, but I want to do this programmatically.
@AaronCole1 Dang man... that's awesome... this one is going into Source Safe... Cheers Dude
That's a very nice program @AaronCole1. I've been doing a lot of scraping recently and have had good luck using FeatureClassToFeatureClass. It only works on one layer at a time, you have to feed it the REST service URL, and it doesn't write to JSON like you want...
arcpy.conversion.FeatureClassToFeatureClass(in_url, os.path.dirname(out_fc), os.path.basename(out_fc))
Update: Don't mind this remark.
You should just be able to replace the out_fc_path variable with the path to your data... that could be an SDE feature class. You'd point to your connection file, something like "P:/Folder/Folder/ConnectionFile.sde/GIS.DBO.FeatureClassName"
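Something like this is all it should take, if I understand your setup. A quick sketch; the connection file and feature class names below are placeholders for illustration, not real paths:

    # In the __main__ block of the script above, swap the shapefile target
    # for a feature class inside an SDE connection file (placeholder path).
    out_fc_path = r"P:/Folder/Folder/ConnectionFile.sde/GIS.DBO.FeatureClassName"

    # JSONToFeatures writes to whatever workspace the output path points at,
    # so scrapeData() itself doesn't need to change.
    scrapeData(out_json_path, out_fc_path)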
Got it working... Rocking...
Just wrote it to a FGDB for now... but SDE next per your last comment.... CHEERS
import arcpy

arcpy.env.overwriteOutput = True


def copyData():
    # Set local variables
    inFeatures = 'https://apps.deq.virginia.gov/arcgis/rest/services/public/EDMA/MapServer/102'
    outLocation = r"C:\Users\xxx\Desktop\GIS_projects\TestingExportData\output.gdb"
    outFeatureClass = "TEST_PetTankFac"

    ## Add Expression
    #delimitedField = arcpy.AddFieldDelimiters(arcpy.env.workspace, "NAME")
    #expression = delimitedField + " = 'Post Office'"

    # Run FeatureClassToFeatureClass
    arcpy.conversion.FeatureClassToFeatureClass(inFeatures, outLocation, outFeatureClass)


if __name__ == '__main__':
    copyData()
My question is that the path and the name are in one string... but it's looking for 3 separate parameters:
"P:/Folder/Folder/ConnectionFile.sde/GIS.DBO.FeatureClassName"
arcpy.conversion.FeatureClassToFeatureClass(inFeatures, outLocation, outFeatureClass)
It would be like this:
outLocation = "P:/Folder/Folder/ConnectionFile.sde"
outFeatureClass = 'FeatureClassName'
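If you'd rather keep the combined path in one variable, you can split it the same way the earlier one-liner did, with os.path.dirname and os.path.basename. A quick sketch using the same placeholder path:

    import os
    import arcpy

    inFeatures = 'https://apps.deq.virginia.gov/arcgis/rest/services/public/EDMA/MapServer/102'
    full_path = "P:/Folder/Folder/ConnectionFile.sde/GIS.DBO.FeatureClassName"  # placeholder

    # dirname -> the workspace (.sde connection file); basename -> the feature class name
    outLocation = os.path.dirname(full_path)
    outFeatureClass = os.path.basename(full_path)

    # Note: with an enterprise geodatabase the tool may expect the unqualified
    # name (just 'FeatureClassName', as shown above) rather than the
    # owner-qualified basename.
    arcpy.conversion.FeatureClassToFeatureClass(inFeatures, outLocation, outFeatureClass)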