I am trying to download a 20GB feature layer as file GDB from AGOL. The large size is due to photo attachments. I followed the steps posted in this technical article but still cannot get the download to finish without error. I've also used JDownloader to download in segments but no luck. Can someone help me? Thanks.
Hi Naresh. Can you share details about the error you're receiving?
Naresh,
Have you tried the Python requests module to stream the download?
import requests
from pathlib import Path

# URL of the replica zip exposed by the feature service's replicaFiles endpoint.
replica_url = "https://services9.arcgis.com/iERBXXD4hiy1L6en/arcgis/rest/services/Example/FeatureServer/replicaFiles/my_replica.zip"
save_dir = Path("C://backup_utility//test.zip")

# One streamed request only. The original issued a second GET (with an
# undefined name `url`) just to read Content-Length, and never closed it.
with requests.get(replica_url, stream=True, timeout=30) as f:
    # Header comes from the same response -- no extra request needed.
    download_size = f.headers['Content-length']
    # 1 MB chunks keep memory flat even for a 20 GB file.
    with open(save_dir, 'wb') as save:
        for chunk in f.iter_content(chunk_size=1024 * 1024):
            save.write(chunk)

size_on_disk = Path(save_dir).stat().st_size
print(f"{size_on_disk} of {download_size} downloaded")
Thanks, Joe. I'll check it out.
Just noticed Line 8 should be:
with requests.get(replica_url, stream=True, timeout=30) as f:
Oops, I mean Line 8 should be:
download_size = requests.get(replica_url, stream=True).headers['Content-length']
Revisiting with another potential solution in case anyone stumbles on this. I've been having success creating replicas in "chunks" and exporting the service in smaller pieces, then merging them back together in Pro.
# Standard library
import json
import sys
import urllib
# Explicit submodule imports: `import urllib` alone does not guarantee that
# urllib.parse / urllib.request are loaded (the script relies on both).
import urllib.parse
import urllib.request

# Third party
import arcgis.features
from arcgis import GIS
import wget #you could also use requests if preferred
###### USER VARIABLES ######
org_url = 'https://your_org.maps.arcgis.com'
username = 'your_username' #admin
password = 'your_pa$$word' #admin
save_directory = "C://pyTest//"
item_ids_to_chunk = ["e9bec92473644fe0b95f2779b9cd5b15",
                     "e9bec92473644fe0b95f2779b9cd5b15"]
chunk_size = 1000 #adjust as appropriate for your data
############################

# Snapshot of the configured chunk size, used for the OID-range arithmetic below.
feature_iterator = chunk_size
# Characters that are illegal or awkward in file names; replaced with "_" when
# building the saved zip's name.
replace_list = [r' ', r'/', r':', r';', '\\', '*', '[', ']', '`', '~', '|', ',', '"', '.']

try:
    gis = GIS(org_url, username, password)
    print(f"Authenticated for {org_url}")
except Exception:  # was a bare except: that would also swallow SystemExit/KeyboardInterrupt
    print(f"Could not authenticate for {org_url}. Check credentials.")
    sys.exit()
def sendRequest(request):
    """Open a urllib request and return the response body parsed as JSON.

    The response is closed via a context manager -- the original left it
    open, leaking the underlying socket on every call.
    """
    with urllib.request.urlopen(request) as response:
        readResponse = response.read()
    return json.loads(readResponse)
#enable sync and/or extract if necessary so replica can be created
def enableSyncExtract(itemFLC, token):
    """Ensure the service has Sync and Extract capabilities so createReplica works.

    Returns the original capabilities string when a change was made (so the
    caller can restore it later with inlineSyncExtractReset), or None when
    the service already had both capabilities.
    """
    _item_id = itemFLC.properties.serviceItemId
    _item = gis.content.get(_item_id)
    _item_url = _item.url
    capabilities_initial = itemFLC.properties.capabilities
    # updateDefinition lives on the admin endpoint, not /arcgis/rest/services.
    adm_url = _item_url.replace("/arcgis/rest/services/", "/arcgis/rest/admin/services/")
    update_url = adm_url.replace("/FeatureServer", "/FeatureServer/updateDefinition")
    rest = f"{update_url}?token={token}"

    # Determine which capabilities are missing. The original spelled this out
    # as three nearly identical branches; one request path covers all cases.
    missing = []
    if not itemFLC.properties.syncEnabled:
        missing.append("Sync")
    if "Extract" not in itemFLC.properties.capabilities:
        missing.append("Extract")
    if not missing:
        # Nothing to change; nothing to restore later.
        return None

    print(f"Enabling {' and '.join(m.lower() for m in missing)}")
    capabilities = f"{capabilities_initial},{','.join(missing)}"
    info = {"updateDefinition": {"capabilities": capabilities,
                                 "syncEnabled": "true"},
            "f": "json",
            "async": "false",
            "token": token}
    data = urllib.parse.urlencode(info).encode()
    req = urllib.request.Request(rest, data=data)
    sendRequest(req)  # response body is not inspected; errors propagate to the caller
    return capabilities_initial
#reset sync and/or extract
def inlineSyncExtractReset(capabilities_initial, _id):
    """Restore a service's pre-backup capabilities via its FLC manager."""
    target_item = gis.content.get(_id)
    flc = arcgis.features.FeatureLayerCollection(target_item.url, gis)
    flc.manager.update_definition({"capabilities": capabilities_initial})
def generateToken():
    """Request an ArcGIS Online token for the configured username/password.

    Returns the token string, or exits the script when the portal does not
    return one (e.g. bad credentials).
    """
    url = "https://arcgis.com/sharing/rest/generateToken"
    data = {'username' : username,
            'password' : password,
            'referer' : "https://www.arcgis.com",
            'f' : 'json'}
    request = urllib.request.Request(url, urllib.parse.urlencode(data).encode("utf-8"))
    jsonResponse = sendRequest(request)
    # .get() instead of ['token']: an error response carries no 'token' key,
    # and the original subscript raised KeyError before the failure branch ran.
    token = jsonResponse.get('token')
    if token:
        print("Token successfully obtained")
        return token
    else:
        print("Could not obtain token. Exiting.")
        sys.exit()
token = generateToken()

for _id in item_ids_to_chunk:
    item_layers = []
    item = gis.content.get(_id)
    print(f"\n-------------------------------------------\nStarting chunked backup for {item.title} ({item.id})")
    itemFLC = arcgis.features.FeatureLayerCollection(item.url, gis)
    try:
        capabilities_initial = enableSyncExtract(itemFLC, token)
    except Exception:  # narrowed from bare except:
        print(f"Could not verify or enable sync/extract for {item.title} ({item.id}). Exiting.")
        sys.exit()

    # Collect layer and table ids, skipping the internal GDB_ServiceItems table.
    for l in itemFLC.layers:
        item_layers.append(l.properties.id)
    for t in itemFLC.tables:
        if not t.properties.name == 'GDB_ServiceItems':
            item_layers.append(t.properties.id)

    for layer in item_layers:
        chunk = 1
        start_record = 0
        # Renamed from the original's `chunk_size`, which shadowed the
        # user-configured variable of the same name.
        end_record = feature_iterator
        get_feature_count_url = f"{item.url}/{layer}/query?where=1%3D1&returnIdsOnly=true&f=pjson&token={token}"
        request = urllib.request.Request(get_feature_count_url)
        jsonResponse = sendRequest(request)
        oid_list = jsonResponse.get('objectIds')
        # Chunks are OBJECTID ranges, so the highest OID (not the row count)
        # bounds the loop.
        count = max(oid_list)
        # Integer ceiling division replaces the original float modulo dance.
        chunk_count = -(-count // feature_iterator)
        print(f"Exporting {int(chunk_count)} chunks for Layer {layer} of {item.title} ({item.id})\n-------------------------------------------")

        while chunk <= chunk_count:
            print(f"Exporting Chunk {chunk}: ObjectID {start_record} through {end_record}")
            layer_query = r'{"' + f'{layer}' + r'":{"where":"OBJECTID BETWEEN' + \
                          f' {start_record} AND {end_record}' + r'"}}'
            replicaURL = f"{item.url}/createReplica"
            data = {'f': 'json',
                    'replicaName': item.title.replace(" ", "_"),
                    'layers': layer,
                    'layerQueries': layer_query,
                    'returnAttachments': 'true',
                    'syncModel': 'none',
                    'dataFormat': 'filegdb',
                    'async': 'true',
                    'token': token}
            request = urllib.request.Request(replicaURL, urllib.parse.urlencode(data).encode("utf-8"))
            jsonResponse = sendRequest(request)
            if not jsonResponse:
                print(f"Request for ObjectIDs {start_record} to {end_record} failed. Trying again.")
                jsonResponse = sendRequest(request)

            failed = not jsonResponse
            if failed:
                print(f"Replica creation failed for {item.title} ({item.id}).\n")
            else:
                # Poll the async status URL until the replica completes or fails.
                # BUG FIX: the original's Failed branch did `continue` without
                # refreshing the status, spinning in this loop forever.
                responseUrl = jsonResponse['statusUrl']
                url = f"{responseUrl}?f=json&token={token}"
                request = urllib.request.Request(url)
                jsonResponse = sendRequest(request)
                while jsonResponse.get("status") not in ("Completed", "Failed"):
                    request = urllib.request.Request(url)
                    jsonResponse = sendRequest(request)
                if jsonResponse.get("status") == "Failed":
                    print(f"Replica creation failed for {item.title} ({item.id}).\n")
                    failed = True

            if not failed:
                # Build a filesystem-safe name and download this chunk's zip.
                jres = jsonResponse['resultUrl']
                url = f"{jres}?token={token}"
                item_title = item.title
                for r in replace_list:
                    if r in item_title:
                        item_title = item_title.replace(r, '_')
                save_dir = f"{save_directory}{item_title}_{item.id}_layer{layer}_chunk{chunk}.zip"
                wget.download(url, save_dir)  # you could use requests if preferred

            # Always advance to the next OID range. BUG FIX: the original
            # `continue` statements skipped these updates, re-requesting the
            # same failed chunk forever.
            start_record = (chunk * feature_iterator) + 1
            end_record = (chunk * feature_iterator) + feature_iterator
            chunk += 1

    # Restore the service's original capabilities if we changed them.
    if capabilities_initial:
        try:
            inlineSyncExtractReset(capabilities_initial, _id)
            print(f"Capability reset successful")
        except Exception:  # narrowed from bare except:
            print(f"***Capability reset failed for {item.title} ({item.id}). Reset manually (original capabilities: {capabilities_initial})***")