<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Error downloading large size hosted feature layer as fileGDB in ArcGIS Online Questions</title>
    <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187781#M9209</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Oops, I mean Line 8 should be:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;download_size = requests.get(replica_url, stream=True).headers['Content-length']&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Tue, 15 Sep 2020 17:52:54 GMT</pubDate>
    <dc:creator>JRhodes</dc:creator>
    <dc:date>2020-09-15T17:52:54Z</dc:date>
    <item>
      <title>Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187776#M9204</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I am trying to download a 20GB feature layer as file GDB from AGOL. The large size is due to photo attachments. I followed the steps posted in &lt;A href="https://support.esri.com/en/technical-article/000012232"&gt;this &lt;/A&gt;technical article but still cannot get the download to finish without error. I've also used JDownloader to download in segments but no luck. Can someone help me? Thanks.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 28 Aug 2020 14:30:29 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187776#M9204</guid>
      <dc:creator>NareshAligeti1</dc:creator>
      <dc:date>2020-08-28T14:30:29Z</dc:date>
    </item>
    <item>
      <title>Re: Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187777#M9205</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Naresh.&amp;nbsp; Can you share details about the error you're receiving?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 28 Aug 2020 16:22:22 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187777#M9205</guid>
      <dc:creator>MarianneFarretta</dc:creator>
      <dc:date>2020-08-28T16:22:22Z</dc:date>
    </item>
    <item>
      <title>Re: Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187778#M9206</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Naresh,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Have you tried the Python requests module to stream the download?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE class="lia-code-sample line-numbers language-none"&gt;&lt;CODE&gt;&lt;SPAN class="keyword token"&gt;import&lt;/SPAN&gt; requests
&lt;SPAN class="keyword token"&gt;from&lt;/SPAN&gt; pathlib &lt;SPAN class="keyword token"&gt;import&lt;/SPAN&gt; Path

replica_url &lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt; &lt;SPAN class="string token"&gt;"https://services9.arcgis.com/iERBXXD4hiy1L6en/arcgis/rest/services/Example/FeatureServer/replicaFiles/my_replica.zip"&lt;/SPAN&gt;

save_dir &lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt; Path&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;&lt;SPAN class="string token"&gt;"C://backup_utility//test.zip"&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;

download_size &lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt; requests&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;get&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;url&lt;SPAN class="punctuation token"&gt;,&lt;/SPAN&gt; stream&lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt;&lt;SPAN class="token boolean"&gt;True&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;headers&lt;SPAN class="punctuation token"&gt;[&lt;/SPAN&gt;&lt;SPAN class="string token"&gt;'Content-length'&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;]&lt;/SPAN&gt;

&lt;SPAN class="keyword token"&gt;with&lt;/SPAN&gt; requests&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;get&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;replica_url&lt;SPAN class="punctuation token"&gt;,&lt;/SPAN&gt; stream&lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt;&lt;SPAN class="token boolean"&gt;True&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;,&lt;/SPAN&gt; timeout&lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt;&lt;SPAN class="number token"&gt;30&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt; &lt;SPAN class="keyword token"&gt;as&lt;/SPAN&gt; f&lt;SPAN class="punctuation token"&gt;:&lt;/SPAN&gt;
     &lt;SPAN class="keyword token"&gt;with&lt;/SPAN&gt; open &lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;save_dir&lt;SPAN class="punctuation token"&gt;,&lt;/SPAN&gt; &lt;SPAN class="string token"&gt;'wb'&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt; &lt;SPAN class="keyword token"&gt;as&lt;/SPAN&gt; save&lt;SPAN class="punctuation token"&gt;:&lt;/SPAN&gt;
          &lt;SPAN class="keyword token"&gt;for&lt;/SPAN&gt; chunk &lt;SPAN class="keyword token"&gt;in&lt;/SPAN&gt; f&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;iter_content&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;chunk_size &lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt; &lt;SPAN class="number token"&gt;1024&lt;/SPAN&gt;&lt;SPAN class="operator token"&gt;*&lt;/SPAN&gt;&lt;SPAN class="number token"&gt;1024&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;:&lt;/SPAN&gt;
               save&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;write&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;chunk&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;
               size_on_disk &lt;SPAN class="operator token"&gt;=&lt;/SPAN&gt; Path&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;save_dir&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;stat&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;.&lt;/SPAN&gt;st_size
               &lt;SPAN class="keyword token"&gt;print&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;(&lt;/SPAN&gt;f&lt;SPAN class="string token"&gt;"{size_on_disk} of {download_size} downloaded"&lt;/SPAN&gt;&lt;SPAN class="punctuation token"&gt;)&lt;/SPAN&gt;
‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍&lt;SPAN class="line-numbers-rows"&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 11 Dec 2021 09:29:47 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187778#M9206</guid>
      <dc:creator>JRhodes</dc:creator>
      <dc:date>2021-12-11T09:29:47Z</dc:date>
    </item>
    <item>
      <title>Re: Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187779#M9207</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thanks, Joe. I'll check it out.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 15 Sep 2020 01:30:11 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187779#M9207</guid>
      <dc:creator>NareshAligeti1</dc:creator>
      <dc:date>2020-09-15T01:30:11Z</dc:date>
    </item>
    <item>
      <title>Re: Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187780#M9208</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Just noticed Line 8 should be:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;with requests.get(&lt;STRONG&gt;replica_url&lt;/STRONG&gt;, stream=True, timeout=30) as f:&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 15 Sep 2020 17:41:50 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187780#M9208</guid>
      <dc:creator>JRhodes</dc:creator>
      <dc:date>2020-09-15T17:41:50Z</dc:date>
    </item>
    <item>
      <title>Re: Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187781#M9209</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Oops, I mean Line 8 should be:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;download_size = requests.get(replica_url, stream=True).headers['Content-length']&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 15 Sep 2020 17:52:54 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/187781#M9209</guid>
      <dc:creator>JRhodes</dc:creator>
      <dc:date>2020-09-15T17:52:54Z</dc:date>
    </item>
    <item>
      <title>Re: Error downloading large size hosted feature layer as fileGDB</title>
      <link>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/1018487#M38091</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Revisiting with another potential solution in case anyone stumbles on this. I've been having success creating replicas in "chunks" and exporting the service in smaller pieces, then merging them back together in Pro.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import arcgis.features
from arcgis import GIS
import urllib
import json
import wget #you could also use requests if preferred
import sys

###### USER VARIABLES ######
org_url = 'https://your_org.maps.arcgis.com'
username = 'your_username' #admin
password = 'your_pa$$word' #admin
save_directory = "C://pyTest//"
item_ids_to_chunk = ["e9bec92473644fe0b95f2779b9cd5b15",
					 "e9bec92473644fe0b95f2779b9cd5b15"]
chunk_size = 1000 #adjust as appropriate for your data
############################

feature_iterator = chunk_size
replace_list = [r' ', r'/', r':', r';', '\\', '*', '[', ']', '`', '~', '|', ',', '"', '.']

try:
	gis = GIS(org_url, username, password)
	print(f"Authenticated for {org_url}")
except:
	print(f"Could not authenticate for {org_url}. Check credentials.")
	sys.exit()

def sendRequest(request):
	response = urllib.request.urlopen(request)
	readResponse = response.read()
	jsonResponse= json.loads(readResponse)
	return jsonResponse

#enable sync and/or extract if necessary so replica can be created
def enableSyncExtract(itemFLC, token):
	_item_id = itemFLC.properties.serviceItemId
	_item = gis.content.get(_item_id)
	_item_url = _item.url
	capabilities_initial = itemFLC.properties.capabilities
	adm_url = _item_url.replace("/arcgis/rest/services/","/arcgis/rest/admin/services/")
	update_url = adm_url.replace("/FeatureServer","/FeatureServer/updateDefinition")
	rest = f"{update_url}?token={token}"
					
	if not itemFLC.properties.syncEnabled and not "Extract" in itemFLC.properties.capabilities:
		print("Enabling sync and extract")
		capabilities = f"{capabilities_initial},Sync,Extract"
		syncEnabled = "true"
		info = {"updateDefinition": {"capabilities" : capabilities,
									 "syncEnabled" : syncEnabled},
				"f": "json",
				"async": "false",
				"token": token}
		data = urllib.parse.urlencode(info).encode()
		req =  urllib.request.Request(rest, data=data)
		response = sendRequest(req)
		return capabilities_initial

	elif not itemFLC.properties.syncEnabled and "Extract" in itemFLC.properties.capabilities:
		print("Enabling sync")
		capabilities = f"{capabilities_initial},Sync"
		syncEnabled = "true"
		info = {"updateDefinition": {"capabilities" : capabilities,
									 "syncEnabled" : syncEnabled},
				"f": "json",
				"async": "false",
				"token": token}
		data = urllib.parse.urlencode(info).encode()
		req =  urllib.request.Request(rest, data=data)
		response = sendRequest(req)
		return capabilities_initial

	elif itemFLC.properties.syncEnabled and not "Extract" in itemFLC.properties.capabilities:
		print("Enabling extract")
		capabilities = f"{capabilities_initial},Extract"
		syncEnabled = "true"
		info = {"updateDefinition": {"capabilities" : capabilities,
									 "syncEnabled" : syncEnabled},
				"f": "json",
				"async": "false",
				"token": token}
		data = urllib.parse.urlencode(info).encode()
		req =  urllib.request.Request(rest, data=data)
		response = sendRequest(req)
		return capabilities_initial
	else:
		return

#reset sync and/or extract
def inlineSyncExtractReset(capabilities_initial, _id):
	_item = gis.content.get(_id)
	resetFLC = arcgis.features.FeatureLayerCollection(_item.url, gis)
	update_dict = {"capabilities": capabilities_initial}
	resetFLC.manager.update_definition(update_dict)

def generateToken():
    url = "https://arcgis.com/sharing/rest/generateToken"
    data = {'username' : username,
            'password' : password,
            'referer' : "https://www.arcgis.com",
            'f' : 'json'}
    request = urllib.request.Request(url, urllib.parse.urlencode(data).encode("utf-8"))
    jsonResponse = sendRequest(request)
    token = jsonResponse['token']
    if token:
    	print("Token successfully obtained")
    	return token
    else:
    	print("Could not obtain token. Exiting.")
    	sys.exit()

token = generateToken()

for _id in item_ids_to_chunk:
	item_layers = []
	item = gis.content.get(_id)
	print(f"\n-------------------------------------------\nStarting chunked backup for {item.title} ({item.id})")
	itemFLC = arcgis.features.FeatureLayerCollection(item.url, gis)
	
	try:
		capabilities_initial = enableSyncExtract(itemFLC, token)
	except:
		print(f"Could not verify or enable sync/extract for {item.title} ({item.id}). Exiting.")
		sys.exit()

	for l in itemFLC.layers:
		item_layers.append(l.properties.id)
	for t in itemFLC.tables:
		if not t.properties.name == 'GDB_ServiceItems':
			item_layers.append(t.properties.id)

	for layer in item_layers:
		chunk = 1
		start_record = 0
		chunk_size = feature_iterator
		get_feature_count_url = f"{item.url}/{layer}/query?where=1%3D1&amp;amp;returnIdsOnly=true&amp;amp;f=pjson&amp;amp;token={token}"
		request = urllib.request.Request(get_feature_count_url)
		jsonResponse = sendRequest(request)
		oid_list = jsonResponse.get('objectIds')
		count = max(oid_list)
		chunk_mod = count % chunk_size
		rest_of_chunks = count - chunk_mod

		if chunk_mod == 0:
			chunk_count = rest_of_chunks/chunk_size
		else:	
			chunk_count = (rest_of_chunks/chunk_size) + 1
		print(f"Exporting {int(chunk_count)} chunks for Layer {layer} of {item.title} ({item.id})\n-------------------------------------------")
		
		while chunk &amp;lt;= chunk_count:
			print(f"Exporting Chunk {chunk}: ObjectID {start_record} through {chunk_size}")
			layer_query = r'{"' + f'{layer}' + r'":{"where":"OBJECTID BETWEEN' + \
						  f' {start_record} AND {chunk_size}' + r'"}}'
			replicaURL = f"{item.url}/createReplica"
			data = {'f': 'json',
					'replicaName': item.title.replace(" ", "_"),
					'layers': layer,
					'layerQueries': layer_query,
					'returnAttachments': 'true',
					'syncModel': 'none',	
					'dataFormat': 'filegdb',
					'async': 'true',
					'token': token}
			request = urllib.request.Request(replicaURL, urllib.parse.urlencode(data).encode("utf-8"))
			jsonResponse = sendRequest(request)
			
			if not jsonResponse:
				print(f"Request for ObjectIDs {start_record} to {chunk_size} failed. Trying again.")
				jsonResponse = sendRequest(request)
				if not jsonResponse:
					print(f"Replica creation failed for {item.title} ({item.id}).\n")
					continue
			
			responseUrl = jsonResponse['statusUrl']
			url = f"{responseUrl}?f=json&amp;amp;token={token}"
			request = urllib.request.Request(url)
			jsonResponse = sendRequest(request)

			while not jsonResponse.get("status") == "Completed":
				if not jsonResponse.get("status") == "Failed":
					request = urllib.request.Request(url)
					jsonResponse = sendRequest(request)
				else:
					print(f"Replica creation failed for {item.title} ({item.id}).\n")
					continue

			jres = jsonResponse['resultUrl']
			url = f"{jres}?token={token}"
			item_title = item.title

			for r in replace_list:
				if r in item_title:
					item_title = item_title.replace(r, '_')

			save_dir = f"{save_directory}{item_title}_{item.id}_layer{layer}_chunk{chunk}.zip"
			wget.download(url, save_dir) #you could use requests if preferred
			start_record = (chunk * feature_iterator) + 1
			chunk_size = (chunk * feature_iterator) + feature_iterator
			chunk += 1
	
	if capabilities_initial:
		try:
			inlineSyncExtractReset(capabilities_initial, _id)
			print(f"Capability reset successful")
		except:
			print(f"***Capability reset failed for {item.title} ({item.id}). Reset manually (original capabilities: {capabilities_initial})***")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 20 Jan 2021 21:45:57 GMT</pubDate>
      <guid>https://community.esri.com/t5/arcgis-online-questions/error-downloading-large-size-hosted-feature-layer/m-p/1018487#M38091</guid>
      <dc:creator>JRhodes</dc:creator>
      <dc:date>2021-01-20T21:45:57Z</dc:date>
    </item>
  </channel>
</rss>

