This code is for collecting information on attachments from hosted feature layers within the portal and exporting it to an Excel file.
The ArcGIS Online portal that we manage was running low on credits. Upon investigation, we found that the consumption was due to the storage of hosted feature layers by some customers. Therefore, we needed to identify which hosted feature layers were consuming the most credits. However, using ‘item.size’ only displayed the file size including attachments. So, by using the method from this URL to obtain the file size of the attachments from the layer properties, we subtracted the attachment size from the item’s total file size to obtain the file size of only the data in the hosted feature layer.
#!/usr/bin/env python
# coding: utf-8
import pandas as pd
from arcgis.gis import GIS
import time
# Log in
gis = GIS(...)
print("Logged in as " + str(gis.properties.user.username))
# Retrieve hosted feature layers from the portal
# Search for hosted feature layers
items = gis.content.search(query="owner:{}".format(gis.users.me.username), item_type="Feature Layer", max_items=1000)
# Initialize the list to store data
data_list = []
n = 0
# Process each item
for item in items:
# Get item layers
layers = item.layers
n += 1
print(n)
# Skip if no layers exist
if layers is None:
continue
# Initialize placeholders
attachmentLoops = 0
attachmentSize = 0
attachmentCount = 0
# Iterate over item layers
for l in layers:
# Skip if the first layer is a survey layer
if l.properties.name == "survey":
continue
# Check if layer supports attachments
if hasattr(l.properties, 'hasAttachments') and l.properties.hasAttachments:
# Set query values
attachmentMax = 10000
attachmentOffset = 0
continueQuery = True
# Get attachments
while continueQuery:
attachmentLoops += 1
attachments = l.attachments.search(where='1=1', max_records=attachmentMax, offset=attachmentOffset)
# Add a 5-second wait time due to the request limit error. Wait for 60 seconds every 50 requests.
if (n % 50) != 0:
time.sleep(5)
else:
print("sleep60")
time.sleep(60)
# Check for attachments
if attachments:
# Increment offset
attachmentOffset += attachmentMax
# Iterate over attachments
for a in attachments:
attachmentCount += 1
s = a.get('SIZE')
attachmentSize += s
else:
continueQuery = False
# Convert attachment size to MB
itemSizeMB = ((float(item.size) / 1024.0) / 1024.0)
attachmentSizeMB = ((float(attachmentSize) / 1024.0) / 1024.0)
itemWithoutAttachmentSizeMb = itemSizeMB - attachmentSizeMB
# Check attachment loops
if attachmentLoops == 0:
attachmentLoops = 0
else:
attachmentLoops = attachmentLoops - 1
# Add data to the list
data_list.append({
'Item ID': item.id,
'Item Title': item.title,
'Item size (MB)': round(itemSizeMB, 2),
'Number of Loops': attachmentLoops,
'Attachment Count': attachmentCount,
'Attachment Size (Bytes)': attachmentSize,
'Attachment Size (MB)': round(attachmentSizeMB, 2),
'Item size without attachment (MB)': round(itemWithoutAttachmentSizeMb, 2)
})
# Convert to DataFrame
df = pd.DataFrame(data_list)
# Output to Excel
df.to_excel('attachment_summary.xlsx', index=False)
print("Attachment summary has been saved to 'attachment_summary.xlsx'.")