I have the below script which does the following:
This script works when run in the interpreter, and has been running on our server when triggered by Task Scheduler overnight for some weeks. All of a sudden, it now doesn't.
I'm getting told that the feature service URL is not a valid parameter for ExportFeatures, and also not compatible with Update Cursor (according to Copilot).
I managed to find a way around the ExportFeatures issue by using Feature Layer to create a local copy.
What I can't figure out is why this was working with no issues, now isn't despite no software updates being done on the server, or data configuration changes occurring. Has ArcGIS Online restricted the use of the feature service URL as a data source in arcpy?
import arcpy
import time
import os
import csv
import tempfile
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection
# Start timer so total runtime can be reported at the end.
startTime = time.time()
try:
    import certifi
    import urllib3
    import ssl
    import warnings
    from urllib3.exceptions import InsecureRequestWarning

    # Allow geoprocessing outputs to be overwritten on re-runs.
    arcpy.env.overwriteOutput = True

    # Create a default SSL context with certificate verification and make a
    # request to confirm connectivity to the AGOL organisation up front.
    ssl_context = ssl.create_default_context(cafile=certifi.where())
    http = urllib3.PoolManager(ssl_context=ssl_context)
    response = http.request('GET', 'https://org-wa.maps.arcgis.com')
    print("http response: " + str(response.status))

    # Suppress only the single InsecureRequestWarning from urllib3 if necessary
    warnings.simplefilter('ignore', InsecureRequestWarning)

    # Connect to ArcGIS Online using the application's OAuth credentials.
    print("Connecting to AGOL")
    client_id = '#####'
    client_secret = '#####'
    gis = GIS("https://org-wa.maps.arcgis.com", client_id=client_id, client_secret=client_secret)
    print("Logged in as: " + gis.properties.user.username)

    # Stage exports in a scratch file geodatabase under a temp directory.
    temp_dir = tempfile.mkdtemp()
    temp_gdb = os.path.join(temp_dir, "tempAreaHa.gdb")
    arcpy.CreateFileGDB_management(temp_dir, "tempAreaHa.gdb")
    arcpy.env.workspace = temp_gdb

    def remove_last_part(url):
        """Drop the trailing path segment (the layer index) from a service URL."""
        return '/'.join(url.split('/')[:-1])

    def extract_fc_name(url):
        """Return the service name from a .../<Name>/FeatureServer/<index> URL."""
        path_parts = url.split('/')
        fc_name = path_parts[-3]
        return fc_name

    # Read the CSV file containing feature service URLs (first column).
    with open(r'\\gis-prod\orggis\Update\Feature Service Update Scripts\FeatureServiceAreaUpdates.csv', mode='r') as file:
        csv_reader = csv.reader(file)
        next(csv_reader)  # Skip header row if present
        for row in csv_reader:
            featureservice = row[0]
            fc_name = extract_fc_name(featureservice)

            # Export a local copy; preserveGlobalIds keeps the GlobalIDs so
            # the calculated areas can be joined back to the service below.
            with arcpy.EnvManager(preserveGlobalIds=True):
                temp_Areas = arcpy.conversion.ExportFeatures(
                    in_features=featureservice,
                    out_features=arcpy.env.workspace + "\\" + fc_name,
                    where_clause="",
                    use_field_alias_as_name="NOT_USE_ALIAS",
                    field_mapping=f'AreaHa "Area (ha)" true true false 0 Double 0 0,First,#,{fc_name},AreaHa,-1,-1;GlobalID "GlobalID" false false true 38 GlobalID 0 0,First,#,{fc_name},GlobalID,-1,-1',
                    sort_field=None
                )
            print(f"Features exported for {fc_name}")

            # Determine which area field exists on the exported copy.
            fields = [f.name for f in arcpy.ListFields(temp_Areas)]
            area_field = None
            for candidate in ["AreaHa", "Area", "Hectares"]:
                if candidate in fields:
                    area_field = candidate
                    break
            if area_field:
                print(f"Calculating area for field: {area_field}")
                arcpy.management.CalculateGeometryAttributes(
                    in_features=temp_Areas,
                    geometry_property=f"{area_field} AREA",
                    length_unit="",
                    area_unit="HECTARES",
                    coordinate_system='PROJCS["GDA2020_Australian_Albers",GEOGCS["GDA2020",DATUM["GDA2020",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",132.0],PARAMETER["Standard_Parallel_1",-18.0],PARAMETER["Standard_Parallel_2",-36.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]',
                    coordinate_format="SAME_AS_INPUT"
                )
                print(f"Areas calculated for {fc_name}")
            else:
                raise ValueError(f"No matching area field found in {fc_name}. Expected one of: 'AreaHa', 'Area', or 'Hectares'.")

            # Disable editor tracking on the service while edits are pushed.
            modified_url = remove_last_part(featureservice)
            flc = FeatureLayerCollection(modified_url, gis)
            disable_params = {
                "editorTrackingInfo": {
                    "enableEditorTracking": False
                }
            }
            flc.manager.update_definition(disable_params)
            print(f"Editor tracking disabled for {fc_name}")

            # Build a GlobalID -> area lookup from the exported copy.
            # FIX: upper-case the dictionary keys too - the update loop below
            # upper-cases its lookup key, so a case difference in the GUID
            # text would otherwise silently skip every row.
            sourceFieldsList = ['GlobalID', area_field]
            with arcpy.da.SearchCursor(temp_Areas, sourceFieldsList) as searchRows:
                valueDict = {r[0].upper(): r[1] for r in searchRows}

            # Update the feature service using the dictionary.
            updateFieldsList = ['GlobalID', area_field]
            print(f"\nUpdating {area_field} field in {fc_name}...\n")
            with arcpy.da.UpdateCursor(featureservice, updateFieldsList) as updateRows:
                for updateRow in updateRows:
                    keyValue = updateRow[0].upper()
                    if keyValue in valueDict:
                        # Only write rows whose stored area actually changed.
                        if valueDict[keyValue] != updateRow[1]:
                            for n in range(1, len(sourceFieldsList)):
                                print(f"Updating {area_field} for {updateRow[0]} from {updateRow[1]} to {valueDict[keyValue]}")
                                updateRow[n] = valueDict[keyValue]
                            updateRows.updateRow(updateRow)
            del valueDict
            print(f"\n{area_field} field updated successfully for {fc_name}")

            # Re-enable editor tracking now the edits are in.
            enable_params = {
                "editorTrackingInfo": {
                    "enableEditorTracking": True,
                    "enableOwnershipAccessControl": True,
                    "allowOthersToUpdate": True,
                    "allowOthersToDelete": True
                }
            }
            flc.manager.update_definition(enable_params)
            print(f"Editor tracking re-enabled for {fc_name}\n")

    endTime = time.time()
    print(f"\nScript completed in {endTime - startTime} seconds.")
except Exception as e:
    # Translate known failure modes into actionable messages for the email.
    if "The Product License has not been initialized" in str(e):
        e = "\n\n" + r"ArcGIS Pro license not available."
    if "cannot open 'https" in str(e):
        e = "\n\n" + r"Login to ArcGIS Online failed - check ArcGIS Pro on the FPC-GIS-01 server is logged in using an Admin account."
    if "Exception: Failed to execute. Parameters are not valid." in str(e):
        e = "\n\n" + r"Either login to ArcGIS Online failed - check ArcGIS Pro on the FPC-GIS-01 server is logged in using an Admin account, or Parameters need fixing if changes have been made in ArcGIS Online to the input layers."
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    import smtplib, os
    fromaddr = "GISNotifications@org.wa.gov.au"
    toaddr = "gis@org.wa.gov.au"
    msg = MIMEMultipart()
    msg['From'] = fromaddr
    msg['To'] = toaddr
    msg['Subject'] = "Updating AreaHa field process failed (" + os.path.basename(__file__) + ")"
    # DON'T include URL's or links to the web in the body of the email or it
    # might get blocked by the scam filters.
    body = "There has been an error in the script that checks and updates the AreaHa field in listed feature services. \n\nPlease check the script to troubleshoot any problems. \n\nException: " + str(e)
    msg.attach(MIMEText(body, 'plain'))
    server = smtplib.SMTP('org-wa-gov-au.mail.protection.outlook.com')
    # No login required for this relay.
    # server.login("youremailusername", "password")
    # FIX: as_string() is the proper RFC 2822 serialisation for sendmail.
    server.sendmail(fromaddr, toaddr, msg.as_string())
    server.quit()
    print("\nScript failed - email notification sent to gis@org.wa.gov.au")
    print("\nException: " + str(e))
It might be worthwhile for you to read Scheduling Tasks That Access Portal Data . That thread has a discussion of different issues and ideas for accessing portal data through scheduled tasks.
Thanks. Will give it a read.
As far as I know, Cursor objects should be able to handle a FeatureService URL. What could be happening is that you aren't properly authenticated through arcpy or the arcgis.GIS object. Does the user that's logged in to the ArcPro application on the server that runs this have access to those resources?
I would also recommend breaking some of this script out into functions so you can have some more granular error handling. Using environment variables for your globals is a good idea too, that way you can just change the environment if something changes and you don't need to mess with the script.
Logging for services like this is also a must. If you're running it unattended, you want to be able to come back later and check the run logs. With a more granular setup, you could log errors and choose to exit the script or continue (e.g. continue if a single feature service failed and log that it failed, but hard fail if authentication fails or arcpy isn't licensed.)
You can then collect these errors somewhere as the script runs and format them into an email at the end. That way you're allowed to capture multiple possible unrelated failures instead of just the first one that's hit.
I re-wrote this with some of those concepts, can't really test the service functionality as I don't have any AGOL services to connect to. Hopefully you can get something out of it though.
Large Code Block
import os
from pathlib import Path
from requests import get
import logging
from time import perf_counter
from typing import Iterable
# Email Imports
from smtplib import (
SMTP_SSL,
SMTPException,
)
from email.message import EmailMessage
from email.mime.text import MIMEText
from ssl import create_default_context
# ArcGIS-related imports
from arcpy import (
ArealUnit,
Polygon,
SpatialReference,
)
from arcpy.da import (
UpdateCursor,
SearchCursor,
Editor,
)
from arcgis.gis import (
GIS,
)
from arcgis.features import (
FeatureLayerCollection,
)
# ------ Script Config BEGIN ------ #
# Create a module-level logger writing to a .log file next to this script
LOGGER: logging.Logger = logging.getLogger(__file__)
log_file = Path(__file__).with_suffix('.log')
log_file.touch(exist_ok=True)
logging.basicConfig(
filename=str(log_file),
filemode='a',
format='[%(levelname)s]: %(asctime)s - %(filename)s (%(funcName)s) - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=logging.DEBUG,
)
# Set globals using environment variables, falling back to default values
CLIENT_ID: str = os.environ.get('CLIENT_ID', default='*****')
CLIENT_SECRET: str = os.environ.get('CLIENT_SECRET', default='*****')
AGOL_HOST: str = os.environ.get('AGOL_HOST', default='https://org-wa.maps.arcgis.com')
FEATURE_UPDATES = os.environ.get('FEATURE_LIST', default='FeatureServiceAreaUpdates.csv')
SMTP_SERVER: str = os.environ.get('SMTP_SERVER', default='org-wa-gov-au.mail.protection.outlook.com')
SMTP_FROM: str = os.environ.get('SMTP_FROM', 'GISNotifications@org.wa.gov.au')
SMTP_TO: str = os.environ.get('SMTP_TO', 'gis@org.wa.gov.au')
SMTP_SUBJECT: str = os.environ.get('SMTP_SUBJECT', f"Updating AreaHa field process failed ({os.path.basename(__file__)})")
# Service definition parameters applied before calculation (disables editor tracking)
PRE_RUN_PARAMS = {
"editorTrackingInfo": {
"enableEditorTracking": False
}
}
# Service definition parameters applied after calculation (restores editor tracking)
POST_RUN_PARAMS = {
"editorTrackingInfo": {
"enableEditorTracking": True,
"enableOwnershipAccessControl": True,
"allowOthersToUpdate": True,
"allowOthersToDelete": True
}
}
# GDA2020 / Australian Albers (EPSG:9473) - equal-area projection for area calculation
GDA2020_AUS_ALBERS = SpatialReference(9473)
# ------ Script Config END ------ #
# Collect exception messages to compile into email
# Format: [{'Readable Name': Exception(), ...}]
#
# NOTE: Because this is a mutable global, it must not be captured as a
# def-time default argument - a function using it as a default would be
# frozen to this object at import and always see the live error list
ERRORS: list[dict[str, Exception]] = []
def send_email(from_addr: str, to_addr: str | list[str], subject: str, body: MIMEText, send_success: bool = False) -> None:
    """Send an email report using the provided arguments.

    Arguments:
        from_addr (str): The message sender
        to_addr (str | list[str]): The message recipient(s)
        subject (str): The message subject line
        body (MIMEText): The message body, as produced by `build_message`
        send_success (bool): Send a success message even with no errors?

    Raises:
        SMTPException: Logged and re-raised when delivery fails
    """
    # If a success email is not requested and there are no error records,
    # there is nothing to report - skip sending entirely.
    if not send_success and not ERRORS:
        return
    msg = EmailMessage()
    msg['From'] = from_addr
    msg['To'] = to_addr
    msg['Subject'] = subject
    # FIX: EmailMessage.set_content() does not accept a MIMEText object
    # (it raises on unregistered types) - extract the text payload first.
    msg.set_content(body.get_payload() if isinstance(body, MIMEText) else str(body))
    # Use an SSL context for sending; if a password is required, call
    # server.login() inside the `with` block below.
    try:
        with SMTP_SSL(SMTP_SERVER, context=create_default_context()) as server:
            # Consider adding credentials to the global environment
            # server.login('username', 'password')
            # send_message() serialises an EmailMessage correctly
            # (sendmail(str(msg)) relied on the implicit __str__).
            server.send_message(msg, from_addr=from_addr, to_addrs=to_addr)
        LOGGER.info(f'report sent to {to_addr}')
    except SMTPException as e:
        LOGGER.error('Failed to send email!', exc_info=e)
        raise
def build_message(errors: list[dict[str, Exception]]=ERRORS, success_message:str|None=None) -> MIMEText:
"""Takes the Global ERROR list and formats the body of an email message to send using `send_email`
If no errors are passeed, a success message is sent
Arguments:
errors (list[dict[str, Exception]]|str): List of exceptions to parse and send in email report. (default: `ERRORS`)
success_message (str|None): If no errors are passed, this string is used as the message body (default: None)
Returns:
MIMEText: A MIMEText object with the body of the email message
"""
messages = []
if not errors:
messages.append(success_message)
else:
for error in errors: # Error record ({'name': exc, 'name2': exc})
for error_name, e in error.items():
messages.append(f'{error_name}:\n\t{e}')
return MIMEText('\n'.join(messages), 'plain')
def gather_feature_classes(fl: str|Path=FEATURE_UPDATES) -> dict[str, str]:
"""Load all FCs to update from file
Arguments:
fl (str|Path): The csv file to load features from
Returns:
(dict[str, str]): Mapping of feature name to service url
"""
try:
return {
line.split('/')[-3]: line # Map name to url (fc: https://.../<FeatureService>/ignore1(type)/ignore2(index))
for line in map(
lambda l: l.split(',')[0], # Get first column of line
Path(fl).open().readlines()[1:] # Get all lines minus headers
)
}
except (IOError, IndexError) as e:
LOGGER.error(f'Feature updates CSV is malformed or invalid', exc_info=e)
ERRORS.append({'Read CSV Failure': e})
raise e
def test_connection(host: str | None = None) -> None:
    """Check that an HTTP connection can be established to the host.

    Arguments:
        host (str | None): The host to ping. Defaults to the global
            `AGOL_HOST`, resolved at call time.

    Raises:
        Exception: Log exception, add to ERROR list and re-raise. A non-2xx
            response is also treated as a failure. (FIX: the original fell
            through on any non-200 status and returned None silently, so a
            dead host answering 503 looked like success.)
    """
    if host is None:
        host = AGOL_HOST
    try:
        response = get(host)
        # Raise HTTPError for 4xx/5xx instead of returning silently.
        response.raise_for_status()
        LOGGER.info(f'{host} is alive (status {response.status_code})')
    except Exception as e:
        LOGGER.error(f'Unable to communicate with {host}', exc_info=e)
        ERRORS.append({'Host Connection Failure': e})
        raise
def get_gis(host: str=AGOL_HOST, client_id: str=CLIENT_ID, client_secret: str=CLIENT_SECRET) -> GIS:
    """Authenticate against *host* with the given credentials.

    Arguments:
        host (str): The portal to log into (default: `AGOL_HOST`)
        client_id (str): The OAuth client ID (default: `CLIENT_ID`)
        client_secret (str): The OAuth client secret (default: `CLIENT_SECRET`)

    Returns:
        GIS: The root GIS object used for all further portal interaction

    Raises:
        Exception: Logged, recorded in the global ERRORS list, and re-raised
    """
    try:
        connection = GIS(host, client_id=client_id, client_secret=client_secret)
        LOGGER.info(f"Logged in as: {connection.properties.user.username}")
        return connection
    except Exception as e:
        # Only the credential tails are logged, so secrets never land in the log file.
        LOGGER.error(f'Failed to log into {host} with ***{client_id[-3:]}, ***{client_secret[-3:]}', exc_info=e)
        ERRORS.append({'Host Login Failure': e})
        raise e
def update_definition(url: str, gis: GIS | None = None, params: dict | None = None) -> None:
    """Update a feature service definition (e.g. toggle editor tracking).

    Arguments:
        url (str): The URL of the feature service, with or without the layer
            index (`https://.../<Feature>/FeatureServer[/<n>]`)
        gis (GIS | None): The GIS connection to use for updating the service
            (default: the result of `get_gis()`)
        params (dict | None): Param mapping passed to
            `FeatureLayerCollection.manager.update_definition`
            (default: None, which makes the call a no-op)
    """
    if params is None:
        return
    if gis is None:
        gis = get_gis()
    # If the layer index is on the url, remove it - the definition lives on
    # the service endpoint (.../FeatureServer), not on the individual layer.
    if url[-1].isdigit():
        url = url.rsplit('/', 1)[0]
    try:
        # FIX: the class is FeatureLayerCollection (the name imported at the
        # top of the file) - `FeatureLayerConnection` was a NameError; and the
        # manager method is update_definition, not update_params.
        flc = FeatureLayerCollection(url, gis)
        flc.manager.update_definition(params)
    except Exception as e:
        LOGGER.error(f'Failed to update params for {url}: {params}', exc_info=e)
        ERRORS.append({f'Parameter Update {url} FAILED': e})
def get_area_field(name: str, feature_class: str, valid_fields: Iterable[str]) -> str | None:
    """Validate that a feature class has one of the provided fields.

    Handles logging of both no-match and multiple-match situations.

    Arguments:
        name (str): The name of the feature class (for logging)
        feature_class (str): Path to the feature class to check
        valid_fields (Iterable[str]): Valid field names, in priority order

    Returns:
        str | None: The first valid field name in `valid_fields` order, or
            None when no candidate matches (both cases are logged)
    """
    # FIX: request '*' so cur.fields reflects the feature class schema -
    # opening the cursor with ['OID@'] limited cur.fields to that single
    # token, so no area field could ever match.
    with SearchCursor(feature_class, ['*']) as cur:
        actual_fields = set(cur.fields)
    # FIX: keep the caller's priority order instead of set.pop(), which
    # returned an arbitrary element when several candidates matched.
    matches = [field for field in valid_fields if field in actual_fields]
    if not matches:
        LOGGER.warning(f'{name} does not have one of {valid_fields}, skipping!')
        ERRORS.append({'Missing Area Field': KeyError(f'{name}: {sorted(actual_fields)}')})
        return None
    match = matches[0]
    # Warn and log that the feature class has multiple valid area fields.
    # FIX: the original interpolated `matches.add(match)`, which is None.
    if len(matches) > 1:
        LOGGER.warning(f'{name} has multiple valid area fields {matches} using {match}')
        ERRORS.append({'Multiple Area Fields': KeyError(f'{name} has multiple valid area fields {matches}, used {match}')})
    return match
def calculate_area(name: str, url: str, units: ArealUnit = 'Hectares',
                   *,  # KW only below
                   area_fields=('AreaHa', 'Area', 'Hectares')) -> bool:
    """Calculate the area of each feature in the service and store it.

    Arguments:
        name (str): The name of the feature class
        url (str): The url of the feature service
        units (ArealUnit): The area unit to calculate (default: 'Hectares')

    Keyword-only Arguments:
        area_fields (tuple[str]): Valid area field names to search for, in
            priority order (default: ('AreaHa', 'Area', 'Hectares')).
            FIX: was ('AreaHA', ...) - field matching is case-sensitive, so
            the 'AreaHa' field used by these services never matched.

    Returns:
        bool: True if the calculation succeeds, False if it doesn't
    """
    # Validate field names
    area_field = get_area_field(name, url, area_fields)
    if area_field is None:
        return False  # Logging is handled by `get_area_field`
    try:
        # An edit session is required to edit a feature service with a cursor.
        with Editor(url):
            with UpdateCursor(url, ['GLOBALID@', 'SHAPE@', area_field]) as cur:
                for gid, shape, area in cur:
                    shape: Polygon = shape
                    # NOTE(review): getArea(method='GEODESIC') measures on the
                    # ellipsoid, so projecting to equal-area Albers first may
                    # be redundant - confirm which measure is intended.
                    proj_shape = shape.projectAs(GDA2020_AUS_ALBERS)
                    if not isinstance(proj_shape, Polygon):
                        continue  # For type hinting
                    calc_area = proj_shape.getArea(method='GEODESIC', units=units)
                    if calc_area != area:  # Only update rows that require updates
                        # Write back the original (unprojected) geometry.
                        cur.updateRow((gid, shape, calc_area))
        return True
    except Exception as e:
        # Don't re-raise failure on area update; log it and carry on. This
        # allows all feature updates to be attempted even if one registered
        # service is incorrectly configured.
        LOGGER.error(f'Failed to update area for {name}: {e}')
        ERRORS.append({f'Area Update Failure {name}': e})
        return False
def main() -> None:
    """Primary flow of the script.

    1) Test connection to host
    2) Log into AGOL using creds
    3) Gather all FCs to update from CSV
    4) Iterate FCs and re-calculate Areas in Hectares
    5) Send e-mail with errors, or success
    """
    script_start_time = perf_counter()
    successful_updates = []
    failed_updates = []
    try:
        test_connection()
        gis = get_gis()
        fcs = gather_feature_classes()
        LOGGER.info('Updating Areas...')
        for fc_name, fc_url in fcs.items():
            fc_start_time = perf_counter()
            LOGGER.info(f'Updating Area for {fc_name}')
            update_definition(fc_url, gis, PRE_RUN_PARAMS)
            success = calculate_area(fc_name, fc_url)
            # Always restore editor tracking, even after a failed update.
            update_definition(fc_url, gis, POST_RUN_PARAMS)
            # Log per-feature-class results
            if success:
                successful_updates.append(fc_name)
                # FIX: elapsed time is now - start (was start - now, negative)
                LOGGER.info(f'Successfully updated Area for {fc_name} in {perf_counter() - fc_start_time:0.0f} seconds')
            else:
                failed_updates.append(fc_name)
                LOGGER.warning(f'Failed to update {fc_name}')
        # Log script run results
        if successful_updates:
            LOGGER.info(f'Updated Areas for {successful_updates} in {perf_counter() - script_start_time:0.0f} seconds')
        if failed_updates:
            LOGGER.warning(f'Failed to update {failed_updates}')
    except Exception as e:
        # FIX: a bare `pass` hid hard failures (auth, CSV read) completely.
        # The helpers already recorded the error in ERRORS for the email;
        # log it here so the run log shows why the loop aborted.
        LOGGER.error('Run aborted by unhandled exception', exc_info=e)
    send_email(
        from_addr=SMTP_FROM,
        to_addr=SMTP_TO,
        subject=SMTP_SUBJECT,
        body=build_message(
            success_message=f'Updated Areas for {successful_updates} in {perf_counter() - script_start_time:0.0f} seconds'
        ),
        send_success=True,
    )


if __name__ == '__main__':
    main()
You should also use `logging` for any automated tasks like this. I have this one set up to log everything to a .log file in the same directory as the script and print out all messages to that log. That way if something goes on, you can tail the logs and see whats failing.
Edit: Of course I find some bugs after I post this.
1) Fixed the calculation operation to apply the new value
2) Fixed the field validation to fail on an empty intersection and check for a non-empty set after the pop
3) Added a main hook
4) Fixed unpacking of the error record list in build_messages
Out of interest, is there any reason for not using the ArcGIS API for Python to handle it all? With the workflow looking like this...
I have done very similar recently and could share the code tomorrow if it is something that would help?
All the best,
Glen
I would be keen to see your code if you could share please! It's mainly down to my entry/mid level python skills and not know what options there are or how to wield them. Self learning as you go and sticking with what is familiar.
See below, I have factored in some of your steps. Depending on how often the schedule runs, I would generally use a where clause in the feature layer query() isolating features for that time period only - day, week, etc. That way only the features where a change occurred (although it might not have been geometric) are updated, rather than the entire dataset each time. Test on a disposable dataset first and see if it matches your expected output.
from arcgis.gis import GIS
from arcgis.features import FeatureLayer
from arcgis.features import FeatureLayerCollection
from arcgis.geometry import Geometry
## Access AGOL whichever way you need to ("home" uses the active Pro login)
agol = GIS("home")
## FeatureLayer URLs (you'll read from CSV)
fl_urls = [
"URL_1",
"URL_2",
"URL_3",
]
## to disable editor tracking
disable_params = {
"editorTrackingInfo" : {
"enableEditorTracking" : False
}
}
## to enable editor tracking
enable_params = {
"editorTrackingInfo": {
"enableEditorTracking": True,
"enableOwnershipAccessControl": True,
"allowOthersToUpdate": True,
"allowOthersToDelete": True
}
}
## the possible field names to update (checked in this priority order)
area_fld_names = ["AreaHa", "Area", "Hectares"]
## for each Feature Layer url
for fl_url in fl_urls:
## create FeatureLayer object
fl = FeatureLayer(url=fl_url)
## get a list of field names from the feature layer
fld_list = [fld.name for fld in fl.properties.fields]
## get which area field is used in the feature layer
## (candidates present on the layer, preserving priority order)
area_fld = [fld for fld in area_fld_names if fld in fld_list]
## if an area field found, the list won't be empty
if area_fld:
## get the field name from the list
aread_fld_name = area_fld[0]
## get all features as a feature set
fs = fl.query(out_fields=[aread_fld_name], return_geometry=True)
## for each feature
## NOTE(review): assumes a FeatureSet is directly iterable - if not,
## iterate fs.features instead; confirm against your arcgis API version
for f in fs:
## update the area field
## NOTE(review): PLANAR area in the layer's own CRS - confirm the CRS
## is projected/equal-area or switch to a geodesic method
f.attributes[aread_fld_name] = Geometry(f.geometry).get_area(method="PLANAR", units="HECTARES")
## get the feature service as an item
item = agol.content.get(fl.properties.serviceItemId)
## create a FLC object
flc = FeatureLayerCollection.fromitem(item)
## turn off editor tracking
flc.manager.update_definition(disable_params)
## update the features
## NOTE(review): edit_features may expect a list (fs.features) - confirm
fl.edit_features(updates=fs)
## turn back on editor tracking
flc.manager.update_definition(enable_params)
Let me know how it goes.
Just another thought, if the ExportFeatures is causing you grief with the Feature Layer URL you could replace it with another workflow for FeatureLayer to Feature Class extraction using the ArcGIS API for Python, see below.
from arcgis.gis import GIS
from arcgis.features import FeatureLayer

## path to where feature classes will be exported
gdb_path = r"path\to\export.gdb"

## you'll read yours from CSV
fl_urls = ["URL_1", "URL_2", "URL_3"]

## Access AGOL
agol = GIS("home")

for fl_url in fl_urls:
    ## get the FeatureLayer object
    fl = FeatureLayer(url=fl_url)
    ## make the name consistent with feature class naming rules
    out_name = fl.properties.name.replace(" ", "_")
    ## get the FeatureSet of the FeatureLayer
    export = fl.query()
    ## save to Feature Class
    ## FIX: save to gdb_path (the variable defined above) - `out_gdb` was
    ## undefined and raised a NameError on the first iteration
    export.save(
        save_location = gdb_path,
        out_name = out_name
    )
    ## PERFORM HECTARE CALCULATION
    ...