Hi All,
I'm new to python and have created the code below after much googling. I'm trying to search through all subfolders within my directory and find all aprx's then list all of the features held within the aprx. My code achieves this but I'd like to make it more efficient by skipping over sub-folders that contain a certain sub-string.
For example I'd like to be able to remove all folders containing the string "myfolder" if the directory contained the following folders:
myfolder_1
myfolder2
testfolder
So my goal would be to only search for aprx's in the folder "testfolder".
My Code:
# Extensions to search for. The trailing comma makes this a real one-element
# tuple; ('.aprx') without it is just a parenthesised string.
extensions = ('.aprx',)
# Directory names (exact match only) to exclude from the search
exclude_directories = {'Archived', '#Archived', 'Archive', 'archived', 'archive', '#archived', '#archive', 'ss', 'SS'}

for dname, dirs, files in os.walk(r'C:\Users\MyDirectory'):
    # Slice-assigning to dirs edits the list os.walk itself iterates, so the
    # excluded folders are never descended into.
    dirs[:] = [d for d in dirs if d not in exclude_directories]
    for fname in files:
        if fname.lower().endswith(extensions):  # case-insensitive extension check
            fpath = os.path.join(dname, fname)  # full path to the project file
            aprx = arcpy.mp.ArcGISProject(fpath)
            for m in aprx.listMaps():
                for lyr in m.listLayers():
                    if lyr.isBroken:
                        print("(BROKEN) " + lyr.name + " - " + m.name)
                    elif lyr.supports("DATASOURCE"):
                        # NOTE(review): [211:] drops a fixed-length prefix of the
                        # data source string; presumably specific to this
                        # environment - confirm before reusing elsewhere.
                        print("Map Layer Name: " + lyr.name + "| SZ_gis_dataname: " + lyr.dataSource[211:])
Looks like you were essentially there. This should satisfy your goal, along with some minor refactoring. Hope it helps!
import os
import arcpy
# Extensions to search for. The trailing comma makes this a real one-element
# tuple; ('.aprx') without it is just a parenthesised string.
extensions = ('.aprx',)
# Directories to exclude by exact name
exclude_directories = {'Archived', '#Archived', 'Archive', 'archived', 'archive', '#archived', '#archive', 'ss', 'SS'}
# Directories to exclude when their lower-cased name contains any of these
# substrings (write the entries in lower case)
exclude_substrings = ('myfolder',)

# Walk through the directory
for dname, dirs, files in os.walk(r'C:\Users\MyDirectory'):
    # Slice-assigning to dirs edits the list os.walk itself iterates, so the
    # excluded folders are never descended into.
    dirs[:] = [
        d for d in dirs
        if d not in exclude_directories
        and not any(sub in d.lower() for sub in exclude_substrings)
    ]
    for fname in files:
        if not fname.lower().endswith(extensions):  # not an .aprx file
            continue
        fpath = os.path.join(dname, fname)  # full path to the .aprx file
        # One unreadable project is reported and skipped rather than
        # stopping the whole scan.
        try:
            aprx = arcpy.mp.ArcGISProject(fpath)  # open the ArcGIS project
            for m in aprx.listMaps():
                for lyr in m.listLayers():
                    if lyr.isBroken:
                        print(f"(BROKEN) {lyr.name} - {m.name}")
                    elif lyr.supports("DATASOURCE"):
                        print(f"Map Layer Name: {lyr.name} | SZ_gis_dataname: {lyr.dataSource}")
        except Exception as e:
            print(f"Error processing file {fpath}: {e}")
Hi @Username1 ,
For future reference, please post your code by clicking the expand-toolbar button
and inserting it with the code-sample option.
Regarding your code, try this.
# Folder names (exact match only) that should not be searched
exclude_directories = ['Archived', '#Archived', 'Archive', 'archived', 'archive', '#archived', '#archive', 'ss', 'SS']
# Any folder or file whose name contains this text is skipped as well
# (set it to the text you want to exclude, e.g. 'myfolder')
RemoveFilesByName = 'myfolderex'
WorkingFolder = r'C:\Users\MyDirectory'


def find_projects(folder, skip_names, skip_substring):
    """Return the full paths of the .aprx files found under *folder*.

    Sub-folders whose name is in *skip_names*, or whose name contains
    *skip_substring*, are never entered. Files whose name contains
    *skip_substring* are left out too. The extension check ignores case.
    """
    found = []
    for root, directories, filenames in os.walk(folder):
        # Slice-assigning edits the list os.walk itself iterates, so the
        # unwanted folders are pruned instead of being walked and ignored.
        directories[:] = [
            d for d in directories
            if d not in skip_names and skip_substring not in d
        ]
        for filename in filenames:
            if filename.lower().endswith('.aprx') and skip_substring not in filename:
                found.append(os.path.join(root, filename))
    return found


Projects = find_projects(WorkingFolder, exclude_directories, RemoveFilesByName)

for project_path in Projects:
    project = arcpy.mp.ArcGISProject(project_path)
    for m in project.listMaps():
        for lyr in m.listLayers():
            if lyr.isBroken:
                print(f'(BROKEN) {lyr.name} - {m.name}')
            elif lyr.supports("DATASOURCE"):
                # NOTE(review): [211:] drops a fixed-length prefix of the data
                # source string, carried over from the question - confirm it
                # fits your data.
                print(f'Map Layer Name: {lyr.name} | SZ_gis_dataname: {lyr.dataSource[211:]}')
I use something like this.
import os
import arcpy
# File extensions to look for (lower case, including the dot)
extensions = ('.aprx',)
# Folders whose name contains any of these substrings are skipped
# (the comparison is case-sensitive)
exclude_substrings = ['Folder']


def list_aprx_files(directory, file_extensions=None, skip_substrings=None):
    """Find project files under *directory*, skipping excluded sub-folders.

    Args:
        directory: Folder to search recursively.
        file_extensions: Extension, or iterable of extensions, to match
            case-insensitively. Defaults to the module-level ``extensions``.
        skip_substrings: Sub-folders whose name contains any of these strings
            are not entered. Defaults to the module-level
            ``exclude_substrings``.

    Returns:
        A list of full paths to the matching files.
    """
    if file_extensions is None:
        file_extensions = extensions
    if skip_substrings is None:
        skip_substrings = exclude_substrings
    # str.endswith only takes a str or a tuple; wrap a bare string so it is
    # treated as one extension rather than split into characters below.
    if isinstance(file_extensions, str):
        file_extensions = (file_extensions,)
    suffixes = tuple(ext.lower() for ext in file_extensions)

    aprx_files = []
    for root, dirs, files in os.walk(directory):
        # Slice-assigning edits the list os.walk itself iterates, so the
        # excluded folders are never descended into.
        dirs[:] = [d for d in dirs if not any(sub in d for sub in skip_substrings)]
        aprx_files.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith(suffixes)
        )
    return aprx_files
def process_layer(layer, map_name):
    """Report one layer and return the same information as a record.

    Prints a line saying whether the layer is broken, what its data source
    is, or that it does not support data sources.

    Args:
        layer: Object exposing ``name``, ``isBroken``, ``supports(prop)`` and
            ``dataSource`` (as used below).
        map_name: Name of the map the layer belongs to.

    Returns:
        A dict with the keys "Map Name", "Map Layer Name", "Data Source" and
        "Broken". "Data Source" is None unless the data source was read
        successfully. The record lets callers collect results, for example
        into a DataFrame, instead of only reading the printed output.
    """
    record = {
        "Map Name": map_name,
        "Map Layer Name": layer.name,
        "Data Source": None,
        "Broken": False,
    }
    if layer.isBroken:
        print(f"Broken Layer: {layer.name} | Map: {map_name}")
        record["Broken"] = True
    elif layer.supports("DATASOURCE"):
        try:
            data_source = layer.dataSource
            print(f"Layer: {layer.name} | Map: {map_name} | Data Source: {data_source}")
            record["Data Source"] = data_source
        except Exception as e:
            # Best effort: report the failure and still return the record.
            print(f"Couldn't get data source for {layer.name}: {e}")
    else:
        print(f"Layer: {layer.name} doesn't support data sources.")
    return record
def process_aprx_file(aprx_path):
    """Open a project file and run process_layer on every layer of every map.

    Args:
        aprx_path: Path to the .aprx file to open.

    Returns:
        A list of the non-None values returned by process_layer, so the
        results can be collected (for example into a spreadsheet). If an
        exception is raised while opening or reading the project, it is
        reported and whatever was collected up to that point is returned.
    """
    rows = []
    try:
        print(f"Opening project: {aprx_path}")
        aprx = arcpy.mp.ArcGISProject(aprx_path)
        for map_obj in aprx.listMaps():
            for layer in map_obj.listLayers():
                row = process_layer(layer, map_obj.name)
                if row is not None:
                    rows.append(row)
    except Exception as e:
        # One unreadable project should not stop the whole scan.
        print(f"Failed to process {aprx_path}: {e}")
    return rows
def main(directory):
    """Process every .aprx file found under *directory*.

    Args:
        directory: Folder to scan; it is passed to list_aprx_files.

    Returns:
        A list combining whatever process_aprx_file returns for each project
        (empty when no project files are found). The combined list can be
        turned into a report, e.g. ``pd.DataFrame(rows).to_excel(path)``.
    """
    aprx_files = list_aprx_files(directory)
    if not aprx_files:
        print(f"No project files (.aprx) found in {directory}.")
        return []

    rows = []
    for aprx_file in aprx_files:
        # "or []" tolerates a process_aprx_file that returns nothing.
        rows.extend(process_aprx_file(aprx_file) or [])
    return rows


# Run the scan only when this file is executed as a script, not on import
if __name__ == "__main__":
    # Change this path to the folder you want to scan
    main(r'O:\***\***\***')
Thanks for this. Defining each step as its own function isn't something I'd considered, and it's not an approach I'd come across.
The final step of my code is to write out the findings as a spreadsheet. I've been trying to get it to work with your code but haven't managed it. I assume that it would be possible to get the results and save them out as a spreadsheet?
For reference my code looks like this:
feature_service.append((m.name,
lyr.name,
lyr.dataSource[211:]))
feat_service_cols = ['Map Name', 'Map Layer Name', 'SZ_gis_dataname']
feat_service_result = pd.DataFrame(feature_service, columns=feat_service_cols)
# print(feat_service_result)
# feat_service_result is already a DataFrame, so no further conversion is needed
df_feat_service_result = feat_service_result
# print(df_feat_service_result)

# Write to Excel
# A raw string cannot end in a backslash (r'C:\MyOutputFolder\' is a syntax
# error); os.path.join adds the separator when the file path is built.
out_folder = r'C:\MyOutputFolder'  # folder for the Excel report
os.makedirs(out_folder, exist_ok=True)  # create the folder only if it is missing
ts = datetime.now().strftime('%Y%m%d%H%M%S')
# The file name carries only the date, so a second run on the same day
# overwrites the earlier report.
path = os.path.join(out_folder, "Report_" + datetime.today().strftime('%Y.%m.%d') + '.xlsx')
with pd.ExcelWriter(path) as writer:
    df_feat_service_result.to_excel(writer, sheet_name='feat_service_result')
print(f"Excel outputs can be found in {path}")
You can use the ArcGIS API for Python to read into Pandas dataframe, after which you could simply use .to_excel() method on the dataframe. It is unclear what type of data source your feature_service variable is, but I suspect one of the below would work.
Feature Layer to dataframe
Feature Class to dataframe
Here is a function I use once a year to help me clean up data, and it allows me to create a .xlsx and share it with my peers.
import arcpy
import pandas as pd
def export_sde_feature_datasets(sde_connection, output_excel):
    """
    Lists feature datasets and feature classes in an SDE database
    and exports them to an Excel file.

    Args:
        sde_connection: Path to the SDE connection; it is used as the arcpy
            workspace.
        output_excel: Path of the Excel file to write.

    The sheet has two columns, "Feature Dataset" and "Feature Class". Each
    dataset gets a header row followed by one row per feature class, and the
    standalone feature classes are listed last. arcpy.env.workspace is left
    pointing at sde_connection when the function returns.
    """
    # Use the connection passed in by the caller. (Do not reassign
    # sde_connection here: an empty string would discard the argument.)
    arcpy.env.workspace = sde_connection

    # Rows of [feature dataset, feature class] for the output sheet
    data = []

    # The list functions may return None, hence the "or []"
    feature_datasets = arcpy.ListDatasets(feature_type="feature") or []
    if not feature_datasets:
        print("No feature datasets found in the SDE database.")

    for dataset in feature_datasets:
        data.append([dataset, ""])  # dataset name acts as a header row

        # Point the workspace at the dataset to list the classes inside it
        dataset_path = f"{sde_connection}\\{dataset}"
        arcpy.env.workspace = dataset_path
        feature_classes = arcpy.ListFeatureClasses() or []

        if not feature_classes:
            data.append(["", "No feature classes found"])
        else:
            data.extend(["", fc] for fc in feature_classes)

    # Process standalone feature classes (not inside a dataset)
    arcpy.env.workspace = sde_connection
    standalone_feature_classes = arcpy.ListFeatureClasses() or []

    data.append(["Standalone Feature Classes", ""])
    if not standalone_feature_classes:
        data.append(["", "No standalone feature classes found"])
    else:
        data.extend(["", fc] for fc in standalone_feature_classes)

    # Convert list to DataFrame and export to Excel
    df = pd.DataFrame(data, columns=["Feature Dataset", "Feature Class"])
    df.to_excel(output_excel, index=False)

    print(f"Feature datasets and feature classes have been exported to {output_excel}.")
# Example usage - runs only when this file is executed as a script, so
# importing the function elsewhere does not trigger an export
if __name__ == "__main__":
    sde_connection = r"C:\Users\***\AppData\Roaming\Esri\ArcGISPro\Favorites\***.sde"
    output_excel = r"C:\temp\SDEFeatureDatasetFeatureClassList.xlsx"
    export_sde_feature_datasets(sde_connection, output_excel)