<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Python search all folders for aprx apart from those folders containing sub-string in Python Questions</title>
    <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578626#M73634</link>
    <description>&lt;P&gt;Looks like you were essentially there.&amp;nbsp; This should satisfy your goal, along with some minor refactoring.&amp;nbsp; Hope it helps!&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import os
import arcpy

# Extensions to search. The trailing comma matters: ('.aprx') is only a
# parenthesised string, while ('.aprx',) is a real one-element tuple that
# str.endswith() accepts and that can be extended with more suffixes later.
extensions = ('.aprx',)

# Directory names to skip entirely (exact, case-sensitive matches)
exclude_directories = {'Archived', '#Archived', 'Archive', 'archived', 'archive', '#archived', '#archive', 'ss', 'SS'}

# Walk through the directory
for dname, dirs, files in os.walk(r'C:\Users\MyDirectory'):
    dirs[:] = [d for d in dirs if d not in exclude_directories and 'myfolder' not in d.lower()]  # Exclude directories
    for fname in files:
        if fname.lower().endswith(extensions):  # Check for the .aprx extension
            fpath = os.path.join(dname, fname)  # Full path to the .aprx file
            try:
                aprx = arcpy.mp.ArcGISProject(fpath)  # Open the ArcGIS Project
                for m in aprx.listMaps():  # Get aprx maps
                    for lyr in m.listLayers():  # Get map layers
                        if lyr.isBroken:  # Check if layer is broken
                            print(f"(BROKEN) {lyr.name} - {m.name}")
                        else:
                            if lyr.supports("DATASOURCE"):
                                print(f"Map Layer Name: {lyr.name} | SZ_gis_dataname: {lyr.dataSource}")
            except Exception as e:
                print(f"Error processing file {fpath}: {e}")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 24 Jan 2025 13:49:58 GMT</pubDate>
    <dc:creator>Marshal</dc:creator>
    <dc:date>2025-01-24T13:49:58Z</dc:date>
    <item>
      <title>Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578605#M73633</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;I'm new to python and have created the code below after much googling. I'm trying to search through all subfolders within my directory and find all aprx's then list all of the features held within the aprx. My code achieves this but I'd like to make it more efficient by skipping over sub-folders that contain a certain sub-string.&lt;/P&gt;&lt;P&gt;For example I'd like to be able to remove all folders containing the string "myfolder" if the directory contained the following folders:&lt;/P&gt;&lt;P&gt;myfolder_1&lt;/P&gt;&lt;P&gt;myfolder2&lt;/P&gt;&lt;P&gt;testfolder&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;So my goal would be to only search for aprx's in the folder "testfolder".&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My Code:&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;extensions = ('.aprx')#extention to search&lt;BR /&gt;exclude_directories = set(['Archived','#Archived','Archive','archived','archive','#archived','#archive','ss','SS']) #directory (only names) want to exclude&lt;BR /&gt;for dname, dirs, files in os.walk(r'C:\Users\MyDirectory'):&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; dirs[:] = [d for d in dirs if d not in exclude_directories] # exclude directory if in exclude list&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; for fname in files:&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if(fname.lower().endswith(extensions)): #check for extension&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fpath = os.path.join(dname, fname) #this generates full directory path for file&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; aprx = arcpy.mp.ArcGISProject(fpath)&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for m in aprx.listMaps():&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for lyr in m.listLayers():&lt;BR /&gt;&amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if lyr.isBroken:&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; print("(BROKEN) " + lyr.name + " - " + m.name)&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; else:&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;if lyr.supports("DATASOURCE"):&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; print ("Map Layer Name: " + lyr.name + "| SZ_gis_dataname: " + lyr.dataSource[211:])&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Username1_0-1737722063734.png" style="width: 400px;"&gt;&lt;img src="https://community.esri.com/t5/image/serverpage/image-id/123972iE24D1A1B46529279/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Username1_0-1737722063734.png" alt="Username1_0-1737722063734.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Jan 2025 12:37:46 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578605#M73633</guid>
      <dc:creator>Username1</dc:creator>
      <dc:date>2025-01-24T12:37:46Z</dc:date>
    </item>
    <item>
      <title>Re: Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578626#M73634</link>
      <description>&lt;P&gt;Looks like you were essentially there.&amp;nbsp; This should satisfy your goal, along with some minor refactoring.&amp;nbsp; Hope it helps!&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import os
import arcpy

# Extensions to search. The trailing comma matters: ('.aprx') is only a
# parenthesised string, while ('.aprx',) is a real one-element tuple that
# str.endswith() accepts and that can be extended with more suffixes later.
extensions = ('.aprx',)

# Directory names to skip entirely (exact, case-sensitive matches)
exclude_directories = {'Archived', '#Archived', 'Archive', 'archived', 'archive', '#archived', '#archive', 'ss', 'SS'}

# Walk through the directory
for dname, dirs, files in os.walk(r'C:\Users\MyDirectory'):
    dirs[:] = [d for d in dirs if d not in exclude_directories and 'myfolder' not in d.lower()]  # Exclude directories
    for fname in files:
        if fname.lower().endswith(extensions):  # Check for the .aprx extension
            fpath = os.path.join(dname, fname)  # Full path to the .aprx file
            try:
                aprx = arcpy.mp.ArcGISProject(fpath)  # Open the ArcGIS Project
                for m in aprx.listMaps():  # Get aprx maps
                    for lyr in m.listLayers():  # Get map layers
                        if lyr.isBroken:  # Check if layer is broken
                            print(f"(BROKEN) {lyr.name} - {m.name}")
                        else:
                            if lyr.supports("DATASOURCE"):
                                print(f"Map Layer Name: {lyr.name} | SZ_gis_dataname: {lyr.dataSource}")
            except Exception as e:
                print(f"Error processing file {fpath}: {e}")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Jan 2025 13:49:58 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578626#M73634</guid>
      <dc:creator>Marshal</dc:creator>
      <dc:date>2025-01-24T13:49:58Z</dc:date>
    </item>
    <item>
      <title>Re: Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578634#M73635</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.esri.com/t5/user/viewprofilepage/user-id/610146"&gt;@Username1&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;For future reference please upload your code by clicking on the expand toolbar&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RPGIS_1-1737726673358.png" style="width: 400px;"&gt;&lt;img src="https://community.esri.com/t5/image/serverpage/image-id/123975i50253DDB340F8088/image-size/medium?v=v2&amp;amp;px=400" role="button" title="RPGIS_1-1737726673358.png" alt="RPGIS_1-1737726673358.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;and inserting your code using&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RPGIS_2-1737726740412.png" style="width: 400px;"&gt;&lt;img src="https://community.esri.com/t5/image/serverpage/image-id/123976iBB4261B7FB2F4C68/image-size/medium?v=v2&amp;amp;px=400" role="button" title="RPGIS_2-1737726740412.png" alt="RPGIS_2-1737726740412.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regarding your code, try this.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;exclude_directories = ['Archived','#Archived','Archive','archived','archive','#archived','#archive','ss','SS'] #directory (only names) want to exclude
RemoveFilesByName = 'myfolderex'

WorkingFolder = r'C:\Users\MyDirectory'
Projects = []
# Collect every .aprx under WorkingFolder into Projects.
for root, directory, filenames in os.walk(WorkingFolder):
	# os.walk yields the sub-directory names as a list, so os.path.split()
	# on it raises a TypeError. Prune the list in place instead: os.walk
	# then never descends into the excluded folders.
	directory[:] = [ d for d in directory if d not in exclude_directories ]
	for filename in filenames:
		# Match the extension at the end of the name only, ignoring case,
		# and skip files whose name contains the RemoveFilesByName text
		if filename.lower().endswith('.aprx') and RemoveFilesByName not in filename:
			Projects.append( os.path.join(root, filename) )
if len( Projects ) &amp;gt; 0:
	for project in Projects:
		project = arcpy.mp.ArcGISProject(project)
		maplayers = [ lyr for lyr in m.listLayers() for m in project.listMaps() if any([ lyr.supports("DATASOURCE") , lyr.isBroken ])]
		for layer in maplayers:
			if lyr.isBroken: print( f'(BROKEN) {lyr.name} - {m.name}' )
			else: print( f'Map Layer Name: {lyr.name} | SZ_gis_dataname: {lyr.dataSource[211:])}' )&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Jan 2025 14:21:31 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578634#M73635</guid>
      <dc:creator>RPGIS</dc:creator>
      <dc:date>2025-01-24T14:21:31Z</dc:date>
    </item>
    <item>
      <title>Re: Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578711#M73636</link>
      <description>&lt;P&gt;I use something like this.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import os
import arcpy

# File extensions to look for
extensions = ('.aprx',)

# Excludes folder
exclude_substrings = ['Folder']

def list_aprx_files(directory):
    """Return the full paths of all project files found below ``directory``.

    Sub-directories whose name contains any entry of the module-level
    ``exclude_substrings`` are not descended into. A file counts as a
    project file when its lower-cased name ends with one of the
    module-level ``extensions``.
    """
    def _is_excluded(folder_name):
        # Case-sensitive substring test against every excluded fragment
        return any(fragment in folder_name for fragment in exclude_substrings)

    matches = []
    for current_dir, subdirs, filenames in os.walk(directory):
        # Slice-assign so os.walk itself skips the excluded folders
        subdirs[:] = [name for name in subdirs if not _is_excluded(name)]
        matches.extend(
            os.path.join(current_dir, name)
            for name in filenames
            if name.lower().endswith(extensions)
        )
    return matches

def process_layer(layer, map_name):
    """Print a one-line report for ``layer``, which belongs to ``map_name``.

    A broken layer is reported as such. Otherwise the data source is
    printed when the layer supports the "DATASOURCE" property, and a
    notice is printed when it does not.
    """
    if layer.isBroken:
        print(f"Broken Layer: {layer.name} | Map: {map_name}")
        return

    if not layer.supports("DATASOURCE"):
        print(f"Layer: {layer.name} doesn't support data sources.")
        return

    try:
        print(f"Layer: {layer.name} | Map: {map_name} | Data Source: {layer.dataSource}")
    except Exception as err:
        # Reading the data source can itself fail; report it and carry on
        print(f"Couldn't get data source for {layer.name}: {err}")

def process_aprx_file(aprx_path):
    """Open the project at ``aprx_path`` and report on every layer of every map.

    Any exception raised while opening or walking the project is caught
    and printed, so one bad project does not abort the caller's loop.
    """
    try:
        print(f"Opening project: {aprx_path}")
        project = arcpy.mp.ArcGISProject(aprx_path)
        # Flatten the map/layer nesting into (layer, owning map) pairs;
        # the generator is consumed lazily, one layer at a time
        layer_pairs = (
            (layer, owner)
            for owner in project.listMaps()
            for layer in owner.listLayers()
        )
        for layer, owner in layer_pairs:
            process_layer(layer, owner.name)
    except Exception as err:
        print(f"Failed to process {aprx_path}: {err}")

def main(directory):
    """Scan ``directory`` for project files and process each one found.

    Prints a notice instead when the scan finds no project file.
    """
    discovered = list_aprx_files(directory)
    if discovered:
        for project_path in discovered:
            process_aprx_file(project_path)
    else:
        print(f"No project files (.aprx) found in {directory}.")

# Change this path to the folder you want to scan
main(r'O:\***\***\***')&lt;/LI-CODE&gt;</description>
      <pubDate>Fri, 24 Jan 2025 15:59:58 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1578711#M73636</guid>
      <dc:creator>TonyAlmeida</dc:creator>
      <dc:date>2025-01-24T15:59:58Z</dc:date>
    </item>
    <item>
      <title>Re: Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1579964#M73668</link>
      <description>&lt;P&gt;Thanks for this, defining each step isn't something I'd considered and not an approach I'd come across.&lt;/P&gt;&lt;P&gt;The final step of my code is to write out the findings as a spreadsheet. I've been trying to get it to work with your code but haven't managed it. I assume that it would be possible to get the results and save them out as a spreadsheet?&lt;BR /&gt;&lt;BR /&gt;For reference my code looks like this:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;                  feature_service.append((m.name,
                                                    lyr.name,
                                                    lyr.dataSource[211:]))
feat_service_cols = ['Map Name','Map Layer Name','SZ_gis_dataname']

# One row per (map name, layer name, data source) tuple collected above
feat_service_result = pd.DataFrame(feature_service, columns=feat_service_cols)
#print (feat_service_result)

# feat_service_result is already a DataFrame, so no conversion is needed;
# the second name is kept because the export below uses it
df_feat_service_result = feat_service_result
#print (df_feat_service_result)

#write to excel
# A raw string cannot end in a single backslash (r'C:\MyOutputFolder\' is a
# syntax error); os.path.join below supplies the separator instead.
out_folder = r'C:\MyOutputFolder' #folder for excel
os.makedirs(out_folder, exist_ok=True) #make output folder if missing

ts = datetime.now().strftime('%Y%m%d%H%M%S')  # full timestamp (not used in the file name below)
path=os.path.join(out_folder, "Report_"+ datetime.today().strftime('%Y.%m.%d')+'.xlsx') #set path for excel output

with pd.ExcelWriter(path) as writer:
    df_feat_service_result.to_excel(writer, sheet_name='feat_service_result')
print (f"Excel outputs can be found in {path}")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jan 2025 15:40:35 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1579964#M73668</guid>
      <dc:creator>Username1</dc:creator>
      <dc:date>2025-01-29T15:40:35Z</dc:date>
    </item>
    <item>
      <title>Re: Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1579984#M73670</link>
      <description>&lt;P&gt;You can use the ArcGIS API for Python to read into Pandas dataframe, after which you could simply use .to_excel() method on the dataframe.&amp;nbsp; It is unclear what type of data source your feature_service variable is, but I suspect one of the below would work.&lt;/P&gt;&lt;P&gt;&lt;U&gt;&lt;STRONG&gt;Feature Layer to dataframe&lt;/STRONG&gt;&lt;/U&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://developers.arcgis.com/python/latest/api-reference/arcgis.features.toc.html#arcgis.features.GeoAccessor.from_layer" target="_blank"&gt;https://developers.arcgis.com/python/latest/api-reference/arcgis.features.toc.html#arcgis.features.GeoAccessor.from_layer&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;U&gt;&lt;STRONG&gt;Feature Class to dataframe&lt;/STRONG&gt;&lt;/U&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://developers.arcgis.com/python/latest/api-reference/arcgis.features.toc.html#arcgis.features.GeoAccessor.from_featureclass" target="_blank"&gt;https://developers.arcgis.com/python/latest/api-reference/arcgis.features.toc.html#arcgis.features.GeoAccessor.from_featureclass&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jan 2025 16:21:14 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1579984#M73670</guid>
      <dc:creator>Marshal</dc:creator>
      <dc:date>2025-01-29T16:21:14Z</dc:date>
    </item>
    <item>
      <title>Re: Python search all folders for aprx apart from those folders containing sub-string</title>
      <link>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1581363#M73691</link>
      <description>&lt;P&gt;Here is a function I use once a year to help me clean up data, and it allows me to create a .xlsx and share it with my peers.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import arcpy
import pandas as pd

def export_sde_feature_datasets(sde_connection, output_excel):
    """
    List the feature datasets and feature classes in an SDE database
    and export them to an Excel file.

    Parameters:
        sde_connection: path to the .sde connection file; used as the
            arcpy workspace.
        output_excel: path of the .xlsx file to write.

    The sheet has two columns, "Feature Dataset" and "Feature Class".
    Each dataset gets a header row followed by one row per feature class,
    and feature classes outside any dataset are listed under
    "Standalone Feature Classes".

    Side effect: arcpy.env.workspace is left pointing at sde_connection.
    """
    # Use the connection passed in by the caller. (It used to be overwritten
    # with an empty string here, so the argument was silently ignored.)
    arcpy.env.workspace = sde_connection

    # Rows for the output sheet: [feature dataset, feature class]
    data = []

    # Feature datasets in the SDE database; "or []" guards against a None result
    feature_datasets = arcpy.ListDatasets(feature_type="feature") or []

    if not feature_datasets:
        print("No feature datasets found in the SDE database.")
    else:
        for dataset in feature_datasets:
            data.append([dataset, ""])  # Dataset name as a header row

            # Point the workspace at the dataset to list the classes inside it
            dataset_path = f"{sde_connection}\\{dataset}"
            arcpy.env.workspace = dataset_path
            feature_classes = arcpy.ListFeatureClasses() or []

            if not feature_classes:
                data.append(["", "No feature classes found"])
            else:
                for fc in feature_classes:
                    data.append(["", fc])

    # Back to the database root for feature classes that are not in a dataset
    arcpy.env.workspace = sde_connection
    standalone_feature_classes = arcpy.ListFeatureClasses() or []

    data.append(["Standalone Feature Classes", ""])

    if not standalone_feature_classes:
        data.append(["", "No standalone feature classes found"])
    else:
        for fc in standalone_feature_classes:
            data.append(["", fc])

    # Convert the rows to a DataFrame and export to Excel
    df = pd.DataFrame(data, columns=["Feature Dataset", "Feature Class"])
    df.to_excel(output_excel, index=False)

    print(f"Feature datasets and feature classes have been exported to {output_excel}.")

# Example usage
sde_connection = r"C:\Users\***\AppData\Roaming\Esri\ArcGISPro\Favorites\***.sde"
output_excel = r"C:\temp\SDEFeatureDatasetFeatureClassList.xlsx"

export_sde_feature_datasets(sde_connection, output_excel)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 03 Feb 2025 16:38:16 GMT</pubDate>
      <guid>https://community.esri.com/t5/python-questions/python-search-all-folders-for-aprx-apart-from/m-p/1581363#M73691</guid>
      <dc:creator>TonyAlmeida</dc:creator>
      <dc:date>2025-02-03T16:38:16Z</dc:date>
    </item>
  </channel>
</rss>

