Python Code for Writing Domain Description instead of Domain Code to Docx

Boom87 · ‎01-10-2025

Hi everyone,

I have created a python script and run it within ArcGIS Pro that takes attribute values from a feature class and populates them into << >> placeholders within a formatted docx template.

I am having issues in ensuring that the domain descriptions are passed through to the <<DIVISION>> placeholder, as right now the script populates the coded value domain rather than the full domain description (i.e. USA instead of United States of America). Is there a way to alter my python script in order to ensure the full description values are being passed through?

import arcpy
import docx
from datetime import datetime
import os

# Define the paths
# Word template containing the << >> placeholders that get filled in
inputfilename = r"C:\Users\UserName\Desktop\GTATable\FieldSummary_TEMPLATE.docx"
# Folder where the generated documents are saved
outputfolder = r"C:\Users\UserName\Desktop\Table\GeneratedDocuments"
# Feature class whose attribute values populate the template
feature_class = r"C:\Users\UserName\Documents\ArcGIS\Projects\Table\ACCS.gdb\FieldWork"

# Function to replace text in paragraphs and tables
def docx_find_replace_text(doc, old_text, new_text):
    """Replace every occurrence of old_text in the document body.

    Covers the top-level paragraphs and the paragraphs inside top-level
    tables.  Word frequently splits a placeholder such as ``<<DIVISION>>``
    across several runs; a purely run-by-run replacement then never matches
    and the placeholder is silently left in place, so matches that straddle
    runs are handled by rewriting the paragraph text as a whole.

    Args:
        doc: Object exposing ``paragraphs`` and ``tables`` the way a
            python-docx ``Document`` does.
        old_text: Placeholder to search for; an empty string is ignored.
        new_text: Replacement text.
    """
    if not old_text:
        return

    def replace_in_paragraph(paragraph):
        runs = paragraph.runs
        texts = [run.text for run in runs]
        joined = "".join(texts)
        if old_text not in joined:
            return
        if joined.count(old_text) == sum(text.count(old_text) for text in texts):
            # Every match sits inside a single run: edit run by run so each
            # run keeps its own character formatting.
            for run, text in zip(runs, texts):
                if old_text in text:
                    run.text = text.replace(old_text, new_text)
        else:
            # At least one match straddles runs: put the rewritten text in
            # the first run (keeping its formatting) and blank the others.
            runs[0].text = joined.replace(old_text, new_text)
            for run in runs[1:]:
                run.text = ""

    for paragraph in doc.paragraphs:
        replace_in_paragraph(paragraph)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    replace_in_paragraph(paragraph)

# Function to replace text in headers
def docx_find_replace_header(doc, old_text, new_text):
    """Swap old_text for new_text in the header of every section.

    Both the header's own paragraphs and the paragraphs nested in header
    tables are visited; the replacement itself is delegated to
    replace_text_in_paragraph.
    """
    for section in doc.sections:
        hdr = section.header
        targets = list(hdr.paragraphs)
        targets.extend(
            para
            for tbl in hdr.tables
            for tbl_row in tbl.rows
            for tbl_cell in tbl_row.cells
            for para in tbl_cell.paragraphs
        )
        for para in targets:
            replace_text_in_paragraph(para, old_text, new_text)

# Function to replace text in footers
def docx_find_replace_footer(doc, old_text, new_text):
    """Swap old_text for new_text in the footer of every section.

    Footer paragraphs are processed first, then every paragraph found in the
    cells of footer tables; replace_text_in_paragraph does the actual edit.
    """
    def footer_paragraphs(footer):
        yield from footer.paragraphs
        for tbl in footer.tables:
            for tbl_row in tbl.rows:
                for tbl_cell in tbl_row.cells:
                    yield from tbl_cell.paragraphs

    for section in doc.sections:
        for para in footer_paragraphs(section.footer):
            replace_text_in_paragraph(para, old_text, new_text)

# Function to handle fragmented runs in paragraphs
def replace_text_in_paragraph(paragraph, old_text, new_text):
    """Replace old_text in a paragraph even when it is split across runs.

    The run texts are concatenated before searching.  On a match the
    rewritten text is stored in the first run and every other run is
    emptied; a paragraph without a match is left untouched.
    """
    combined = "".join(piece.text for piece in paragraph.runs)
    if old_text not in combined:
        return

    rewritten = combined.replace(old_text, new_text)
    for position, piece in enumerate(paragraph.runs):
        piece.text = rewritten if position == 0 else ""

# Function to replace the date in the footer
def replace_date_in_footer(doc):
    """Fill the <<DATE2>> footer placeholder with today's date (day, full month name, year)."""
    today = datetime.now()
    stamp = today.strftime("%d %B %Y")
    docx_find_replace_footer(doc, "<<DATE2>>", stamp)

# Define the mapping between placeholders and feature class fields.
# Keys are the literal << >> tokens searched for in the template; values are
# the attribute field names read from the feature class.
field_mapping = {
    "<<PROJECT_NAME>>": "PROJECT_NAME",
    "<<FIELD_DATE>>": "FIELD_DATE",
    "<<ARCH_CREW>>": "ARCH_CREW",
    "<<PERMIT>>": "PERMIT",
    "<<DIVISION>>": "DIVISION",
    "<<METHOD>>": "METHOD",
    "<<DIST_EXIST>>": "DIST_EXIST",
    "<<DESCRIPTION>>": "DESCRIPTION",
    "<<DIST_REQ>>": "DIST_REQ",
    "<<HISTORY>>": "HISTORY",
    "<<SUB_OB>>": "SUB_OB",
    "<<ARCH_OB>>": "ARCH_OB",
    "<<REC>>": "REC"
}

# Get the feature class fields: every mapped field, with last_edited_date
# appended last so the cursor loop can read it as feature[-1]
feature_fields = list(field_mapping.values()) + ["last_edited_date"]

# Iterate through the feature class
# Walk the feature class and create or refresh one document per record.
# NOTE: values are written with str(), so a coded-value domain field such as
# DIVISION ends up in the document as its code, not its description.
with arcpy.da.SearchCursor(feature_class, feature_fields) as rows:
    for record in rows:
        # last_edited_date is the final entry of feature_fields
        edited_on = record[-1]
        if edited_on is None:
            print("Skipping record with no last_edited_date.")
            continue

        # The output file is named after the record's PROJECT_NAME
        project_name = record[feature_fields.index("PROJECT_NAME")]
        output_filename = os.path.join(outputfolder, f"{project_name}_FieldSummary.docx")

        # An existing document that is at least as new as the record is kept.
        # NOTE(review): compares the file's local modification time with the
        # stored last_edited_date - confirm both use the same time zone.
        if os.path.exists(output_filename):
            written_on = datetime.fromtimestamp(os.path.getmtime(output_filename))
            if written_on >= edited_on:
                print(f"Skipping {output_filename}, already up-to-date.")
                continue

        # Start from a fresh copy of the template for every record
        doc = docx.Document(inputfilename)

        for placeholder, field in field_mapping.items():
            text = str(record[feature_fields.index(field)])
            docx_find_replace_text(doc, placeholder, text)
            # Only these two placeholders are also replaced in the page header
            if placeholder in ["<<PROJECT_NAME>>", "<<DESCRIPTION>>"]:
                docx_find_replace_header(doc, placeholder, text)

        replace_date_in_footer(doc)

        doc.save(output_filename)
        print(f"Created or updated: {output_filename}")

print("Process completed.")

Thank you kindly for any tips!

DavidSolari · ‎01-10-2025

If you just need the domain codes @AlfredBaldenweck's method works well and is nice & simple. If your data has subtypes, you'll have to use arcpy.da.ListSubtypes and then plug in the value of the subtype field to get the appropriate info, otherwise you might pick the wrong domain for your current record.

View solution in original post

AlfredBaldenweck · ‎01-10-2025

Try working with the domain directly?

something like (untested)

gdb = r"C:\my.gdb"
domName = "Domain1"
# arcpy.da.ListDomains returns the Domain objects of the workspace; keep the
# one whose name matches (raises IndexError if no such domain exists)
dom = [d for d in arcpy.da.ListDomains(gdb) if d.name == domName][0]
# codedValues maps each code to its description,
# e.g. {"USA": "United States of America"}
dom = dom.codedValues

Then, around line 122, look up your value in the domain.

Haven't tried this myself, but that's what I'd try first (and I wish I had known about this three years ago when I was working with domains-- I ended up exporting to table and using a search cursor to generate the dictionary. Barf)

DavidSolari · ‎01-10-2025

If you just need the domain codes @AlfredBaldenweck's method works well and is nice & simple. If your data has subtypes, you'll have to use arcpy.da.ListSubtypes and then plug in the value of the subtype field to get the appropriate info, otherwise you might pick the wrong domain for your current record.

AlfredBaldenweck · ‎01-10-2025

Good call. I never would have considered subtypes.

Boomer87 · ‎01-12-2025

Thanks both for the tips @DavidSolari @AlfredBaldenweck ! I do have a subtype field, DEPT, in my feature class, on which the DIVISION field depends: a user selects a value in DEPT, and that determines which domain is made available in the DIVISION field.

I've looked into the arcpy.da.ListSubtypes (new to this) but am still getting the coded domain value instead of the full domain description from the domain for DIVISION written to the docx.

import arcpy
import docx
from datetime import datetime
import os

# Define the paths
# Word template containing the << >> placeholders that get filled in
inputfilename = r"C:\Users\USer\Desktop\Table\FieldSummary_TEMPLATE.docx"
# Folder where the generated documents are saved
outputfolder = r"C:\Users\User\Desktop\Table\GeneratedDocuments"
# Feature class whose attribute values populate the template
feature_class = r"C:\Users\User\Documents\ArcGIS\Projects\ACCS\ACCS.gdb\GTAFieldWork"

# Function to replace text in paragraphs and tables
def docx_find_replace_text(doc, old_text, new_text):
    """Replace every occurrence of old_text in the document body.

    Top-level paragraphs and paragraphs inside top-level tables are
    processed.  A placeholder that Word has split across several runs is
    never found by a run-by-run search, so such matches are resolved on the
    concatenated paragraph text instead of being silently skipped.

    Args:
        doc: Object exposing ``paragraphs`` and ``tables`` the way a
            python-docx ``Document`` does.
        old_text: Placeholder to search for; an empty string is ignored.
        new_text: Replacement text.
    """
    if not old_text:
        return

    def replace_in_paragraph(paragraph):
        runs = paragraph.runs
        texts = [run.text for run in runs]
        joined = "".join(texts)
        if old_text not in joined:
            return
        if joined.count(old_text) == sum(text.count(old_text) for text in texts):
            # All matches are contained in single runs: replace in place so
            # every run keeps its own formatting.
            for run, text in zip(runs, texts):
                if old_text in text:
                    run.text = text.replace(old_text, new_text)
        else:
            # A match spans runs: store the rewritten text in the first run
            # and empty the remaining ones.
            runs[0].text = joined.replace(old_text, new_text)
            for run in runs[1:]:
                run.text = ""

    for paragraph in doc.paragraphs:
        replace_in_paragraph(paragraph)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    replace_in_paragraph(paragraph)

# Function to replace text in headers
def docx_find_replace_header(doc, old_text, new_text):
    """Run replace_text_in_paragraph over every paragraph of every section header.

    Paragraphs directly in the header come first, followed by the paragraphs
    inside the cells of header tables.
    """
    def header_paragraphs(header):
        yield from header.paragraphs
        for tbl in header.tables:
            for tbl_row in tbl.rows:
                for tbl_cell in tbl_row.cells:
                    yield from tbl_cell.paragraphs

    for section in doc.sections:
        for para in header_paragraphs(section.header):
            replace_text_in_paragraph(para, old_text, new_text)

# Function to replace text in footers
def docx_find_replace_footer(doc, old_text, new_text):
    """Run replace_text_in_paragraph over every paragraph of every section footer.

    Paragraphs directly in the footer are collected first, then the
    paragraphs inside the cells of footer tables.
    """
    for section in doc.sections:
        ftr = section.footer
        targets = list(ftr.paragraphs)
        targets.extend(
            para
            for tbl in ftr.tables
            for tbl_row in tbl.rows
            for tbl_cell in tbl_row.cells
            for para in tbl_cell.paragraphs
        )
        for para in targets:
            replace_text_in_paragraph(para, old_text, new_text)

# Function to handle fragmented runs in paragraphs
def replace_text_in_paragraph(paragraph, old_text, new_text):
    """Replace old_text in a paragraph whose text may be spread over several runs.

    When the concatenated run text contains old_text, the first run receives
    the whole rewritten text and all other runs are blanked; otherwise the
    paragraph is not modified.
    """
    whole_text = "".join(part.text for part in paragraph.runs)
    if old_text not in whole_text:
        return

    replacement = whole_text.replace(old_text, new_text)
    for index, part in enumerate(paragraph.runs):
        part.text = "" if index else replacement

# Function to replace the date in the footer
def replace_date_in_footer(doc):
    """Write today's date (day, full month name, year) over <<DATE2>> in the footers."""
    now = datetime.now()
    docx_find_replace_footer(doc, "<<DATE2>>", now.strftime("%d %B %Y"))

# Define the mapping between placeholders and feature class fields.
# Keys are the literal << >> tokens searched for in the template; values are
# the attribute field names read from the feature class.
# NOTE: DEPT is mapped here and is also the subtype field that the script
# appends to the cursor field list further down.
field_mapping = {
    "<<PROJECT_NAME>>": "PROJECT_NAME",
    "<<FIELD_DATE>>": "FIELD_DATE",
    "<<ARCH_CREW>>": "ARCH_CREW",
    "<<PERMIT>>": "PERMIT",
    "<<DIVISION>>": "DIVISION",
    "<<METHOD>>": "METHOD",
    "<<DIST_EXIST>>": "DIST_EXIST",
    "<<DESCRIPTION>>": "DESCRIPTION",
    "<<DIST_REQ>>": "DIST_REQ",
    "<<HISTORY>>": "HISTORY",
    "<<SUB_OB>>": "SUB_OB",
    "<<ARCH_OB>>": "ARCH_OB",
    "<<REC>>": "REC",
    "<<DEPT>>": "DEPT"
}

# Add the subtype field to the fields being queried.  It is only appended
# when field_mapping does not already request it, so the cursor never asks
# for the same column twice.
subtype_field = "DEPT"
feature_fields = list(field_mapping.values()) + ["last_edited_date"]
if subtype_field not in feature_fields:
    feature_fields.append(subtype_field)
# Position of every queried field inside a cursor row
field_positions = {name: pos for pos, name in enumerate(feature_fields)}

# Retrieve domain descriptions for the DEPT field.  This is only a fallback:
# for a subtype field the description normally is the subtype's own name.
domains = arcpy.da.ListDomains(arcpy.Describe(feature_class).path)
dept_domain = next((d for d in domains if d.name == subtype_field), None)
dept_coded_values = (dept_domain.codedValues or {}) if dept_domain else {}

# Subtype definitions do not change while the cursor runs, so read them once.
# Each entry maps a subtype code to a dictionary holding, among others,
# "Name", "SubtypeField" and "FieldValues" = {field: (default value, Domain)}.
subtypes = arcpy.da.ListSubtypes(feature_class)

# Iterate through the feature class
with arcpy.da.SearchCursor(feature_class, feature_fields) as cursor:
    for feature in cursor:
        last_edited_date = feature[field_positions["last_edited_date"]]
        subtype_code = feature[field_positions[subtype_field]]

        # Skip records with no last_edited_date
        if last_edited_date is None:
            print("Skipping record with no last_edited_date.")
            continue

        project_name = feature[field_positions["PROJECT_NAME"]]

        # Generate output filename
        output_filename = os.path.join(outputfolder, f"{project_name}_FieldSummary.docx")

        # Check if the document needs to be updated
        if os.path.exists(output_filename):
            doc_mod_time = datetime.fromtimestamp(os.path.getmtime(output_filename))
            if doc_mod_time >= last_edited_date:
                print(f"Skipping {output_filename}, already up-to-date.")
                continue

        # Subtype information that applies to this record (empty when the
        # code is null or unknown)
        subtype_info = subtypes.get(subtype_code) or {}
        field_values = subtype_info.get("FieldValues") or {}

        # Create or update the document
        doc = docx.Document(inputfilename)

        for placeholder, field in field_mapping.items():
            value = feature[field_positions[field]]

            if field == subtype_field:
                # Map DEPT (subtype) to its description.  The subtype name is
                # only trusted when the feature class really has a subtype
                # field; otherwise fall back to a domain named like the field.
                if subtype_info.get("SubtypeField") and subtype_info.get("Name"):
                    value = subtype_info["Name"]
                elif value in dept_coded_values:
                    value = dept_coded_values[value]
            elif field == "DIVISION":
                # FieldValues["DIVISION"] is (default value, Domain object or
                # None).  The second item is the Domain itself, not its name,
                # so its coded values can be used directly.
                division_info = field_values.get("DIVISION")
                division_domain = division_info[1] if division_info else None
                coded_values = getattr(division_domain, "codedValues", None) or {}
                if value in coded_values:
                    value = coded_values[value]

            # Replace placeholders in the template
            docx_find_replace_text(doc, placeholder, str(value))
            if placeholder in ["<<PROJECT_NAME>>", "<<DESCRIPTION>>"]:
                docx_find_replace_header(doc, placeholder, str(value))

        # Replace the date in the footer
        replace_date_in_footer(doc)

        # Save the document
        doc.save(output_filename)
        print(f"Created or updated: {output_filename}")

print("Process completed.")

Boom87 · ‎01-13-2025

@AlfredBaldenweck @DavidSolari

Thanks again for all the input and advising of the subtype consideration! Actually, I was playing around more with the syntax and found that mapping out the subtype values and domains associated with it helped and populates correctly now 🙂

import arcpy
import docx
from datetime import datetime
import os

# Define paths
# Word template containing the << >> placeholders that get filled in
inputfilename = r"C:\Users\User\Desktop\Table\FieldSummary_TEMPLATE.docx"
# Folder where the generated documents are saved
outputfolder = r"C:\Users\User\Desktop\Table\GeneratedDocuments"
# Feature class whose attribute values populate the template
feature_class = r"C:\Users\User\Documents\ArcGIS\Projects\ACCS\ACCS.gdb\FieldWork"

# Define field mapping: template placeholder -> feature class field name
field_mapping = {
    "<<PROJECT_NAME>>": "PROJECT_NAME",
    "<<FIELD_DATE>>": "FIELD_DATE",
    "<<ARCH_CREW>>": "ARCH_CREW",
    "<<PERMIT>>": "PERMIT",
    "<<DIVISION>>": "DIVISION",
    "<<METHOD>>": "METHOD",
    "<<DIST_EXIST>>": "DIST_EXIST",
    "<<DESCRIPTION>>": "DESCRIPTION",
    "<<DIST_REQ>>": "DIST_REQ",
    "<<HISTORY>>": "HISTORY",
    "<<SUB_OB>>": "SUB_OB",
    "<<ARCH_OB>>": "ARCH_OB",
    "<<REC>>": "REC"
}

# Subtype to domain mapping: the key is the subtype code read from the DEPT
# field, the value is the name of the domain used to look up the DIVISION
# description for that subtype (codes 1 and 2 share one domain)
subtype_to_domain = {
    0: "DBLBranch",
    1: "ParksBranch",
    2: "ParksBranch",
    3: "REFMBranch",
    4: "CMOBranch"
}

# Utility functions
def docx_find_replace_text(doc, old_text, new_text):
    """Replace every occurrence of old_text in the document body.

    Top-level paragraphs and paragraphs inside top-level tables are
    processed.  A placeholder that Word has split across several runs is
    never found by a run-by-run search, so such matches are resolved on the
    concatenated paragraph text instead of being silently skipped.

    Args:
        doc: Object exposing ``paragraphs`` and ``tables`` the way a
            python-docx ``Document`` does.
        old_text: Placeholder to search for; an empty string is ignored.
        new_text: Replacement text.
    """
    if not old_text:
        return

    def replace_in_paragraph(paragraph):
        runs = paragraph.runs
        texts = [run.text for run in runs]
        joined = "".join(texts)
        if old_text not in joined:
            return
        if joined.count(old_text) == sum(text.count(old_text) for text in texts):
            # All matches are contained in single runs: replace in place so
            # every run keeps its own formatting.
            for run, text in zip(runs, texts):
                if old_text in text:
                    run.text = text.replace(old_text, new_text)
        else:
            # A match spans runs: store the rewritten text in the first run
            # and empty the remaining ones.
            runs[0].text = joined.replace(old_text, new_text)
            for run in runs[1:]:
                run.text = ""

    for paragraph in doc.paragraphs:
        replace_in_paragraph(paragraph)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    replace_in_paragraph(paragraph)

def replace_date_in_footer(doc):
    """Replace <<DATE2>> in every section footer with today's date.

    The date is formatted as day, full month name and year.  Paragraphs
    directly in the footer and paragraphs inside footer tables are both
    searched.  The search runs on the concatenated run text because Word
    often splits a placeholder across runs, in which case a run-by-run
    search leaves the placeholder in the document.

    Args:
        doc: Object exposing ``sections`` whose ``footer`` provides
            ``paragraphs`` and ``tables`` the way python-docx does.
    """
    placeholder = "<<DATE2>>"
    current_date = datetime.now().strftime("%d %B %Y")

    for section in doc.sections:
        footer = section.footer
        paragraphs = list(footer.paragraphs)
        for table in footer.tables:
            for row in table.rows:
                for cell in row.cells:
                    paragraphs.extend(cell.paragraphs)

        for paragraph in paragraphs:
            runs = paragraph.runs
            texts = [run.text for run in runs]
            joined = "".join(texts)
            if placeholder not in joined:
                continue
            if joined.count(placeholder) == sum(text.count(placeholder) for text in texts):
                # Placeholder is intact inside single runs: keep run formatting
                for run, text in zip(runs, texts):
                    if placeholder in text:
                        run.text = text.replace(placeholder, current_date)
            else:
                # Placeholder straddles runs: rewrite through the first run
                runs[0].text = joined.replace(placeholder, current_date)
                for run in runs[1:]:
                    run.text = ""

# Domain objects of the workspace that holds the feature class
domains = arcpy.da.ListDomains(arcpy.Describe(feature_class).path)

def get_domain_description(domain_name, code):
    """Look up the description stored for *code* in the domain *domain_name*.

    Only the first domain with a matching name is consulted.  When no such
    domain exists, or it does not list the code, *code* itself is returned.
    """
    for candidate in domains:
        if candidate.name != domain_name:
            continue
        if not candidate:
            return code
        lookup = candidate.codedValues
        return lookup[code] if code in lookup else code
    return code  # No domain with that name

# Query the feature class
subtype_field = "DEPT"  # Subtype field
feature_fields = list(field_mapping.values()) + ["last_edited_date", subtype_field]
# Position of every queried field inside a cursor row
field_positions = {name: pos for pos, name in enumerate(feature_fields)}

# Placeholders that also have to be filled in inside the page header
header_placeholders = ("<<PROJECT_NAME>>", "<<DESCRIPTION>>")


def _replace_in_headers(doc, old_text, new_text):
    """Replace old_text in every section header, header tables included.

    The search runs on the concatenated run text of each paragraph so that a
    placeholder split across several runs is still found; the rewritten text
    is stored in the first run and the remaining runs are emptied.
    """
    for section in doc.sections:
        header = section.header
        paragraphs = list(header.paragraphs)
        for table in header.tables:
            for row in table.rows:
                for cell in row.cells:
                    paragraphs.extend(cell.paragraphs)
        for paragraph in paragraphs:
            runs = paragraph.runs
            joined = "".join(run.text for run in runs)
            if not old_text or old_text not in joined:
                continue
            runs[0].text = joined.replace(old_text, new_text)
            for run in runs[1:]:
                run.text = ""


with arcpy.da.SearchCursor(feature_class, feature_fields) as cursor:
    for feature in cursor:
        last_edited_date = feature[field_positions["last_edited_date"]]
        subtype_code = feature[field_positions[subtype_field]]

        if last_edited_date is None:
            print("Skipping record with no last_edited_date.")
            continue

        project_name = feature[field_positions["PROJECT_NAME"]]

        output_filename = os.path.join(outputfolder, f"{project_name}_FieldSummary.docx")

        # Keep an existing document that is at least as new as the record
        if os.path.exists(output_filename):
            doc_mod_time = datetime.fromtimestamp(os.path.getmtime(output_filename))
            if doc_mod_time >= last_edited_date:
                print(f"Skipping {output_filename}, already up-to-date.")
                continue

        doc = docx.Document(inputfilename)

        for placeholder, field in field_mapping.items():
            value = feature[field_positions[field]]

            # Handle DIVISION with subtype-specific domains
            if field == "DIVISION":
                domain_name = subtype_to_domain.get(subtype_code)
                if domain_name:
                    value = get_domain_description(domain_name, value)

            # Replace placeholders in the template body
            docx_find_replace_text(doc, placeholder, str(value))
            # The body replacement does not touch headers, so the header
            # placeholders are filled in separately
            if placeholder in header_placeholders:
                _replace_in_headers(doc, placeholder, str(value))

        # Replace date in the footer
        replace_date_in_footer(doc)

        doc.save(output_filename)
        print(f"Created or updated: {output_filename}")

print("Process completed.")

Boom87 · ‎01-13-2025

Sorry one last question to pick your brain @AlfredBaldenweck !

For some reason, prior to entering in the subtype and domain list code, the code was writing perfectly to the header and footer sections. In the header, it was writing <<PROJECT_NAME>> and <<DESCRIPTION>> properly and in the footer <<DATE2>>. After getting the subtype and domain functionality to work, now the header and footer functionality has stopped working and is just leaving the placeholders in the docx. Is there anything strange you notice in the syntax snippet that would cause this to be the case? Thanks so much again in advance!

HaydenWelch · ‎01-14-2025

It seems that there are some differences in the docx helper functions between your first post and your last one. I think those are to blame. Also, here's a version that brings the computation out of the SearchCursor and adds a basic command-line interface so you can run this with a bash script on a schedule:

import arcpy
import arcpy.typing
import arcpy.typing.describe
import docx
from datetime import datetime
from pathlib import Path

# Utility functions
def docx_find_replace_text(doc, old_text, new_text):
    """Replace every occurrence of old_text in the document body.

    Top-level paragraphs and paragraphs inside top-level tables are
    processed.  A placeholder that Word has split across several runs is
    never found by a run-by-run search, so such matches are resolved on the
    concatenated paragraph text instead of being silently skipped.

    Args:
        doc: Object exposing ``paragraphs`` and ``tables`` the way a
            python-docx ``Document`` does.
        old_text: Placeholder to search for; an empty string is ignored.
        new_text: Replacement text.
    """
    if not old_text:
        return

    def replace_in_paragraph(paragraph):
        runs = paragraph.runs
        texts = [run.text for run in runs]
        joined = "".join(texts)
        if old_text not in joined:
            return
        if joined.count(old_text) == sum(text.count(old_text) for text in texts):
            # All matches are contained in single runs: replace in place so
            # every run keeps its own formatting.
            for run, text in zip(runs, texts):
                if old_text in text:
                    run.text = text.replace(old_text, new_text)
        else:
            # A match spans runs: store the rewritten text in the first run
            # and empty the remaining ones.
            runs[0].text = joined.replace(old_text, new_text)
            for run in runs[1:]:
                run.text = ""

    for paragraph in doc.paragraphs:
        replace_in_paragraph(paragraph)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    replace_in_paragraph(paragraph)

def replace_date_in_footer(doc):
    """Replace <<DATE2>> in every section footer with today's date.

    The date is formatted as day, full month name and year.  Paragraphs
    directly in the footer and paragraphs inside footer tables are both
    searched.  The search runs on the concatenated run text because Word
    often splits a placeholder across runs, in which case a run-by-run
    search leaves the placeholder in the document.

    Args:
        doc: Object exposing ``sections`` whose ``footer`` provides
            ``paragraphs`` and ``tables`` the way python-docx does.
    """
    placeholder = "<<DATE2>>"
    current_date = datetime.now().strftime("%d %B %Y")

    for section in doc.sections:
        footer = section.footer
        paragraphs = list(footer.paragraphs)
        for table in footer.tables:
            for row in table.rows:
                for cell in row.cells:
                    paragraphs.extend(cell.paragraphs)

        for paragraph in paragraphs:
            runs = paragraph.runs
            texts = [run.text for run in runs]
            joined = "".join(texts)
            if placeholder not in joined:
                continue
            if joined.count(placeholder) == sum(text.count(placeholder) for text in texts):
                # Placeholder is intact inside single runs: keep run formatting
                for run, text in zip(runs, texts):
                    if placeholder in text:
                        run.text = text.replace(placeholder, current_date)
            else:
                # Placeholder straddles runs: rewrite through the first run
                runs[0].text = joined.replace(placeholder, current_date)
                for run in runs[1:]:
                    run.text = ""

def as_dict(cursor: arcpy.da.SearchCursor):
    """Yield each row of *cursor* as a ``{field name: value}`` dictionary."""
    for row in cursor:
        yield dict(zip(cursor.fields, row))
    
def get_domains(feature_class) -> dict[str, arcpy.da.Domain]:
    """Map each domain name to its Domain object for the feature class's workspace."""
    workspace_path = arcpy.Describe(feature_class).workspace.catalogPath
    by_name = {}
    for domain in arcpy.da.ListDomains(workspace_path):
        by_name[domain.name] = domain
    return by_name

def generate_field_summary(inputfilename, outputfolder, feature_class):
    """Create or refresh one field-summary document per record of a feature class.

    For every record with a last_edited_date, the template is loaded, each
    placeholder of the field mapping is replaced by the record's value, the
    footer date is filled in and the result is saved as
    ``<PROJECT_NAME>_FieldSummary.docx``.  An existing document whose
    modification time is not older than the record's last_edited_date is
    left alone.  The DIVISION code is translated to its description using
    the domain assigned to the record's DEPT subtype.

    Args:
        inputfilename: Path of the .docx template.
        outputfolder: Folder that receives the generated documents.
        feature_class: Catalog path of the feature class to read.
    """
    # Define paths
    inputfilename = Path(inputfilename)
    outputfolder = Path(outputfolder)
    # Kept as a plain string: catalog paths are handed to arcpy, and the
    # rest of this function already converts to str before calling arcpy.
    feature_class = str(feature_class)

    # Define field mapping
    field_mapping = {
        "<<PROJECT_NAME>>": "PROJECT_NAME",
        "<<FIELD_DATE>>": "FIELD_DATE",
        "<<ARCH_CREW>>": "ARCH_CREW",
        "<<PERMIT>>": "PERMIT",
        "<<DIVISION>>": "DIVISION",
        "<<METHOD>>": "METHOD",
        "<<DIST_EXIST>>": "DIST_EXIST",
        "<<DESCRIPTION>>": "DESCRIPTION",
        "<<DIST_REQ>>": "DIST_REQ",
        "<<HISTORY>>": "HISTORY",
        "<<SUB_OB>>": "SUB_OB",
        "<<ARCH_OB>>": "ARCH_OB",
        "<<REC>>": "REC"
    }

    # Subtype to domain mapping
    subtype_to_domain = {
        0: "DBLBranch",
        1: "ParksBranch",
        2: "ParksBranch",
        3: "REFMBranch",
        4: "CMOBranch"
    }

    subtype_field = "DEPT"  # Subtype field
    feature_fields = list(field_mapping.values()) + ['last_edited_date', subtype_field]

    # Read all rows up front; the context manager releases the cursor as soon
    # as the rows have been copied into memory.
    with arcpy.da.SearchCursor(feature_class, feature_fields) as cursor:
        features = list(as_dict(cursor))
    domains = get_domains(feature_class)

    for feature in features:
        last_edited_date = feature['last_edited_date']
        subtype_code = feature[subtype_field]

        if not last_edited_date:
            print("Skipping record with no last_edited_date.")
            continue

        output_file = outputfolder / f"{feature['PROJECT_NAME']}_FieldSummary.docx"
        if output_file.exists():
            doc_mod_time = datetime.fromtimestamp(output_file.stat().st_mtime)
            if doc_mod_time >= last_edited_date:
                print(f"Skipping {output_file}, already up-to-date.")
                continue

        doc = docx.Document(str(inputfilename))
        for placeholder, field in field_mapping.items():
            value = feature[field]

            # Handle DIVISION with subtype-specific domains: the record's
            # subtype selects the domain, and the domain's coded values turn
            # the stored code into its description (the code is kept when
            # the domain or the code is unknown).
            if field == "DIVISION" and subtype_code in subtype_to_domain:
                domain = domains.get(subtype_to_domain[subtype_code])
                coded_values = (domain.codedValues or {}) if domain else {}
                value = coded_values.get(value, value)

            docx_find_replace_text(doc, placeholder, str(value))
            # TODO: header placeholders are not replaced yet; this script has
            # no header helper.

        # Replace date in the footer
        replace_date_in_footer(doc)

        doc.save(str(output_file))

        print(f"Created or updated: {output_file.name}")
    print("Process completed.")

if __name__ == "__main__":
    from argparse import ArgumentParser

    # ArgumentParser has no ``name`` keyword (it raises TypeError); the
    # program name shown in usage/help messages is set with ``prog``.
    parser = ArgumentParser(
        prog="Generate Field Summary",
        description="Generates and maintains field summary documents for fieldwork records."
    )
    # All three paths are needed, so let argparse report a missing one
    # instead of passing None on to generate_field_summary.
    parser.add_argument("-i", "--inputfile", required=True, help="Input filename")
    parser.add_argument("-o", "--outputfile", required=True, help="Output folder")
    # The long option needs two dashes: with "-featureclass" argparse stores
    # the value as ``args.f`` and ``args.featureclass`` does not exist.
    parser.add_argument("-f", "--featureclass", required=True, help="Feature class")
    args = parser.parse_args()

    generate_field_summary(args.inputfile, args.outputfile, args.featureclass)