Fix hard returns/carriages in text fields using update cursor

tigerwoulds · ‎03-03-2022

Pretty new to cursors here - trying to write a script that will remove any hard 'returns'/newlines/carriages from any text values in a feature class.

For example, a user might enter:

What I need is:

Can someone clarify why I'm getting the error "TypeError: sequence size must match size of the row"?

import arcpy
import os
import time

# Script purpose: walk every feature class in the input geodatabase and try to
# remove line-feed (chr(10)) and carriage-return (chr(13)) characters from the
# values of its text fields, reporting where they were found.

# Start the clock
# NOTE(review): startTime is never read again in the visible script.
startTime = time.time()

# Input geodatabase with feature classes to update
inputGDB = r'C:\data\test.gdb'

# Set workspace to input GDB
arcpy.env.workspace = inputGDB

# Replacement text for each line-break character (empty string = delete it)
eolchar = ""

# Get a list of feature datasets
datasets = arcpy.ListDatasets(feature_type='feature')
# The '' entry stands for "no feature dataset" so that feature classes at the
# workspace root are listed too.
# NOTE(review): this parses as ([''] + datasets) if datasets is not None else [],
# so when datasets is None the result is [] and the root level is skipped as well.
datasets = [''] + datasets if datasets is not None else []

# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds):
        # Get the name of the feature class
        # NOTE(review): the path is built without ds, even for feature classes
        # that were listed from inside a feature dataset.
        path = os.path.join(arcpy.env.workspace, fc)
        desc = arcpy.Describe(path)
        fcName = desc.name
        print('Cleaning feature class: {}'.format(fcName))

        # Build a list of all TEXT/String Fields
        fieldList = [i.name for i in arcpy.ListFields(fc) if i.type == 'String']
        # Put the object ID first so row[0] can be used in the report below
        fieldList = ['OID@'] + fieldList

        # Iterate through each row and execute the clean
        # row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]

        with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
            for row in cursor:
                # i is each value in the row, starting with the object ID
                for i in row:
                    if i is not None:
                        # str() is applied because the first value is the object ID, not text
                        if chr(10) in str(i) or chr(13) in str(i):
                            print('\nHard Return found in feature class: {}'.format(fc))
                            print('Object ID: {}'.format(row[0]))
                            print('There is a newline in {}'.format(i))
                            # print ('Field is {}'.format(fieldList))
                            # NOTE(review): this rebinds `row` to the single cleaned string
                            # instead of replacing one element of the row, so the object
                            # passed to updateRow no longer has one entry per field in
                            # fieldList. That is the "sequence size must match size of
                            # the row" TypeError asked about.
                            row = i.replace(chr(10), eolchar).replace(chr(13), eolchar)
                            cursor.updateRow(row)

        print('Finished cleaning.')

print('Completed.')

If I use the following line instead, it works.

# row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]

But I'm trying to print the Feature Class name, OID, and Field Name if/where any changes are made. Using the code above, how can I extract the field name where hard returns/carriages are removed?

Anonymous User · ‎03-04-2022

Taking David's enumerate example and changing the variables to match the index, value tuple it creates:

# Get a list of feature datasets. The leading '' entry makes the loop below
# also visit feature classes stored at the root of the workspace (outside any
# feature dataset); `or []` keeps the script running if ListDatasets returns
# None instead of a list.
datasets = [''] + (arcpy.ListDatasets(feature_type='feature') or [])

# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
    # `or []` guards the same way in case nothing is listed for this dataset.
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
        print('Cleaning feature class: {}'.format(fc))

        # Build a list of all TEXT/String fields, with the object ID first so
        # that row[0] identifies the record in the report below.
        fieldList = ['OID@'] + [fld.name for fld in arcpy.ListFields(fc) if fld.type == 'String']

        with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
            for row in cursor:
                changed = False
                # i is the index, val is the row[index] value
                for i, val in enumerate(row):
                    # Skip the OID field, nulls and ''
                    if i == 0 or not val:
                        continue

                    # Remove carriage returns and line feeds wherever they occur
                    # in the value, not only when one is the last character.
                    # Other whitespace (spaces, tabs) is left untouched.
                    cleaned = val.replace('\r', '').replace('\n', '')
                    if cleaned != val:
                        changed = True  # this row has changes and must be written back
                        print('\nHard Return found in feature class: {}'.format(fc))
                        print('Object ID: {}'.format(row[0]))
                        print('There is a newline in {} val: {}'.format(fieldList[i], val))
                        # Replace only this element so the row keeps one value
                        # per field in fieldList, as updateRow requires.
                        row[i] = cleaned

                # Write the row back only if something actually changed.
                if changed:
                    cursor.updateRow(row)

        print('Finished cleaning.')

View solution in original post

DavidPike · ‎03-04-2022

Probably access the field using the iteration number as an index to fieldList, maybe better ways.

# Sketch: pair each value in the row with its position, then use that position
# to look up the matching field name in fieldList (the list the cursor was
# opened with).
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
  for row in cursor:
    # counter is the position within the row, i is the value at that position
    for counter, i in enumerate(row):
      current_field = fieldList[counter]

tigerwoulds · ‎03-04-2022

Great, thanks. That's what I needed to get the field name. Still getting the sequence error though.

# Second attempt: report the object ID and field name for every value that
# contains a line feed (chr(10)) or carriage return (chr(13)), then try to
# write the cleaned value back.
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
    for row in cursor:
        # First cursor field; 'OID@' when fieldList is built as in the original script
        objectID = row[0]
        # counter is the position within the row, i is the value at that position
        for counter, i in enumerate(row):
            current_field = fieldList[counter]
            # NOTE(review): `and` binds tighter than `or`, so this reads as
            # (i is not None and chr(10) in str(i)) or (chr(13) in str(i));
            # the None guard only covers the chr(10) test.
            if i is not None and chr(10) in str(i) or chr(13) in str(i):
                    print('\nError found on Object ID: {}'.format(objectID))
                    print ('Field: {}'.format(current_field))
                    print('There is a newline in: {}'.format(i.replace(chr(10), eolchar).replace(chr(13), eolchar)))
                    # NOTE(review): `row` is rebound to a one-element list here, so its
                    # length no longer matches fieldList and updateRow raises
                    # "sequence size must match size of the row".
                    row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar)]
                    cursor.updateRow(row)

DonMorrison1 · ‎03-04-2022

I think if you dump out the length of 'row' after this statement, it will be 1, which causes the error, since it must match the length of fieldList when you call updateRow.

row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar)]

DonMorrison1 · ‎03-04-2022

'TypeError: sequence size must match size of the row' typically means that the length of the field list you use to create the cursor does not match the length of the list you use when doing the insert/delete/update row.

Anonymous User · ‎03-04-2022

Taking David's enumerate example and changing the variables to match the index, value tuple it creates:

# Get a list of feature datasets. The leading '' entry makes the loop below
# also visit feature classes stored at the root of the workspace (outside any
# feature dataset); `or []` keeps the script running if ListDatasets returns
# None instead of a list.
datasets = [''] + (arcpy.ListDatasets(feature_type='feature') or [])

# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
    # `or []` guards the same way in case nothing is listed for this dataset.
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
        print('Cleaning feature class: {}'.format(fc))

        # Build a list of all TEXT/String fields, with the object ID first so
        # that row[0] identifies the record in the report below.
        fieldList = ['OID@'] + [fld.name for fld in arcpy.ListFields(fc) if fld.type == 'String']

        with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
            for row in cursor:
                changed = False
                # i is the index, val is the row[index] value
                for i, val in enumerate(row):
                    # Skip the OID field, nulls and ''
                    if i == 0 or not val:
                        continue

                    # Remove carriage returns and line feeds wherever they occur
                    # in the value, not only when one is the last character.
                    # Other whitespace (spaces, tabs) is left untouched.
                    cleaned = val.replace('\r', '').replace('\n', '')
                    if cleaned != val:
                        changed = True  # this row has changes and must be written back
                        print('\nHard Return found in feature class: {}'.format(fc))
                        print('Object ID: {}'.format(row[0]))
                        print('There is a newline in {} val: {}'.format(fieldList[i], val))
                        # Replace only this element so the row keeps one value
                        # per field in fieldList, as updateRow requires.
                        row[i] = cleaned

                # Write the row back only if something actually changed.
                if changed:
                    cursor.updateRow(row)

        print('Finished cleaning.')