Fix hard returns/carriages in text fields using update cursor

741
5
Jump to solution
03-03-2022 06:50 PM
tigerwoulds
Occasional Contributor III

I'm pretty new to cursors. I'm trying to write a script that removes any hard returns (newlines / carriage returns) from the text values in a feature class.

For example, a user might enter:

TigerWoulds_0-1646361110171.png

What I need is:

TigerWoulds_1-1646361143348.png

Can someone clarify why I'm getting the error "TypeError: sequence size must match size of the row"?

# Walks every feature class in a geodatabase, looks for line feeds / carriage
# returns in the String fields, reports them and attempts to write the cleaned
# value back through an update cursor.
import arcpy
import os
import time

# Start the clock
startTime = time.time()

# Input geodatabase with feature classes to update
inputGDB = r'C:\data\test.gdb'

# Set workspace to input GDB
arcpy.env.workspace = inputGDB

# Text substituted for each line feed / carriage return (empty = remove them)
eolchar = ""

# Get a list of feature datasets
datasets = arcpy.ListDatasets(feature_type='feature')
# The conditional applies to the whole expression: a list result gets a
# leading '' entry, a None result becomes [] and the loop below does nothing.
# The '' entry is passed to ListFeatureClasses like any dataset name
# (presumably selecting the stand-alone feature classes - confirm in ArcPy docs).
datasets = [''] + datasets if datasets is not None else []

# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds):
        # Get the name of the feature class
        # (the path is built from the workspace and fc only; ds is not part of it)
        path = os.path.join(arcpy.env.workspace, fc)
        desc = arcpy.Describe(path)
        fcName = desc.name
        print('Cleaning feature class: {}'.format(fcName))

        # Build a list of all TEXT/String Fields
        fieldList = [i.name for i in arcpy.ListFields(fc) if i.type == 'String']
        # OID@ goes first, so row[0] below is the object ID
        fieldList = ['OID@'] + fieldList

        # Iterate through each row and execute the clean
        # row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]

        with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
            for row in cursor:
                # i is each field value of the current row, not an index
                for i in row:
                    if i is not None:
                        # chr(10) is line feed, chr(13) is carriage return
                        if chr(10) in str(i) or chr(13) in str(i):
                            print('\nHard Return found in feature class: {}'.format(fc))
                            print('Object ID: {}'.format(row[0]))
                            print('There is a newline in {}'.format(i))
                            # print ('Field is {}'.format(fieldList))
                            # NOTE(review): this rebinds row to one cleaned string
                            # instead of replacing that single value inside the
                            # row, so updateRow no longer receives one item per
                            # field in fieldList - the likely source of the
                            # "sequence size must match size of the row" error.
                            row = i.replace(chr(10), eolchar).replace(chr(13), eolchar)
                            cursor.updateRow(row)

        print('Finished cleaning.')

print('Completed.')

 If I use the following below, it works. 

# row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]

 But I'm trying to print the feature class name, OID, and field name wherever a change is made. Using the code above, how can I extract the name of the field from which hard returns/carriage returns are removed?

0 Kudos
1 Solution

Accepted Solutions
by Anonymous User
Not applicable

Taking David's enumerate example and changing the variables to match the index, value tuple it creates:

# Cleans every String field of every feature class in the current workspace:
# line feeds / carriage returns are removed wherever they occur in a value and
# trailing or surrounding blanks are trimmed. Each change is reported with the
# feature class, object ID and field name.

# Text substituted for each line feed / carriage return (empty = remove them)
eolchar = ''

# Get a list of feature datasets. The leading '' entry makes the loop below
# also call ListFeatureClasses with a blank feature_dataset, so stand-alone
# feature classes at the root of the workspace are cleaned as well.
datasets = [''] + (arcpy.ListDatasets(feature_type='feature') or [])

# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
        print('Cleaning feature class: {}'.format(fc))

        # Build a list of all TEXT/String fields. OID@ goes first so that
        # row[0] is always the object ID used in the report below.
        fieldList = ['OID@'] + [fld.name for fld in arcpy.ListFields(fc) if fld.type == 'String']

        with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
            for row in cursor:
                changed = False
                # i is the index, val is the row[index] value
                for i, val in enumerate(row):
                    # Skip OID field, nulls and ''
                    if i == 0 or val in (None, ''):
                        continue

                    # A value needs cleaning when it holds a line break anywhere
                    # (not only at the end) or ends in a blank or tab.
                    has_line_break = '\n' in val or '\r' in val
                    if has_line_break or val[-1] in (' ', '\t'):
                        changed = True  # this row has changes and must be written back
                        print('\nHard Return found in feature class: {}'.format(fc))
                        print('Object ID: {}'.format(row[0]))
                        print('There is a newline in {} val: {}'.format(fieldList[i], val))
                        # Replace only this field's value; the row keeps one
                        # item per field in fieldList, as updateRow requires.
                        row[i] = val.replace('\r', eolchar).replace('\n', eolchar).strip(' \t\n\r')

                # Write the row back once, and only if something changed.
                if changed:
                    cursor.updateRow(row)

        print('Finished cleaning.')

 

View solution in original post

0 Kudos
5 Replies
DavidPike
MVP Frequent Contributor

You can probably get the field name by using the iteration number as an index into fieldList; there may be better ways.

# Sketch: pair each value of a row with the name of the field it came from.
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
  for row in cursor:
    # counter is the position within the row, i is the value at that position
    for counter, i in enumerate(row):
      # The same position in fieldList is used to look up the field name
      current_field = fieldList[counter]
                    

 

0 Kudos
tigerwoulds
Occasional Contributor III

Great, thanks. That's what I needed to get the field name. Still getting the sequence error though. 

# Second attempt: report the object ID and field name for every value that
# contains a line feed / carriage return, then try to write the cleaned value.
# Indentation restored (it was lost in the paste); the statements are unchanged.
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
    for row in cursor:
        # presumably fieldList still starts with 'OID@', making row[0] the object ID
        objectID = row[0]
        # counter is the position within the row, i is the value at that position
        for counter, i in enumerate(row):
            current_field = fieldList[counter]
            # Evaluated as (i is not None and LF in str(i)) or (CR in str(i))
            if i is not None and chr(10) in str(i) or chr(13) in str(i):
                print('\nError found on Object ID: {}'.format(objectID))
                print ('Field: {}'.format(current_field))
                print('There is a newline in: {}'.format(i.replace(chr(10), eolchar).replace(chr(13), eolchar)))
                # NOTE(review): row is rebound to a ONE-element list here, so
                # its length no longer matches fieldList and updateRow raises
                # "sequence size must match size of the row". Assigning the
                # cleaned text to row[counter] would keep the length intact.
                row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar)]
                cursor.updateRow(row)
0 Kudos
DonMorrison1
Occasional Contributor III

I think if you print the length of 'row' after this statement, it will be 1. That causes the error, because the row must have the same length as fieldList when you call updateRow.

row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar)]

 

0 Kudos
DonMorrison1
Occasional Contributor III

'TypeError: sequence size must match size of the row' typically means that the length of the field list you use to create the cursor does not match the length of the list you use when doing the insert/delete/update row. 

0 Kudos
by Anonymous User
Not applicable

Taking David's enumerate example and changing the variables to match the index, value tuple it creates:

# Cleans every String field of every feature class in the current workspace:
# line feeds / carriage returns are removed wherever they occur in a value and
# trailing or surrounding blanks are trimmed. Each change is reported with the
# feature class, object ID and field name.

# Text substituted for each line feed / carriage return (empty = remove them)
eolchar = ''

# Get a list of feature datasets. The leading '' entry makes the loop below
# also call ListFeatureClasses with a blank feature_dataset, so stand-alone
# feature classes at the root of the workspace are cleaned as well.
datasets = [''] + (arcpy.ListDatasets(feature_type='feature') or [])

# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
        print('Cleaning feature class: {}'.format(fc))

        # Build a list of all TEXT/String fields. OID@ goes first so that
        # row[0] is always the object ID used in the report below.
        fieldList = ['OID@'] + [fld.name for fld in arcpy.ListFields(fc) if fld.type == 'String']

        with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
            for row in cursor:
                changed = False
                # i is the index, val is the row[index] value
                for i, val in enumerate(row):
                    # Skip OID field, nulls and ''
                    if i == 0 or val in (None, ''):
                        continue

                    # A value needs cleaning when it holds a line break anywhere
                    # (not only at the end) or ends in a blank or tab.
                    has_line_break = '\n' in val or '\r' in val
                    if has_line_break or val[-1] in (' ', '\t'):
                        changed = True  # this row has changes and must be written back
                        print('\nHard Return found in feature class: {}'.format(fc))
                        print('Object ID: {}'.format(row[0]))
                        print('There is a newline in {} val: {}'.format(fieldList[i], val))
                        # Replace only this field's value; the row keeps one
                        # item per field in fieldList, as updateRow requires.
                        row[i] = val.replace('\r', eolchar).replace('\n', eolchar).strip(' \t\n\r')

                # Write the row back once, and only if something changed.
                if changed:
                    cursor.updateRow(row)

        print('Finished cleaning.')

 

0 Kudos