Pretty new to cursors here - trying to write a script that will remove any hard 'returns'/newlines/carriages from any text values in a feature class.
For example, a user might enter:
What I need is:
Can someone clarify why I'm getting the error TypeError: sequence size must match size of the row.
import arcpy
import os
import time
# Start the clock
startTime = time.time()
# Input geodatabase with feature classes to update
inputGDB = r'C:\data\test.gdb'
# Set workspace to input GDB
arcpy.env.workspace = inputGDB
eolchar = ""
# Get a list of feature datasets
datasets = arcpy.ListDatasets(feature_type='feature')
datasets = [''] + datasets if datasets is not None else []
# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
for fc in arcpy.ListFeatureClasses(feature_dataset=ds):
# Get the name of the feature class
path = os.path.join(arcpy.env.workspace, fc)
desc = arcpy.Describe(path)
fcName = desc.name
print('Cleaning feature class: {}'.format(fcName))
# Build a list of all TEXT/String Fields
fieldList = [i.name for i in arcpy.ListFields(fc) if i.type == 'String']
fieldList = ['OID@'] + fieldList
# Iterate through each row and execute the clean
# row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
for row in cursor:
for i in row:
if i is not None:
if chr(10) in str(i) or chr(13) in str(i):
print('\nHard Return found in feature class: {}'.format(fc))
print('Object ID: {}'.format(row[0]))
print('There is a newline in {}'.format(i))
# print ('Field is {}'.format(fieldList))
row = i.replace(chr(10), eolchar).replace(chr(13), eolchar)
cursor.updateRow(row)
print('Finished cleaning.')
print('Completed.')
If I use the following below, it works.
# row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]
But I'm trying the print the Feature Class name, OID, and Field Name if/where any changes are made. Using the code above, how can I extract the field name where hard returns/carriages are removed?
Solved! Go to Solution.
Taking David's enumerate example and changing the variables to match the index, value tuple it creates:
# Get a list of feature datasets
datasets = [ds for ds in arcpy.ListDatasets(feature_type='feature') if ds is not None]
# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
for fc in arcpy.ListFeatureClasses(feature_dataset=ds):
# Get the name of the feature class
print('Cleaning feature class: {}'.format(fc))
# Build a list of all TEXT/String Fields
fieldList = [fld.name for fld in arcpy.ListFields(fc) if fld.type == 'String']
fieldList = ['OID@'] + fieldList
# Iterate through each row and execute the clean
# row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
for row in cursor:
changed = False
# i is the index, val is the row[index] value
for i, val in enumerate(row):
# Skip OID field, nulls and ''
if all([i != 0, val not in [None, '']]): #<- you can add ' ' if you want to exclude those values.
# Check if the value ends in one of these
if val[-1] in [' ', '\t', '\n', '\r']:
changed = True # <- set flag to indicate this row has changes and should be updated
print('\nHard Return found in feature class: {}'.format(fc))
print('Object ID: {}'.format(row[0]))
print('There is a newline in {} val: {}'.format(fieldList[i], val))
row[i] = val.strip(' \t\n\r')
# Check if anything changed and needs to be updated.
if changed:
cursor.updateRow(row)
print('Finished cleaning.')
Probably access the field using the iteration number as an index to fieldList, maybe better ways.
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
for row in cursor:
for counter, i in enumerate(row):
current_field = fieldList[counter]
Great, thanks. That's what I needed to get the field name. Still getting the sequence error though.
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
for row in cursor:
objectID = row[0]
for counter, i in enumerate(row):
current_field = fieldList[counter]
if i is not None and chr(10) in str(i) or chr(13) in str(i):
print('\nError found on Object ID: {}'.format(objectID))
print ('Field: {}'.format(current_field))
print('There is a newline in: {}'.format(i.replace(chr(10), eolchar).replace(chr(13), eolchar)))
row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar)]
cursor.updateRow(row)
I think if you dump out the length of 'row' after this statement if will be 1, which causes the error since it must be the length fieldList when you call updateRow
row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar)]
'TypeError: sequence size must match size of the row' typically means that the length of the field list you use to create the cursor does not match the length of the list you use when doing the insert/delete/update row.
Taking David's enumerate example and changing the variables to match the index, value tuple it creates:
# Get a list of feature datasets
datasets = [ds for ds in arcpy.ListDatasets(feature_type='feature') if ds is not None]
# Loop through all feature classes (including ones in feature datasets)
print('Reading Feature Classes...')
for ds in datasets:
for fc in arcpy.ListFeatureClasses(feature_dataset=ds):
# Get the name of the feature class
print('Cleaning feature class: {}'.format(fc))
# Build a list of all TEXT/String Fields
fieldList = [fld.name for fld in arcpy.ListFields(fc) if fld.type == 'String']
fieldList = ['OID@'] + fieldList
# Iterate through each row and execute the clean
# row = [i.replace(chr(10), eolchar).replace(chr(13), eolchar) if i is not None else i for i in row]
with arcpy.da.UpdateCursor(fc, fieldList) as cursor:
for row in cursor:
changed = False
# i is the index, val is the row[index] value
for i, val in enumerate(row):
# Skip OID field, nulls and ''
if all([i != 0, val not in [None, '']]): #<- you can add ' ' if you want to exclude those values.
# Check if the value ends in one of these
if val[-1] in [' ', '\t', '\n', '\r']:
changed = True # <- set flag to indicate this row has changes and should be updated
print('\nHard Return found in feature class: {}'.format(fc))
print('Object ID: {}'.format(row[0]))
print('There is a newline in {} val: {}'.format(fieldList[i], val))
row[i] = val.strip(' \t\n\r')
# Check if anything changed and needs to be updated.
if changed:
cursor.updateRow(row)
print('Finished cleaning.')