I want to find junk values in all tables and columns (in a specific schema/owner/user):
Thanks.
Solved! Go to Solution.
def check_for_junk_values(layer_or_table):
fields = [f.name for f in arcpy.ListFields(layer_or_table)]
junk_values = [0, " ", " ", "-", "0", "NULL", "<NULL>", "<Null>"]
data = [dict(zip(fields, row)) for row in arcpy.da.SearchCursor(layer_or_table, fields)] # [{"Field1": 0, "Field2": 1}, {"Field1": 5, "Field2": 2}]
counts = dict()
for row in data:
for field, value in row.items():
if value in junk_values:
key = (field, value)
try:
counts[key] += 1
except KeyError:
counts[key] = 1
return counts
tables = ["TestPoints", "TestLines", "TestPolygons", "TestTable"]
# you can also use paths to your tables, or list them automatically with arcpy.ListTables() / arcpy.ListFeatureclasses()
log_table = arcpy.management.CreateTable("memory", "JunkLogTable")
arcpy.management.AddField(log_table, "TABLE_NAME", "TEXT")
arcpy.management.AddField(log_table, "COLUMN_NAME", "TEXT")
arcpy.management.AddField(log_table, "VAL_COUNT", "SHORT")
arcpy.management.AddField(log_table, "VALUE", "TEXT")
with arcpy.da.InsertCursor(log_table, ["TABLE_NAME", "COLUMN_NAME", "VALUE", "VAL_COUNT"]) as cursor:
for table in tables:
result = check_for_junk_values(table)
for key, count in result.items():
col = key[0]
val = key[1].__repr__() # __repr__() to get the quotes
cursor.insertRow([table, col, val, count])
It's possible to do this with PL/SQL.
--DROP TABLE incorrect_value_results; --CREATE TABLE incorrect_value_results (id NUMBER, table_name VARCHAR2(30), column_name VARCHAR2(30), val_count NUMBER, value varchar2(30)); TRUNCATE TABLE incorrect_value_results; DECLARE l_count NUMBER; l_inv_char_str VARCHAR2(2000); TYPE invalid_char_val_rec IS RECORD( cnt NUMBER, inv_char VARCHAR2(20) ); TYPE invalid_char_val_tab IS TABLE OF invalid_char_val_rec INDEX BY PLS_INTEGER; l_inv_char_vals invalid_char_val_tab; l_index NUMBER; BEGIN l_index := 0; -- Loop through each table in the schema FOR i IN (SELECT table_name FROM user_tables) LOOP -- Loop through each relevant column for this table -- Exclude column EVT_FROM FOR j IN (SELECT column_name, data_type FROM user_tab_cols WHERE table_name = i.table_name AND column_name NOT IN ('EVT_FROM','ANGLE','UND','OFFSET') AND table_name NOT LIKE 'A_%' AND table_name NOT LIKE 'SDE%' AND table_name NOT LIKE '%ANNO%' AND table_name NOT IN ('INCORRECT_VALUE_RESULTS','NUMBERS') AND data_type IN ('VARCHAR2', 'CHAR', 'NCHAR', 'NVARCHAR2', 'NUMBER')) LOOP IF j.data_type IN ('VARCHAR2', 'CHAR', 'NCHAR', 'NVARCHAR2') THEN EXECUTE IMMEDIATE 'SELECT COUNT(1), '||j.column_name|| ' FROM '||i.table_name|| ' WHERE UPPER('||j.column_name||') IN('' '', '' '', ''0'', ''-'', ''NULL'', ''<NULL>'' ) GROUP BY '||j.column_name BULK COLLECT INTO l_inv_char_vals; ELSIF j.data_type = 'NUMBER' THEN EXECUTE IMMEDIATE 'SELECT COUNT(1), '||j.column_name|| ' FROM '||i.table_name|| ' WHERE UPPER('||j.column_name||') <= 0 GROUP BY '||j.column_name BULK COLLECT INTO l_inv_char_vals; END IF; -- If there are results then log them l_index := l_index + 1; FORALL k IN 1..l_inv_char_vals.COUNT INSERT INTO incorrect_value_results (id, table_name, column_name, val_count, value) VALUES (l_index, i.table_name, j.column_name, l_inv_char_vals(k).cnt, l_inv_char_vals(k).inv_char); END LOOP; END LOOP; END; / COMMIT;
Pros:
Cons:
I don't need the solution to work for all geodatabase types; I just need to use it on my Oracle enterprise geodatabase. But an ArcPy solution would be more beneficial to a wider audience.
def check_for_junk_values(layer_or_table):
fields = [f.name for f in arcpy.ListFields(layer_or_table)]
junk_values = [0, " ", " ", "-", "0", "NULL", "<NULL>", "<Null>"]
data = [dict(zip(fields, row)) for row in arcpy.da.SearchCursor(layer_or_table, fields)] # [{"Field1": 0, "Field2": 1}, {"Field1": 5, "Field2": 2}]
counts = dict()
for row in data:
for field, value in row.items():
if value in junk_values:
key = (field, value)
try:
counts[key] += 1
except KeyError:
counts[key] = 1
return counts
tables = ["TestPoints", "TestLines", "TestPolygons", "TestTable"]
# you can also use paths to your tables, or list them automatically with arcpy.ListTables() / arcpy.ListFeatureclasses()
log_table = arcpy.management.CreateTable("memory", "JunkLogTable")
arcpy.management.AddField(log_table, "TABLE_NAME", "TEXT")
arcpy.management.AddField(log_table, "COLUMN_NAME", "TEXT")
arcpy.management.AddField(log_table, "VAL_COUNT", "SHORT")
arcpy.management.AddField(log_table, "VALUE", "TEXT")
with arcpy.da.InsertCursor(log_table, ["TABLE_NAME", "COLUMN_NAME", "VALUE", "VAL_COUNT"]) as cursor:
for table in tables:
result = check_for_junk_values(table)
for key, count in result.items():
col = key[0]
val = key[1].__repr__() # __repr__() to get the quotes
cursor.insertRow([table, col, val, count])
Related:
Global Arcade functions —> Example: "...a function that checks non-spatial fields for incorrect blank values."