I want to find junk values in all tables and columns (in a specific schema/owner/user):
Thanks.
Solved! Go to Solution.
def check_for_junk_values(layer_or_table):
fields = [f.name for f in arcpy.ListFields(layer_or_table)]
junk_values = [0, " ", " ", "-", "0", "NULL", "<NULL>", "<Null>"]
data = [dict(zip(fields, row)) for row in arcpy.da.SearchCursor(layer_or_table, fields)] # [{"Field1": 0, "Field2": 1}, {"Field1": 5, "Field2": 2}]
counts = dict()
for row in data:
for field, value in row.items():
if value in junk_values:
key = (field, value)
try:
counts[key] += 1
except KeyError:
counts[key] = 1
return counts
tables = ["TestPoints", "TestLines", "TestPolygons", "TestTable"]
# you can also use paths to your tables, or list them automatically with arcpy.ListTables() / arcpy.ListFeatureclasses()
log_table = arcpy.management.CreateTable("memory", "JunkLogTable")
arcpy.management.AddField(log_table, "TABLE_NAME", "TEXT")
arcpy.management.AddField(log_table, "COLUMN_NAME", "TEXT")
arcpy.management.AddField(log_table, "VAL_COUNT", "SHORT")
arcpy.management.AddField(log_table, "VALUE", "TEXT")
with arcpy.da.InsertCursor(log_table, ["TABLE_NAME", "COLUMN_NAME", "VALUE", "VAL_COUNT"]) as cursor:
for table in tables:
result = check_for_junk_values(table)
for key, count in result.items():
col = key[0]
val = key[1].__repr__() # __repr__() to get the quotes
cursor.insertRow([table, col, val, count])
It's possible to do this with PL/SQL.
--DROP TABLE incorrect_value_results;
--CREATE TABLE incorrect_value_results (id NUMBER, table_name VARCHAR2(30), column_name VARCHAR2(30), val_count NUMBER, value varchar2(30));
TRUNCATE TABLE incorrect_value_results;
DECLARE
l_count NUMBER;
l_inv_char_str VARCHAR2(2000);
TYPE invalid_char_val_rec IS RECORD(
cnt NUMBER,
inv_char VARCHAR2(20)
);
TYPE invalid_char_val_tab IS TABLE OF invalid_char_val_rec
INDEX BY PLS_INTEGER;
l_inv_char_vals invalid_char_val_tab;
l_index NUMBER;
BEGIN
l_index := 0;
-- Loop through each table in the schema
FOR i IN (SELECT table_name
FROM user_tables)
LOOP
-- Loop through each relevant column for this table
-- Exclude column EVT_FROM
FOR j IN (SELECT column_name, data_type
FROM user_tab_cols
WHERE table_name = i.table_name
AND column_name NOT IN ('EVT_FROM','ANGLE','UND','OFFSET')
AND table_name NOT LIKE 'A_%'
AND table_name NOT LIKE 'SDE%'
AND table_name NOT LIKE '%ANNO%'
AND table_name NOT IN ('INCORRECT_VALUE_RESULTS','NUMBERS')
AND data_type IN ('VARCHAR2', 'CHAR', 'NCHAR', 'NVARCHAR2', 'NUMBER'))
LOOP
IF j.data_type IN ('VARCHAR2', 'CHAR', 'NCHAR', 'NVARCHAR2') THEN
EXECUTE IMMEDIATE
'SELECT COUNT(1), '||j.column_name||
' FROM '||i.table_name||
' WHERE UPPER('||j.column_name||') IN('' '', '' '', ''0'', ''-'', ''NULL'', ''<NULL>'' )
GROUP BY '||j.column_name
BULK COLLECT INTO l_inv_char_vals;
ELSIF j.data_type = 'NUMBER' THEN
EXECUTE IMMEDIATE
'SELECT COUNT(1), '||j.column_name||
' FROM '||i.table_name||
' WHERE UPPER('||j.column_name||') <= 0
GROUP BY '||j.column_name
BULK COLLECT INTO l_inv_char_vals;
END IF;
-- If there are results then log them
l_index := l_index + 1;
FORALL k IN 1..l_inv_char_vals.COUNT
INSERT INTO incorrect_value_results (id, table_name, column_name, val_count, value)
VALUES (l_index,
i.table_name,
j.column_name,
l_inv_char_vals(k).cnt,
l_inv_char_vals(k).inv_char);
END LOOP;
END LOOP;
END;
/
COMMIT;Pros:
Cons:
I don't need the solution to work for all geodatabase types; I just need to use it on my Oracle enterprise geodatabase. But an ArcPy solution would be more beneficial to a wider audience.
def check_for_junk_values(layer_or_table):
fields = [f.name for f in arcpy.ListFields(layer_or_table)]
junk_values = [0, " ", " ", "-", "0", "NULL", "<NULL>", "<Null>"]
data = [dict(zip(fields, row)) for row in arcpy.da.SearchCursor(layer_or_table, fields)] # [{"Field1": 0, "Field2": 1}, {"Field1": 5, "Field2": 2}]
counts = dict()
for row in data:
for field, value in row.items():
if value in junk_values:
key = (field, value)
try:
counts[key] += 1
except KeyError:
counts[key] = 1
return counts
tables = ["TestPoints", "TestLines", "TestPolygons", "TestTable"]
# you can also use paths to your tables, or list them automatically with arcpy.ListTables() / arcpy.ListFeatureclasses()
log_table = arcpy.management.CreateTable("memory", "JunkLogTable")
arcpy.management.AddField(log_table, "TABLE_NAME", "TEXT")
arcpy.management.AddField(log_table, "COLUMN_NAME", "TEXT")
arcpy.management.AddField(log_table, "VAL_COUNT", "SHORT")
arcpy.management.AddField(log_table, "VALUE", "TEXT")
with arcpy.da.InsertCursor(log_table, ["TABLE_NAME", "COLUMN_NAME", "VALUE", "VAL_COUNT"]) as cursor:
for table in tables:
result = check_for_junk_values(table)
for key, count in result.items():
col = key[0]
val = key[1].__repr__() # __repr__() to get the quotes
cursor.insertRow([table, col, val, count])
Related:
Global Arcade functions —> Example: "...a function that checks non-spatial fields for incorrect blank values."