I have a project where I need to import 120+ million features across 20+ tables from an elderly Oracle instance into a PostgreSQL 11 instance. The details aren't critical, but the end result was a need to create tables in a PostgreSQL instance without an Enterprise geodatabase enabled (at least, to start), and populate them through arcpy.da.InsertCursor.
It took some pain to generate a QueryLayer source and frame an arcpy.da.SearchCursor to generate the source stream, then it was time to populate the new tables, at which point I hit the "No support for this geometry type" error on cursor.insertRow(row).
The code seemed fine:
CREATE TABLE nominal_tbl (
objectid serial NOT NULL,
idval varchar(15) NOT NULL,
seqid integer,
geom geometry,
insert_dt timestamp without time zone DEFAULT current_timestamp,
CONSTRAINT enforce_srid_shape CHECK (st_srid(geom) = 4326)
);
import os
print("Importing arcpy...")
import arcpy
data1 = [
{ 'idval' : 'ID000004', 'seqid' : 4, 'wkt' : 'POINT (4 1)' },
{ 'idval' : 'ID000005', 'seqid' : 5, 'wkt' : 'POINT (5 1)' },
]
connPath = r'C:\Temp\user@localhost(db301).sde'
sr = arcpy.SpatialReference(4326)
sr.setFalseOriginAndUnits(-400.0,-400.0,10000000)
iCols = ['idval','seqid','shape@WKT']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
for data in data1:
iCursor.insertRow([data['idval'],data['seqid'],data['wkt']])
del iCursor
But it didn't work:
Importing arcpy...
Traceback (most recent call last):
File "C:\Temp\CursorInserter0.py", line 22, in <module>
iCursor.insertRow([data['idval'],data['seqid'],data['wkt']])
RuntimeError: No support for this geometry type.
>>>
So I tried an alternate solution -- Add a CLOB column to the business table, and drive the InsertCursor with a Well-Known Text into the insertRow, and an INSERT trigger that populated the geometry column from WKT, then set the CLOB column to NULL. That looked like this:
DROP TABLE IF EXISTS nominal_tbl CASCADE;
CREATE TABLE nominal_tbl (
objectid serial NOT NULL,
idval varchar(15) NOT NULL,
seqid integer,
geom geometry(POINT,4326),
insert_dt timestamp without time zone DEFAULT current_timestamp,
wkt text,
CONSTRAINT enforce_srid_shape CHECK (st_srid(geom) = 4326)
);
DROP FUNCTION IF EXISTS nominal_ifn();
CREATE FUNCTION nominal_ifn() RETURNS trigger AS $CODE$
BEGIN
IF NEW.geom IS NULL AND NEW.wkt IS NOT NULL THEN
NEW.geom := ST_GeomFromText(NEW.wkt,4326);
NEW.wkt := NULL;
END IF;
RETURN NEW;
END;
$CODE$ LANGUAGE plpgsql;
CREATE TRIGGER nominal_itg BEFORE INSERT ON nominal_tbl
FOR EACH ROW EXECUTE FUNCTION nominal_ifn();
When I ran the insert again, it still failed. "No support for this geometry type" even with the geometry column not in the insert list!
My next plan was to generate a view that covered only the columns being inserted, then drive the insert from that (I even tested the inserts all three ways):
CREATE VIEW nominal_ivw AS
SELECT idval,seqid,wkt
FROM nominal_tbl;
INSERT INTO nominal_tbl(
idval,seqid,geom)
VALUES ('ID00000001',1,ST_GeomFromText('POINT(1 1)',4326));
INSERT INTO nominal_tbl(
idval,seqid,wkt)
VALUES ('ID00000002',2,'POINT(2 1)');
INSERT INTO nominal_ivw(
idval,seqid,wkt)
VALUES ('ID00000003',3,'POINT(3 1)');
So now I had to tweak the script to hit all three tables:
import os
print("Importing arcpy...")
import arcpy
data1 = [
# 1,2,3 inserted earlier
{ 'idval' : 'ID000004', 'seqid' : 4, 'wkt' : 'POINT (4 1)' },
{ 'idval' : 'ID000005', 'seqid' : 5, 'wkt' : 'POINT (5 1)' },
]
data2 = [
{ 'idval' : 'ID000011', 'seqid' : 11, 'wkt' : 'POINT (1 3)' },
{ 'idval' : 'ID000012', 'seqid' : 12, 'wkt' : 'POINT (2 3)' },
{ 'idval' : 'ID000013', 'seqid' : 13, 'wkt' : 'POINT (3 3)' },
{ 'idval' : 'ID000014', 'seqid' : 14, 'wkt' : 'POINT (4 3)' },
{ 'idval' : 'ID000015', 'seqid' : 15, 'wkt' : 'POINT (5 3)' },
]
data3 = [
{ 'idval' : 'ID000021', 'seqid' : 21, 'wkt' : 'POINT (1 5)' },
{ 'idval' : 'ID000022', 'seqid' : 22, 'wkt' : 'POINT (2 5)' },
{ 'idval' : 'ID000023', 'seqid' : 23, 'wkt' : 'POINT (3 5)' },
{ 'idval' : 'ID000024', 'seqid' : 24, 'wkt' : 'POINT (4 5)' },
{ 'idval' : 'ID000025', 'seqid' : 25, 'wkt' : 'POINT (5 5)' },
]
connPath = r'C:\Temp\usero@localhost(db301).sde'
sr = arcpy.SpatialReference(4326)
sr.setFalseOriginAndUnits(-400.0,-400.0,10000000)
iCols = ['idval','seqid','shape@WKT']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
for data in data1:
try:
iCursor.insertRow([data['idval'],data['seqid'],data['wkt']])
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
iCols = ['idval','seqid','wkt']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
for data in data2:
try:
iCursor.insertRow([data['idval'],data['seqid'],data['wkt']])
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
iCols = ['idval','seqid','wkt']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_ivw'),iCols)
for data in data3:
try:
iCursor.insertRow([data['idval'],data['seqid'],data['wkt']])
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
qCols = ['objectid','idval','shape@WKT','insert_dt','seqid']
with arcpy.da.SearchCursor(
os.path.join(connPath,'nominal_tbl'),qCols,
spatial_reference=sr) as qCursor:
i = 0
for row in qCursor:
i += 1
if ((i % 5) == 4):
print("{:6d} -".format(i))
for j,name in enumerate(qCursor.fields):
print("{:>24s} : {:s}".format(
name.encode('UTF-8'),
str(row[j]) if row[j] != None else 'NULL'))
Except they all worked! No "No support for this geometry type"!
Huh? What was going on?
Frustrated, I decided to at least do performance testing on the three methodologies, to see how expensive the trigger was in my now dashed solution. But first, I had to even the playing field by making the three methods insert the same number of rows (and lots more of them), and I added a TRUNCATE via an arcpy.ArcSDESQLExecute cursor. Augmented a bit, that looked like this:
import os
import math
import random
import datetime
print("Importing arcpy...")
import arcpy
class PointGenerator():
def __init__(self,max_rows):
self.max_rows = max_rows
self.nrows = 0
self.seqid = 0
random.seed('123456789')
def generateRow(self):
if self.nrows >= self.max_rows:
self.nrows = 0
return None
self.seqid += 1
self.nrows += 1
return ["ID{:07d}".format(self.seqid),
int(self.seqid),
"POINT ({:.7f} {:.7f})".format(
(random.random()*360.0) - 180.0,
(math.acos(1.0 - 2.0 * random.random()) *
2.0 - math.pi) * 90.0 / math.pi)]
# Math above generates random area distribution of points
connPath = r'C:\Temp\user@localhost(db301).sde'
sr = arcpy.SpatialReference(4326)
sr.setFalseOriginAndUnits(-400.0,-400.0,10000000)
cursor = arcpy.ArcSDESQLExecute(connPath)
cursor.execute("TRUNCATE TABLE nominal_tbl RESTART IDENTITY")
print("---- TRUNCATED ----")
del cursor
pg = PointGenerator(5) #(50000) - easy peasy, to test
iCols = ['idval','seqid','shape@WKT']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
while True:
row = pg.generateRow()
if row == None: break
try:
iCursor.insertRow(row)
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
print("Elapsed = {:.3f} secs".format(
(datetime.datetime.utcnow()-start).total_seconds()))
iCols = ['idval','seqid','wkt']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
while True:
row = pg.generateRow()
if row == None: break
try:
iCursor.insertRow(row)
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
print("Elapsed = {:.3f} secs".format(
(datetime.datetime.utcnow()-start).total_seconds()))
iCols = ['idval','seqid','wkt']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_ivw'),iCols)
while True:
row = pg.generateRow()
if row == None: break
try:
iCursor.insertRow(row)
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
print("Elapsed = {:.3f} secs".format(
(datetime.datetime.utcnow()-start).total_seconds()))
And then the "No support for this geometry type" was back!
Importing arcpy...
---- TRUNCATED ----
Insert failed! seqid = 1
No support for this geometry type.
Elapsed = 0.163 secs
Insert failed! seqid = 2
No support for this geometry type.
Elapsed = 0.123 secs
Elapsed = 0.037 secs
>>>
Whoa! Now the mystery unraveled...
The problem: I didn't define the table with an explicit data type. I was using old-style PostGIS creation syntax, when I could have used this:
DROP TABLE IF EXISTS nominal_tbl CASCADE;
CREATE TABLE nominal_tbl (
objectid serial NOT NULL,
idval varchar(15) NOT NULL,
seqid integer,
geom geometry(POINT,4326),
-- This ^^^^^ is the key!
insert_dt timestamp without time zone DEFAULT current_timestamp,
wkt text,
CONSTRAINT enforce_srid_shape CHECK (st_srid(geom) = 4326)
);
The thing is, I have mostly polygon layers to load, and if you define the table like this:
CREATE TABLE polygon_tbl (
objectid serial NOT NULL,
idval varchar(15) NOT NULL,
seqid integer,
geom geometry(POLYGON,4326),
insert_dt timestamp without time zone DEFAULT current_timestamp,
wkt text,
CONSTRAINT enforce_srid_shape CHECK (st_srid(geom) = 4326)
);
then any MULTIPOLYGON features would be rejected.
If instead I did this:
DROP TABLE IF EXISTS polygon_tbl CASCADE;
CREATE TABLE polygon_tbl (
objectid serial NOT NULL,
idval varchar(15) NOT NULL,
seqid integer,
geom geometry(MULTIPOLYGON,4326),
insert_dt timestamp without time zone DEFAULT current_timestamp,
wkt text,
CONSTRAINT enforce_srid_shape CHECK (st_srid(geom) = 4326)
);
then any POLYGON features would be rejected.
There are ways around this latter situation, though, since PostGIS supports ST_Multi, which could be used like this in an INSERT trigger:
DROP FUNCTION IF EXISTS polygon_ifn();
CREATE FUNCTION polygon_ifn() RETURNS trigger AS $CODE$
BEGIN
IF NEW.geom IS NULL AND NEW.wkt IS NOT NULL THEN
NEW.geom := ST_Multi(ST_GeomFromText(NEW.wkt,4326));
NEW.wkt := NULL;
END IF;
RETURN NEW;
END;
$CODE$ LANGUAGE plpgsql;
But what was the timing for 50k rows like again?
Importing arcpy...
---- TRUNCATED ----
Elapsed = 6.240 secs (shape@WKT)
Elapsed = 6.096 secs (wkt in table)
Elapsed = 5.901 secs (wkt in view)
>>>
Okay, how about we put the row generation outside the timing? One last script:
import os
import math
import random
import datetime
print("Importing arcpy...")
import arcpy
def generator_fn(max_rows):
random.seed('123456789')
for seqid in range(1,max_rows+1):
wkt = "POINT ({:.7f} {:.7f})".format(
(random.random()*360.0) - 180.0,
(math.acos(1.0 - 2.0 * random.random()) *
2.0 - math.pi) * 90.0 / math.pi)
# Math above generates random area distribution of points
row = ["ID{:07d}".format(seqid),int(seqid),wkt]
yield row
connPath = r'C:\Temp\user@localhost(db301).sde'
sr = arcpy.SpatialReference(4326)
sr.setFalseOriginAndUnits(-400.0,-400.0,10000000)
cursor = arcpy.ArcSDESQLExecute(connPath)
cursor.execute("TRUNCATE TABLE nominal_tbl RESTART IDENTITY")
print("---- TRUNCATED ----")
del cursor
data = [row for row in generator_fn(50000)]
iCols = ['idval','seqid','shape@WKT']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
for row in data:
try:
iCursor.insertRow(row)
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
ref1 = (datetime.datetime.utcnow()-start).total_seconds()
print("Elapsed = {:.3f} secs".format(ref1))
iCols = ['idval','seqid','wkt']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_tbl'),iCols)
for row in data:
try:
iCursor.insertRow(row)
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
ref2 = (datetime.datetime.utcnow()-start).total_seconds()
print("Elapsed = {:.3f} secs ({:.2f}%)".format(ref2,(ref1-ref2)*100/ref1))
iCols = ['idval','seqid','wkt']
start = datetime.datetime.utcnow()
iCursor = arcpy.da.InsertCursor(os.path.join(connPath,'nominal_ivw'),iCols)
for row in data:
try:
iCursor.insertRow(row)
except Exception as e:
print("Insert failed! seqid = {:d}\n{:s}".format(row[1],str(e)))
break
del iCursor
ref3 = (datetime.datetime.utcnow()-start).total_seconds()
print("Elapsed = {:.3f} secs ({:.2f}%)".format(ref3,(ref1-ref3)*100/ref1))
and the results:
Importing arcpy...
---- TRUNCATED ----
Elapsed = 5.946 secs
Elapsed = 5.376 secs (9.59%)
Elapsed = 5.313 secs (10.65%)
>>>
Hmmm... Using the trigger is faster, not slower! And with 120M rows to load, a ~10% performance improvement is nothing to sneeze at, and inserting into the view is faster still, and doesn't force all the POLYGON features into degenerate MULTIPOLYGONs either.
Pardon me, I have some views to create...
- V
Note: Updated final script's kludgy generator code to use Python wait capability (way cool!)
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.