Skip to content

Commit fa2778e

Browse files
committed
Replace LIMIT/OFFSET with Psycopg2 server-side cursor
Fixes #138
1 parent b0dfbf6 commit fa2778e

File tree

1 file changed

+35
-31
lines changed

1 file changed

+35
-31
lines changed

python/utils.py

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import psycopg2
1313
import json
1414
import decimal
15+
import re
1516

1617
from elasticsearch import Elasticsearch
1718
from elasticsearch import helpers
@@ -61,7 +62,7 @@ def __init__(self, PostGISConnection, ESConnection, view, sqlquerystring):
6162
self._pgConnection = PostGISConnection
6263
self._esConnection = ESConnection
6364
self._view = view
64-
self._sqlquerystring = sqlquerystring
65+
self._sqlquerystring = re.sub(r'\s{2,}', ' ', sqlquerystring)
6566
self._auth = get_config_params('config.ini')
6667

6768
def pgConnection(self):
@@ -79,12 +80,8 @@ def auth(self):
7980
def sqlquerystring(self):
8081
return self._sqlquerystring
8182

82-
def getGeoJson(self, sqlquerystring, pgConnection):
83-
cur = pgConnection.pgConnection().cursor()
84-
cur.execute(sqlquerystring)
85-
rows = cur.fetchall()
83+
def getGeoJson(self, rows, columns):
8684
if rows:
87-
columns = [name[0] for name in cur.description]
8885
geomIndex = columns.index('st_asgeojson')
8986
feature_collection = {'type': 'FeatureCollection',
9087
'features': []}
@@ -101,6 +98,7 @@ def getGeoJson(self, sqlquerystring, pgConnection):
10198
value = row[index]
10299
feature['properties'][column] = value
103100
feature_collection['features'].append(feature)
101+
104102
geojsonobject = json.dumps(feature_collection,
105103
indent=2,
106104
default=decimal_default)
@@ -142,33 +140,41 @@ def postgis2es(self):
142140
sqlquerystring = self.sqlquerystring().format(
143141
**{'limit': self.LIMIT,
144142
'offset': self.OFFSET})
145-
geojsonobject = self.getGeoJson(sqlquerystring, self.pgConnection())
146-
while geojsonobject is not None:
147-
148-
print(sqlquerystring)
149-
self.populateElasticSearchIndex(self.esConnection(),
150-
geojsonobject,
151-
self.auth(),
152-
self.view())
153-
self.OFFSET += self.LIMIT
154-
155-
sqlquerystring = self.sqlquerystring().format(
156-
**{'limit': self.LIMIT,
157-
'offset': self.OFFSET})
158-
geojsonobject = self.getGeoJson(sqlquerystring,
159-
self.pgConnection())
143+
144+
# Remove LIMIT and OFFSET until we decide to change all caller scripts
145+
sqlquerystring = re.sub(r'\s+LIMIT.*', '', sqlquerystring)
146+
147+
print(sqlquerystring)
148+
149+
with self.pgConnection().pgConnection() as conn:
150+
with conn.cursor(name='postgis2es_cursor') as cur:
151+
cur.itersize = self.LIMIT
152+
cur.execute(sqlquerystring)
153+
rows = cur.fetchmany(self.LIMIT)
154+
columns = [name[0] for name in cur.description]
155+
156+
count = 0
157+
while rows:
158+
count = count + 1
159+
print("Rows %d-%d, %s = %s" %
160+
(self.LIMIT * (count - 1) + 1, self.LIMIT * count,
161+
columns[0], rows[0][0]))
162+
163+
geojsonobject = self.getGeoJson(rows, columns)
164+
# print("populateElasticsearchIndex()")
165+
self.populateElasticSearchIndex(self.esConnection(),
166+
geojsonobject,
167+
self.auth(),
168+
self.view())
169+
rows = cur.fetchmany(self.LIMIT)
160170

161171
return
162172

163173

164174
class PostGISPointDataset(PostGISdataset):
165175

166-
def getGeoJson(self, sqlquerystring, pgConnection):
167-
cur = pgConnection.pgConnection().cursor()
168-
cur.execute(sqlquerystring)
169-
rows = cur.fetchall()
176+
def getGeoJson(self, rows, columns):
170177
if rows:
171-
columns = [name[0] for name in cur.description]
172178
geomIndex = columns.index('st_asgeojson')
173179
feature_collection = {'type': 'FeatureCollection',
174180
'features': []}
@@ -187,6 +193,7 @@ def getGeoJson(self, sqlquerystring, pgConnection):
187193
value = row[index]
188194
feature['properties'][column] = value
189195
feature_collection['features'].append(feature)
196+
190197
geojsonobject = json.dumps(feature_collection,
191198
indent=2,
192199
default=decimal_default)
@@ -197,12 +204,8 @@ def getGeoJson(self, sqlquerystring, pgConnection):
197204

198205
class PostGISTable(PostGISdataset):
199206

200-
def getGeoJson(self, sqlquerystring, pgConnection):
201-
cur = pgConnection.pgConnection().cursor()
202-
cur.execute(sqlquerystring)
203-
rows = cur.fetchall()
207+
def getGeoJson(self, rows, columns):
204208
if rows:
205-
columns = [name[0] for name in cur.description]
206209
# geomIndex = columns.index('st_asgeojson')
207210
feature_collection = {'type': 'FeatureCollection',
208211
'features': []}
@@ -221,6 +224,7 @@ def getGeoJson(self, sqlquerystring, pgConnection):
221224
value = row[index]
222225
feature['properties'][column] = value
223226
feature_collection['features'].append(feature)
227+
224228
geojsonobject = json.dumps(feature_collection,
225229
indent=2,
226230
default=decimal_default)

0 commit comments

Comments
 (0)