Skip to content

Commit 8f6f509

Browse files
committedJan 17, 2022
Add compress function to return object with reduced memory usage
Objects are modified in place; arrays are replaced with a copy that has exactly the capacity needed. This is useful in cases where the polygons will be used for a long time. By default, arrays are allocated with extra capacity that won't be used. (In V8, an empty array currently starts with a capacity of 16 elements, which is inefficient for decoded points of length 2.) slice() allocates a new array, seemingly with shrunken capacity according to process.memoryUsage. There is also an option to deduplicate identical points, which may be useful for collections of polygons sharing points as well as for calling compress multiple times with different objects. Deduplication is only safe for read-only uses, so it is disabled by default. For example, in node-geo-tz issue 131, I saw the following memory usage and decoding times on Linux. This is useful for long-running processes that repeatedly use the objects. 1. No Override: 1.280 GB (1.8 seconds) 2. Defaults for cache (no numericArrayCache): 0.708 GB (3.4 seconds) 3. Adding the second Map (numericArrayCache): 0.435 GB (6.7 seconds) Closes mapbox#122
1 parent daad5e0 commit 8f6f509

File tree

4 files changed

+177
-0
lines changed

4 files changed

+177
-0
lines changed
 

‎README.md

+22
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,28 @@ var geojson = geobuf.decode(new Pbf(data));
5959
Given a [Pbf](https://github.com/mapbox/pbf) object with Geobuf data, return a GeoJSON object. When loading Geobuf data over `XMLHttpRequest`, you need to set `responseType` to [`arraybuffer`](https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/responseType).
6060

6161

62+
### compress
63+
64+
```js
65+
var geojson = geobuf.compress(geobuf.decode(new Pbf(data)));
66+
```
67+
68+
Given a GeoJSON object (or an array of GeoJSON objects), returns an equivalent object with lower memory usage by avoiding memory wasted on excess array capacity. Objects are modified in place; arrays are replaced with compact copies.
69+
This may be useful if GeoJSON objects are kept around for a long time after creating them.
70+
71+
```js
72+
// To additionally deduplicate identical arrays
73+
// (may be unsafe if the geodata points are modified by callers)
74+
var geojson = geobuf.compress(geobuf.decode(new Pbf(data)), new Map(), new Map());
75+
// To reuse caches when deduplicating multiple geobuf objects:
76+
// (may be unsafe if the geodata points are modified by callers)
77+
var cache = new Map();
78+
var numericArrayCache = new Map();
79+
var geojson = geobuf.compress(geobuf.decode(new Pbf(data)), cache, numericArrayCache);
80+
```
81+
82+
When `Map` or `Object.entries` is unavailable, this returns the original object without attempting to compress.
83+
6284
## Install
6385

6486
Node and Browserify:

‎compress.js

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
'use strict';
2+
3+
if (typeof Map == 'undefined' || !Object.entries) {
4+
module.exports = function compress(value) {
5+
return value;
6+
};
7+
return;
8+
}
9+
10+
/**
 * @param {Array} value the array to inspect.
 * @returns {Boolean} true when every element is a number
 * (vacuously true for the empty array).
 */
function isNumericArray(value) {
    for (var i = 0; i < value.length; i++) {
        if (typeof value[i] !== 'number') {
            return false;
        }
    }
    return true;
}

/**
 * Build the deduplication key for an array that contains only numbers.
 *
 * For ordinary finite numbers the key is identical to JSON.stringify(values).
 * JSON.stringify cannot be used directly, because it serializes NaN, Infinity
 * and -Infinity as null and -0 as 0, which would make distinct arrays share
 * one key and be replaced by each other.
 *
 * @param {number[]} values array for which isNumericArray(values) is true.
 * @returns {string} key that is equal only for arrays with identical contents.
 */
function numericArrayKey(values) {
    var parts = [];
    for (var i = 0; i < values.length; i++) {
        var n = values[i];
        // String(-0) is '0', so negative zero needs an explicit spelling.
        parts.push(n === 0 && 1 / n < 0 ? '-0' : String(n));
    }
    return '[' + parts.join(',') + ']';
}

/**
 * Compress data returned by geobuf's decode function.
 * Plain objects are modified in place; arrays are replaced by copies made
 * with slice(), which experimentally do not carry excess capacity.
 *
 * This is useful in cases where the polygons will be used for a long time.
 * By default, arrays are reserved with extra capacity that won't be used,
 * which is inefficient for decoded points of length 2.
 *
 * Identical numeric arrays can optionally be deduplicated, which may be
 * useful for collections of polygons sharing points as well as for calling
 * compress multiple times with different objects.
 *
 * @param {any} value the value to compress.
 * @param {Map} [cache] maps already-visited values to their compressed form.
 * By default, a new cache is created for each external call to compress.
 * Must support get/has/set.
 * @param {null|Map} [numericArrayCache] if non-null, this will be used to
 * deduplicate numeric arrays of any length, including empty arrays.
 * Must support get/set.
 *
 * This deduplication may be unsafe if callers would modify arrays.
 * @return {any} value with all fields compressed.
 */
function compress(value, cache = new Map(), numericArrayCache = null) {
    var i;
    if (cache.has(value)) {
        return cache.get(value);
    }
    if (Array.isArray(value)) {
        if (numericArrayCache && isNumericArray(value)) {
            // Reuse array instances such as [], [1.5, 1.5].
            var cacheKey = numericArrayKey(value);
            var shared = numericArrayCache.get(cacheKey);
            if (!shared) {
                // Only copy on a miss; a hit needs no new allocation.
                shared = value.slice();
                numericArrayCache.set(cacheKey, shared);
            }
            cache.set(value, shared);
            // Numbers have nothing left to compress.
            return shared;
        }
        // slice() is used because it is available in older JS versions and
        // experimentally appears to reduce the capacity used.
        var result = value.slice();
        // Store this in the cache immediately to guard against infinite
        // recursion on invalid (cyclic) inputs.
        cache.set(value, result);
        for (i = 0; i < result.length; i++) {
            result[i] = compress(result[i], cache, numericArrayCache);
        }
        return result;
    }
    if (value && typeof value === 'object') {
        // Compress fields of the object in place.
        // Cache it first to prevent infinite recursion on cyclic data.
        cache.set(value, value);
        var entries = Object.entries(value);
        for (i = 0; i < entries.length; i++) {
            var key = entries[i][0];
            var field = entries[i][1];
            var compressedValue = compress(field, cache, numericArrayCache);
            if (field !== compressedValue) {
                // Replace the field for this key with the compressed version.
                value[key] = compressedValue;
            }
        }
    } else if (typeof value === 'string') {
        // Deduplicate strings.
        cache.set(value, value);
    }
    return value;
}
97+
// Export the full implementation. This line only runs when the feature check
// at the top of the file did not return early with the no-op fallback.
module.exports = compress;

‎index.js

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22

33
exports.encode = require('./encode');
44
exports.decode = require('./decode');
5+
exports.compress = require('./compress');

‎test/validate.test.js

+57
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,63 @@ test('roundtrip a circle with potential accumulating error', function (t) {
8282
t.end();
8383
});
8484

85+
test('can compress memory', function (t) {
    if (typeof Map === 'undefined') {
        t.end();
        return;
    }
    // Build a deliberately invalid ring whose points repeat every 4 entries,
    // so that equal-valued points exist at several indices.
    var points = 16;
    var ring = [];
    for (var i = 0; i <= points; i++) {
        var angle = Math.PI * 2.0 * (i % 4) / points;
        ring.push([Math.cos(angle), Math.sin(angle)]);
    }
    var feature = {
        'type': 'MultiPolygon',
        'coordinates': [[ring]]
    };

    var decoded = geobuf.decode(new Pbf(geobuf.encode(feature, new Pbf())));
    // Snapshot before compressing: compress modifies objects in place.
    var jsonBefore = JSON.stringify(decoded);
    var compressed = geobuf.compress(decoded);
    var jsonAfter = JSON.stringify(compressed);
    var c = compressed.coordinates;

    t.same(jsonAfter, jsonBefore);
    t.same(c[0][0][0], c[0][0][4], 'should be points with equivalent data');
    t.notStrictEqual(c[0][0][0], c[0][0][4], 'should not deduplicate different array instances by default');
    t.same(c[0][0][0], [1, 0], 'should preserve value');
    t.end();
});
113+
test('can compress memory and deduplicate points', function (t) {
    // compress() is a no-op unless both Map and Object.entries exist, so the
    // identity assertions below only make sense when both are available.
    if (typeof Map === 'undefined' || !Object.entries) {
        t.end();
        return;
    }
    // Generate an invalid shape with duplicate points.
    var feature = {
        'type': 'MultiPolygon',
        'coordinates': [[[]]]
    };
    var points = 12;
    for (var i = 0; i <= points; i++) {
        // i % 4 makes the points repeat every 4 entries (indices 0, 4, 8, ...).
        feature.coordinates[0][0].push([
            Math.cos(Math.PI * 2.0 * (i % 4) / points),
            Math.sin(Math.PI * 2.0 * (i % 4) / points)
        ]);
    }
    var roundTripped = geobuf.decode(new Pbf(geobuf.encode(feature, new Pbf())));
    // Snapshot before compressing: compress modifies objects in place.
    var originalJSON = JSON.stringify(roundTripped);
    // Passing a second Map enables deduplication of identical numeric arrays.
    var compressedFeature = geobuf.compress(roundTripped, new Map(), new Map());
    var compressedJSON = JSON.stringify(compressedFeature);
    var polygon = compressedFeature.coordinates[0][0];
    t.same(compressedJSON, originalJSON);
    t.same(polygon[0], polygon[4], 'should be polygon with equivalent data');
    t.strictEqual(polygon[0], polygon[4], 'should deduplicate different array instances when cache passed in');
    t.strictEqual(polygon[0], polygon[8], 'should deduplicate different array instances when cache passed in');
    t.same(polygon[0], [1, 0], 'should preserve value');
    t.end();
});
85142
function roundtripTest(geojson) {
86143
return function (t) {
87144
var buf = geobuf.encode(geojson, new Pbf());

0 commit comments

Comments
 (0)
Please sign in to comment.