This repository was archived by the owner on Oct 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy path80_advanced_indexing.sql
133 lines (88 loc) · 5.85 KB
/
80_advanced_indexing.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
\c pg_features_demo
SET ROLE TO demorole;
/*
For good performance it's essential that frequent queries take advantage of indexes.
Next to default/regular (B-tree) indexes Postgres has a lot to offer in that area.
Available index types:
1) B-tree - the default index, functioning best for highly selective data (high number of distinct values).
every row value is stored also in the index.
2) GIN - Generalized Inverted Index. An "inverted" index, meaning every distinct value is only stored once. designed for
highly repetitive data. main use case - "full text" index.
3) GiST - Generalized Search Tree. A specialized index type mean mainly to answer questions like - is one value close to
another (proximity, KNN), is one value enclosed in another (2/3D coordinates). A GiST index is lossy thus matches still need to
be fetched from the table. For normal indexes "index only scans" are possible, thus matches are returned from index directly.
Can be well used also for "fuzzy" search.
4) SP-GiST - a special version of the above, more effective for repetitive data. few data types supported out of the box.
5) BRIN - Block Range Indexes allow very small indexes as they only store the biggest and smallest value for a range of blocks
(128 blocks/pages per defaults). Very efficient for naturally ordered big-data.
6) Bloom - a special lossy index for cases where multiple different column sets are used for filtering. Supports only equality
search (=) and only few common data types.
7) Hash - hash indexes allow fast searching and compact storage for larger strings for example but are not recommended before
Postgres 10, as they're not crash-safe. Equality comparisons only.
Additionally indexes can be declared as:
* Unique - only one distinct column value allowed over the whole table. NB! When some columns are "nullable" (i.e. without
a NOT NULL constraint) then uniqueness is not guaranteed.
* Multi-column - multiple column are stored in the index. When searching then the leftmost columns should aways be specified
for effective searching.
* Partial - with a WHERE condition to index only a subset of all values
* Functional - value returned by the given function will be stored in the index. The same filter needs to present in the query also
to be able to benefit from the index.
NB! Not all available data types have support for all index types. Users can add index support themselves though.
And when building indexes the maintenance_work_mem parameter can be increased to speed up the process significantly.
*/
SET search_path TO public;
/* B-tree (default) */
-- the simplest index declaration
CREATE INDEX ON main_datatypes(smallish_integers);
-- unique index with manually specified index name.
-- naming makes sense in conjunction with "IF NOT EXISTS", not to build an index twice. having duplicate indexes is a pure waste.
CREATE UNIQUE INDEX my_index ON main_datatypes(smallish_integers);
-- one can also use the "CONCURRENTLY" keyword to build the index so that other queries would be minimally affected.
CREATE INDEX CONCURRENTLY my_concurrently_built_index ON main_datatypes(smallish_integers);
-- building a partial index, leaving out nulls and negatives
CREATE INDEX ON main_datatypes(smallish_integers) WHERE smallish_integers > 0;
-- building a functional index on first 3 letters of text_data1
CREATE INDEX ON main_datatypes(substring(text_data1, 1, 3));
/* GIN */
RESET role; -- superuser needed for creating extensions and using COPY PROGRAM
CREATE EXTENSION IF NOT EXISTS btree_gin; -- needed for indexing some "usual" datatypes like integers
-- when we compare the B-tree and the GIN index we see that the latter is ~10x smaller
CREATE INDEX ON banking.account USING gin (teller_id);
-- JSONB - index top level keys for a simple NoSQL use case.
CREATE INDEX CONCURRENTLY ON main_datatypes USING gin (json_data);
-- Enables for example following indexed queries:
SELECT * FROM main_datatypes WHERE json_data @> '{"x": 1}';
-- JSONB - index also inner objects/paths
CREATE INDEX ON main_datatypes USING gin (json_data jsonb_path_ops);
/* GiST */
-- implementing exclusion constraints - ensure no time overlappings are possible
CREATE EXTENSION IF NOT EXISTS btree_gist;
CREATE TABLE public.room_reservation (
room text,
during tsrange,
EXCLUDE USING GIST (room WITH =, during WITH &&)
);
ALTER TABLE public.room_reservation OWNER TO demorole;
-- similarity search (or fuzzy, or simple kNN), 5 alphabetically most similar places to 'kramertneusiedel' in Austria
-- such fuzzy search could also be combined with other functions from the "fuzzystrmatch" extension.
CREATE TABLE fuzzy_search (name text);
ALTER TABLE fuzzy_search OWNER TO demorole;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
COPY fuzzy_search FROM PROGRAM 'curl www.cybertec.at/secret/orte.txt'; -- ~2k names
SET ROLE TO demorole;
CREATE INDEX ON fuzzy_search USING gist (name gist_trgm_ops);
SELECT * FROM fuzzy_search ORDER BY name <-> 'kramertneusiedel' LIMIT 5;
-- indexing geographical objects
CREATE TABLE containing_boxes(box_coords box);
INSERT INTO containing_boxes
SELECT format('((-%s,-%s),(%s,%s))', floor(random()*100), floor(random()*100), floor(random()*100), floor(random()*100) )::box
FROM generate_series(1, 1e5);
CREATE INDEX ON containing_boxes USING gist (box_coords);
ANALYZE containing_boxes; -- gather statistics
EXPLAIN SELECT * FROM containing_boxes WHERE box_coords @> '((2,2),(4,4))';
/* "Covering" indexes
This is a v11+ feature that enables to store some often selected columns together with the indexed key value in the
index tree, enabling fast access without going to the table files. NB! As of currently Postgres cannot use those extra
columns for filtering so multi-column indexes still have some use cases.
*/
CREATE INDEX ON room_reservation (room) INCLUDE (during);