-
Notifications
You must be signed in to change notification settings - Fork 2
/
create-duckdb-tables.R
225 lines (202 loc) · 5.53 KB
/
create-duckdb-tables.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
library(tidyverse)
library(DBI)
library(duckdb)
library(lubridate)
library(cli)
# Conectar a duckdb y generar tablas accesorias ---------------------------
ddb_fn <- "tmp/ddb/peru-vacunas-covid19.duckdb"
if (file.exists(ddb_fn)) {
unlink(ddb_fn) # make sure we start from a newly created db file
}
# Open the duckdb database file
con = dbConnect(duckdb(),
dbdir = ddb_fn,
read_only = FALSE)
tmp <- dbExecute(con, "SET memory_limit='6GB';")
# vacunas
ddl_vacunas <- "
CREATE OR REPLACE TABLE vacunas (
FECHA_CORTE DATE,
UUID VARCHAR,
GRUPO_RIESGO VARCHAR,
EDAD INTEGER,
SEXO VARCHAR,
FECHA_VACUNACION DATE,
DOSIS INTEGER,
FABRICANTE VARCHAR,
DIRESA VARCHAR,
DEPARTAMENTO VARCHAR,
PROVINCIA VARCHAR,
DISTRITO VARCHAR,
CLASIFICACION_VACUNA VARCHAR,
TIPO_EDAD VARCHAR,
-- convertir edades a años
edad_años INTEGER GENERATED ALWAYS AS (
CASE WHEN TIPO_EDAD = 'M'
THEN 0 -- FLOOR(CAST(EDAD AS DOUBLE)/12)
ELSE EDAD
END
),
-- generar un flag para vacunacion general
flag_vacunacion_general BOOLEAN GENERATED ALWAYS AS (
(
(FABRICANTE = 'SINOPHARM' AND
FECHA_VACUNACION > (DATE '2021-02-07')) OR
(FABRICANTE = 'PFIZER' AND
FECHA_VACUNACION > (DATE '2021-03-03')) OR
(FABRICANTE = 'ASTRAZENECA' AND
FECHA_VACUNACION > (DATE '2021-04-18')) OR
(FABRICANTE = 'MODERNA' AND
FECHA_VACUNACION > (DATE '2022-03-25'))
)
AND
(
(FECHA_VACUNACION >= (DATE '2021-02-09') AND DOSIS = 1) OR
(FECHA_VACUNACION >= (DATE '2021-03-02') AND DOSIS = 2) OR
(FECHA_VACUNACION >= (DATE '2021-10-15') AND DOSIS = 3) OR
(FECHA_VACUNACION >= (DATE '2022-04-02') AND DOSIS = 4) OR
(FECHA_VACUNACION >= (DATE '2023-01-01') AND (DOSIS = 4 OR DOSIS = 5))
)
)
);
"
dbExecute(con, ddl_vacunas)
# Almacenar los datos procesados, incluyendo los campos
# dinámicamente calculados para evitar "out of memory" en duckdb
# vacunas_proc
ddl_vacunas_proc <- "
CREATE OR REPLACE TABLE vacunas_proc (
FECHA_CORTE DATE,
UUID VARCHAR,
GRUPO_RIESGO VARCHAR,
EDAD INTEGER,
SEXO VARCHAR,
FECHA_VACUNACION DATE,
DOSIS INTEGER,
FABRICANTE VARCHAR,
DIRESA VARCHAR,
DEPARTAMENTO VARCHAR,
PROVINCIA VARCHAR,
DISTRITO VARCHAR,
CLASIFICACION_VACUNA VARCHAR,
TIPO_EDAD VARCHAR,
edad_años INTEGER,
flag_vacunacion_general BOOLEAN
);
"
dbExecute(con, ddl_vacunas_proc)
# Populate vacunas_proc
# pop_data_vacunas_proc <- "INSERT INTO vacunas_proc SELECT * from vacunas;"
# dbExecute(con, pop_data_vacunas_proc)
# epidates
ddl_epidates <- "
CREATE OR REPLACE TABLE epidates(
FECHA_VACUNACION DATE,
epi_year INTEGER,
epi_week INTEGER,
first_day_of_epi_week DATE,
last_day_of_epi_week DATE,
complete_epi_week INTEGER
);
"
dbExecute(con, ddl_epidates)
# agegroups
ddl_agegroups <- "
CREATE OR REPLACE TABLE agegroups(
EDAD INTEGER,
rango_edad_veintiles VARCHAR,
rango_edad_deciles VARCHAR,
rango_edad_quintiles VARCHAR,
rango_edad_owid VARCHAR
)
"
dbExecute(con, ddl_agegroups)
# views per dose
ddl_dosis1 <- "
create or replace view vacunas_dosis_1 as (
select * from vacunas_proc where flag_vacunacion_general and DOSIS = 1
);
"
dbExecute(con, ddl_dosis1)
ddl_dosis2 <- "
create or replace view vacunas_dosis_2 as (
select * from vacunas_proc where flag_vacunacion_general and DOSIS = 2
);
"
dbExecute(con, ddl_dosis2)
ddl_dosis3 <- "
create or replace view vacunas_dosis_3 as (
select * from vacunas_proc where flag_vacunacion_general and DOSIS = 3
);
"
dbExecute(con, ddl_dosis3)
ddl_dosis4 <- "
create or replace view vacunas_dosis_4 as (
select * from vacunas_proc where flag_vacunacion_general and DOSIS = 4
);
"
dbExecute(con, ddl_dosis4)
ddl_dosis5 <- "
create or replace view vacunas_dosis_5 as (
select * from vacunas_proc where flag_vacunacion_general and DOSIS = 5
);
"
dbExecute(con, ddl_dosis5)
#dosis_1_2_ddl <- "
#create or replace view vacunas_dosis_1_2 as (
#SELECT
# d1.UUID,
# d1.SEXO,
# d1.EDAD as EDAD_1,
# d1.GRUPO_RIESGO as GRUPO_RIESGO_1,
# d1.FECHA_VACUNACION as FECHA_VACUNACION_1,
# d1.FABRICANTE as FABRICANTE_1,
# d1.DIRESA as DIRESA_1,
# d1.DEPARTAMENTO as DEPARTAMENTO_1,
# d1.PROVINCIA as PROVINCIA_1,
# d1.DISTRITO as DISTRITO_1,
# d2.EDAD as EDAD_2,
# d2.GRUPO_RIESGO as GRUPO_RIESGO_2,
# d2.FECHA_VACUNACION as FECHA_VACUNACION_2,
# d2.FABRICANTE as FABRICANTE_2,
# d2.DIRESA as DIRESA_2,
# d2.DEPARTAMENTO as DEPARTAMENTO_2,
# d2.PROVINCIA as PROVINCIA_2,
# d2.DISTRITO as DISTRITO_2
#FROM
# vacunas_dosis_1 as d1
# left join vacunas_dosis_2 as d2
# on d1.UUID = d2.UUID
#);
#"
#
#dbExecute(con, dosis_1_2_ddl)
multidosis_fabricante_ddl <- "
create or replace view multidosis_fabricantes as (
SELECT
d1.FABRICANTE as FABRICANTE_1,
d2.FABRICANTE as FABRICANTE_2,
d3.FABRICANTE as FABRICANTE_3,
d4.FABRICANTE as FABRICANTE_4,
d5.FABRICANTE as FABRICANTE_5,
count(*) as CANTIDAD
FROM
vacunas_dosis_1 as d1
left join vacunas_dosis_2 as d2
on d1.UUID = d2.UUID
left join vacunas_dosis_3 as d3
on d1.UUID = d3.UUID
left join vacunas_dosis_4 as d4
on d1.UUID = d4.UUID
left join vacunas_dosis_5 as d5
on d1.UUID = d5.UUID
GROUP BY
d1.FABRICANTE,
d2.FABRICANTE,
d3.FABRICANTE,
d4.FABRICANTE,
d5.FABRICANTE
);
"
dbExecute(con, multidosis_fabricante_ddl)
dbDisconnect(con, shutdown = TRUE)