-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathemployment_get.R
282 lines (231 loc) · 10 KB
/
employment_get.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#' @title Get employment data
#'
#' @description Download the employment data for a country and arrange it to the
#' 64x64 SIOTs.
#'
#' @details Currently works only with product x product tables.
#' @param geo The country code.
#' @param year The year. The average employment will be created for the given
#' year, starting with \code{2008}, when the NACE Rev 2 was introduced in
#' employment statistics.
#' @param sex Defaults to \code{"Total"}. Enter \code{"Females"} or \code{"F"} for
#' female employment, \code{"Males"} or \code{"M"} for male employment.
#' @param age Defaults to \code{"Y_GE15"}, which is the Eurostat code for employment in
#' all age groups starting from 15-years-old. Any Eurostat code can be used as a
#' parameter.
#' @param labelling Either \code{"iotables"} or the applicable short code,
#' for product x product SIOTs \code{"prod_na"} and in the case of industry x
#' industry SIOTs \code{"induse"}.
#' @param data_directory Defaults to \code{NULL}, if a valid directory, it will try to save the pre-processed
#' data file here with labelling.
#' @param force_download Defaults to \code{FALSE}. It will use the existing downloaded file
#' in the \code{data_directory} or the temporary directory, if it exists.
#' @importFrom dplyr filter select mutate left_join rename ungroup summarize group_by
#' @importFrom tidyr spread
#' @importFrom eurostat get_eurostat
#' @source Eurostat statistic
#' \href{https://ec.europa.eu/eurostat/web/products-datasets/-/lfsq_egan22d}{Employment
#' by sex, age and detailed economic activity (from 2008 onwards, NACE Rev. 2 two digit level) - 1 000}
#' @return A data.frame with auxiliary metadata to conform the symmetric
#' input-output tables.
#' @family import functions
#' @autoglobal
#' @examples
#' \dontrun{
#' io_tables <- get_employment (
#' geo = "CZ",
#' year = "2010",
#' sex = "Total",
#' age = "Y_GE15",
#' data_directory = NULL,
#' force_download = TRUE
#' )
#' }
#' @export
employment_get <- function ( geo,
year = "2010",
sex = "Total",
age = "Y_GE15",
labelling = 'iotables',
data_directory = NULL,
force_download = FALSE) {
if ( ! labelling %in% c("iotables", 'prod_na', 'induse')) {
stop("Labelling must be any of 'iotables', 'prod_na' [product x product] or 'induse' [industry x industry]")
}
year_number <- as.numeric(year)
## Avoiding no visible binding for global variable 'data' ----------
getdata <- function(...)
{
e <- new.env()
name <- utils::data(..., envir = e)[1]
e[[name]]
}
employment_metadata <- getdata("employment_metadata")
save_employment_file <- paste0('employment_', tolower (age), '_',
tolower(sex), '_',
geo, '_', year, '_avg.rds')
### Changing to Eurostat in case of GB/UK and GR/EL-------
if ( geo %in% c("GB", "GR")) {
if (geo == "GB") {
warning ( "Switching GB to Eurostat abbreviation UK.")
geo <- "UK"
}
if (geo == "GR") {
warning ( "Switching GR to Eurostat abbreviation EL.")
geo <- "EL"
}
}
sex <- tolower(sex)
sex <- ifelse ( grepl("total", sex), "T",
ifelse ( grepl("female", sex), "F", "M"))
emp <- NULL
## Use data_directory if it exists--------------------------------
if ( !is.null(data_directory) ) {
#pre-existing raw data file in the data directory
emp_file_name <- file.path(data_directory, "lfsq_egan22d.rds")
if ( ! force_download ) { #no new download and filtered version exists
if ( file.exists(file.path(data_directory, save_employment_file)) ) {
message("Try to use pre-existing file ", save_employment_file )
tryCatch({
emp <- readRDS(file.path(data_directory, save_employment_file))
}, error = function(cond) {
message ( "Could not read file.", cond)
return(NULL)
}, finally ={
return(emp)
})
} else { #no filtered version exists, work with raw file
tryCatch({
#Read pre-existing unfiltered raw data file
emp <- readRDS(emp_file_name)
}, error = function(cond) {
message ( 'Could not read file ', emp_file_name, '\n', cond)
return(NULL)
}, finally ={
message ( 'Read ', emp_file_name )
})
}
} #end case of no forced download
} #end case data_directory is not NULL
## Forced/new download--------------------------------
if (is.null(emp)) {
message ( "Downloading employment data from the Eurostat database.")
emp <- eurostat::get_eurostat ("lfsq_egan22d")
if ( !is.null(data_directory) ) {
#if !is.null emp_file_name is the general file name (without filtering
# for the statistic and was created in the previous block including the
# directory name)
tryCatch({
#Read pre-existing unfiltered raw data file
saveRDS(emp, file = emp_file_name)
}, error = function(cond) {
message ( "Failed to save ", emp_file_name, '.')
}, finally = {
message ( 'Saving raw employment data to ', emp_file_name )
})
}
}
## Geo selection and exception handling--------------------------------
if ( geo %in% unique (emp$geo) ) {
select_geo <- which(as.character(emp$geo) %in% as.character(geo))
emp <- emp[select_geo, ]
} else {
stop ("No employment data found with geo parameter = ", geo )
}
if ( "TIME_PERIOD" %in% names(emp) ) {
# Breaking change from eurostat 4.0.0
emp <- emp %>% rename ( time = TIME_PERIOD)
}
emp$year <- as.numeric(substr(as.character(emp$time), start = 1, stop = 4))
## Year selection and exception handling -------------------------------------
if ( year %in% unique (emp$year) ) {
select_year <- which(emp$year %in% year_number)
emp <- emp[select_year, ]
} else {
stop ("No employment data found with the year parameter = ", year )
}
## Age group selection and exception handling ---------------------------------
if ( age %in% unique (emp$age) ) {
select_age <- which(as.character(emp$age) %in% as.character(age))
emp <- emp[select_age, ]
} else {
stop ("No employment data found with the age parameter = ", age )
}
## Sex variable selection and exception handling--------------------------------
if ( sex %in% unique (emp$sex) ) {
select_sex <- which(as.character(emp$sex) %in% as.character(sex))
emp <- emp[select_sex, ]
} else {
stop ("No employment data found with sex parameter = ", sex )
}
## Missing values changed to 0 -------------------------------------------------
emp$values <- ifelse ( is.na(emp$values), 0, emp$values )
## Data processing for employment variables ------------------------------------
employment <- emp %>%
mutate ( nace_r2 = as.character(.data$nace_r2) ) %>%
group_by ( nace_r2, year ) %>%
summarize ( values = mean(.data$values), .groups = "drop") %>%
dplyr::rename ( emp_code = nace_r2 ) %>%
ungroup () %>%
left_join ( employment_metadata,
by = "emp_code") %>% # iotables:::employment_metadata
dplyr::group_by ( code, variable, iotables_label ) %>%
dplyr::summarize ( values = sum(.data$values), .groups = "drop")
## If data_directory exists, save results -------------------------------
if ( ! is.null(data_directory) ) {
message ( "Saving ", save_employment_file )
saveRDS(employment, file = file.path(data_directory,
save_employment_file)
)
}
## If data_directory exists, save results--------------------------------
emp_sex <- ifelse ( tolower(sex) == "t", "total",
ifelse (tolower(sex) == "f", "female", "male" ))
if ( labelling == "iotables" ) { # this is the Eurostat manual-tutorial type labelling format
prefix <- data.frame (
iotables_row = paste0("employment_", emp_sex )
)
primary_employment_input <- employment %>%
filter ( variable == "prod_na" ) #does not matter which, not used
##No employment for imputed rent column--------------------------------
imputed_rent <- data.frame (
real_estate_imputed_a = 0
)
primary_employment_input <- primary_employment_input %>%
dplyr::ungroup() %>%
select ( iotables_label, values ) %>%
tidyr::spread ( iotables_label, values ) #use iotables_label in this case
} else if ( labelling == "prod_na" ){ ## this is the product x product labelling format
prefix <- data.frame (
prod_na = paste0("employment_", emp_sex )
)
primary_employment_input <- employment %>%
dplyr::filter ( variable == "prod_na" )
imputed_rent <- data.frame (
CPA_L68A = 0
)
primary_employment_input <- primary_employment_input %>%
dplyr::ungroup() %>%
dplyr::select ( code, values ) %>%
tidyr::spread ( code, values ) #use code for standard Eurostat library
} else if (labelling == "induse" ) { # this is the industry x industry labelling format
prefix <- data.frame (
induse = paste0("employment_", emp_sex )
)
primary_employment_input <- employment %>%
dplyr::filter ( variable == "induse" )
imputed_rent <- data.frame (
L68A = 0
)
primary_employment_input <- primary_employment_input %>%
dplyr::ungroup() %>%
dplyr::select ( code, values ) %>%
tidyr::spread ( code, values ) #use code for standard Eurostat library
} else {
warning("No L68A was added.")
return ( primary_employment_input )
}
return_employment <- cbind( prefix, primary_employment_input )
return_employment <- cbind ( return_employment, imputed_rent )
return_employment
}