Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Geocodificado recursivo #5

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
Package: caRtociudad
Type: Package
Title: Interface to Cartociudad API
Version: 0.5.2
Version: 0.5.5
Date: 2017-07-26
Encoding: UTF-8
Authors@R: c(person("Carlos J.", "Gil Bellosta", email="[email protected]", role=c('cre', 'aut')),
person("Luz", "Frías", email = "[email protected]", role = "aut"))
Author: Carlos J. Gil Bellosta, Luz Frías
Maintainer: Carlos J. Gil Bellosta <[email protected]>
Description: Access to Cartociudad cartography API, which provides mapping and other related services for Spain.
Imports: httr, jsonlite, xml2, plyr, geosphere
Imports: httr, jsonlite, xml2, plyr, geosphere, utils
Depends: R (>= 3.0.0)
Suggests: ggmap, testthat
URL: https://github.com/cjgb/caRtociudad
Expand Down
164 changes: 116 additions & 48 deletions R/cartociudad_geocode.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,29 @@

#' @title Interface to Cartociudad geolocation API
#'
#' @description Geolocation of Spanish addresses via Cartociudad API calls, providing the
#' full address in a single text string via \code{full_address}. It is
#' advisable to add the street type (calle, etc.) and to omit the country
#' name.
#' @description Geolocation of Spanish addresses via Cartociudad API calls,
#' providing the full address in a single text string via \code{full_address}.
#' It is advisable to add the street type (calle, etc.) and to omit the
#' country name.
#'
#' @usage cartociudad_geocode(full_address, on.error = "fail", ...)
#' @usage cartociudad_geocode(full_address, version = c("current", "prev"),
#' output_format = "JSON", on_error = c("warn", "fail"), ntries = 1)
#'
#' @param full_address Character string providing the full address to be
#' geolocated; e.g., "calle miguel servet 5, zaragoza". Adding the country may
#' cause problems.
#' @param on.error Defaults to \code{fail}; in such case, in case of errors in the API call, the process will fail. Set it to
#' "warn" and, in case of errors, the function will return \code{NULL} and a warning.
#' @param ... Other parameters for the API. See Details section below.
#'
#' @details The entity geolocation API admits more parameters beyond the address field such as \code{id} or \code{type}.
#' You can use these extra arguments (see the References or the Examples sections below for further information)
#' at your own risk.
#' @param version Character string. Geocoder version to use: \code{current} or
#' \code{prev}.
#' @param output_format Character string. Output format of the query:
#' \code{JSON} or \code{GeoJSON}. Only applicable if you choose version =
#' "current".
#' @param on_error Character string. Defaults to \code{warn}: in case of errors,
#' the function will return an empty \code{data.frame} and a warning. Set it
#' to \code{fail} to stop the function call in case of errors in the API call.
#' @param ntries Numeric. In case of connection failure, number of \code{GET}
#' requests to be made before stopping the function call.
#'
#' @return A data frame consisting of a single row per guess. See the reference
#' @return A data frame consisting of a single row per query. See the reference
#' below for an explanation of the data frame columns.
#'
#' @author Carlos J. Gil Bellosta
Expand All @@ -32,43 +36,107 @@
#' \url{http://www.cartociudad.es/recursos/Documentacion_tecnica/CARTOCIUDAD_ServiciosWeb.pdf}
#'
#' @examples
#' # standard usage
#' res <- cartociudad_geocode(full_address = "plaza de cascorro 11, 28005 madrid")
#'
#' #' # km 41 of A-23 motorway
#' res <- cartociudad_geocode("A-23 41")
#'
#' # specific usage (see References for details)
#' res <- cartociudad_geocode("A-23 41", type = "portal", id = "600000000045", portal = 41)
#'
#' # vectorized call
#' \dontrun{
#' addresses <- paste("A-23", 1:10)
#' res <- lapply(addresses, cartociudad_geocode, on.error = "warn")
#' }
#' # Query a single address
#' address <- "plaza de cascorro 11, 28005 madrid"
#' my.address <- cartociudad_geocode(full_address = address)
#' print(my.address)
#'
#' # Query multiple addresses
#' address <- c(address, "plaza del ayuntamiento 1, valencia")
#' my.address <- cartociudad_geocode(full_address = address)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Misma recomendación que con cartociudad_reverse_geocode: sugiero incluir un test para múltiples direcciones

#' print(my.address)
#'
#' @export
#'
cartociudad_geocode <- function(full_address, version = c("current", "prev"),
output_format = "JSON", on_error = c("warn", "fail"),
ntries = 1) {

stopifnot(class(full_address) == "character")
stopifnot(length(full_address) >= 1)
version <- match.arg(version)
on_error <- match.arg(on_error)
no_geocode <- which(nchar(full_address) == 0)
total <- length(full_address)
res_list <- vector("list", total)
curr_names <- c("id", "province", "muni", "tip_via", "address", "portalNumber",
"refCatastral", "postalCode", "lat", "lng", "stateMsg",
"state", "type")
prev_names <- c("road_fid", "province", "municipality", "road_type", "road_name",
"numpk_name", "numpk_fid", "zip", "latitude", "longitude",
"comments", "status")
pb <- utils::txtProgressBar(min = 0, max = total, style = 3)
empty_df <- as.data.frame(
matrix(NA_character_, nrow = 0, ncol = length(curr_names), dimnames = list(c(), curr_names)),
stringsAsFactors = FALSE
)
con_out <- numeric()

for (i in seq_len(total)) {
res_list[[i]] <- empty_df
if (!i %in% no_geocode) {
ua <- get_cartociudad_user_agent()
if (version == "current") {
api.args <- list(q = full_address[i], outputformat = output_format)
get_url <- "http://www.cartociudad.es/geocoder/api/geocoder/findJsonp"
} else {
api.args <- list(max_results = 1, address = full_address[i])
get_url <- "http://www.cartociudad.es/CartoGeocoder/Geocode"
}
res <- get_ntries(get_url, api.args, ua, ntries)

cartociudad_geocode <- function(full_address, on.error = "fail", ...) {

api.args <- c(list(q = full_address), ...)
ua <- get_cartociudad_user_agent()
res <- httr::GET("http://www.cartociudad.es/geocoder/api/geocoder/findJsonp",
query = api.args, ua)

if (httr::http_error(res)){
if (on.error == "fail")
stop("Call to cartociudad API failed with error code ", res$status_code)

warning("Call to cartociudad API failed with error code ", res$status_code)
return(NULL)
if (length(res) == 0) {
warning("Failing to connect with server in query ", i,
": try later with addressess in attr(results, 'rerun').")
res_list[[i]] <- plyr::rbind.fill(
res_list[[i]],
data.frame(address = full_address[i], version = version, stringsAsFactors = FALSE)
)
con_out <- c(con_out, i)
} else if (httr::http_error(res)) {
if (on_error == "fail")
stop("Call to cartociudad API failed with error code ", res$status_code)
warning("Error in query ", i, ": ", httr::http_status(res)$message)
res_list[[i]] <- plyr::rbind.fill(
res_list[[i]],
data.frame(address = full_address[i], version = version, stringsAsFactors = FALSE)
)
} else {
res <- jsonp_to_json(suppressMessages(httr::content(res, as = "text")))
res <- jsonlite::fromJSON(res)
res <- res[-which(names(res) %in% c("geom", "countryCode", "error", "success"))]
if (version == "current") {
res <- lapply(res, function(x) ifelse(is.null(x), NA_character_, x))
} else {
res <- res[[1]]
}
if (length(res) == 0) {
warning("The query ", i, " has 0 results.")
res_list[[i]] <- plyr::rbind.fill(
res_list[[i]],
data.frame(address = full_address[i], version = version, stringsAsFactors = FALSE)
)
} else {
if (version == "current") {
res_list[[i]] <- as.data.frame(t(unlist(res)), stringsAsFactors = FALSE)[, curr_names]
res_list[[i]] <- cbind(res_list[[i]], version = "current")
} else {
res_list[[i]] <- cbind(res[, prev_names], type = NA_character_, version = "prev")
names(res_list[[i]]) <- c(curr_names, "version")
row.names(res_list[[i]]) <- NULL
}
}
}
} else {
warning("Empty string as query in address ", i, ": NA returned.")
res_list[[i]] <- empty_df[1, ]
}
utils::setTxtProgressBar(pb, i)
}

res <- jsonp_to_json(httr::content(res, as = "text", encoding = "UTF8"))
res <- jsonlite::fromJSON(res)
res <- as.data.frame(t(unlist(res)), stringsAsFactors = FALSE)

res$lat <- as.numeric(res$lat)
res$lng <- as.numeric(res$lng)

res
cat("\n")
results <- plyr::rbind.fill(res_list)
results[, c("lat", "lng")] <- apply(results[, c("lat", "lng")], 2, as.numeric)
attributes(results)$rerun <- full_address[con_out]
return(results)
}
77 changes: 49 additions & 28 deletions R/cartociudad_reverse_geocode.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
#' @details This function performs reverse geocoding of a location. It returns
#' the details of the closest address in Spain.
#'
#' @usage cartociudad_reverse_geocode(latitude, longitude)
#' @usage cartociudad_reverse_geocode(latitude, longitude, ntries = 1)
#'
#' @param latitude Point latitude in geographical coordinates (e.g., 40.473219)
#' @param longitude Point longitude in geographical coordinates (e.g.,
#' -3.7227241)
#' @param ntries Numeric. In case of connection failure, number of \code{GET}
#' requests to be made before stopping the function call.
#'
#' @return A list with the following items:
#' @return A data frame consisting of a single row per query, with columns:
#' \item{tipo}{type of location.}
#' \item{tipo.via}{road type.}
#' \item{nombre.via}{road name.}
Expand All @@ -32,34 +34,53 @@
#' \url{http://www.cartociudad.es/recursos/Documentacion_tecnica/CARTOCIUDAD_ServiciosWeb.pdf}
#'
#' @examples
#' # Query one point
#' cartociudad_reverse_geocode(40.473219, -3.7227241)
#'
#' # Query multiple points
#' cartociudad_reverse_geocode(c(40.473219, 39.46979), c(-3.7227241, -0.376963))
#'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Como recomendación, estaría bien añadir un test en tests/testthat.R con el caso de llamar a cartociudad_reverse_geocode para múltiples localizaciones

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yo me ocupo de las pruebas.
He pensado en comprobar el funcionamiento ante casos típicos que estoy viendo, como una dirección sin número (devuelve la geometría de toda la vía y la latitud y longitud del portal 0), portales y pk's inexistentes (devolución del punto más próximo), y viales inexistentes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perfecto!

#' @export
#'
cartociudad_reverse_geocode <- function(latitude, longitude) {

query.parms <- list(
lat = latitude,
lon = longitude
)

url <- "http://www.cartociudad.es/services/api/geocoder/reverseGeocode"
ua <- get_cartociudad_user_agent()


res <- httr::GET(url, query = query.parms, ua)
httr::stop_for_status(res)
info <- httr::content(res)
# Parse the response
res <- list(
tipo = info$type,
tipo.via = info$tip_via,
nombre.via = info$address,
num.via = info$portalNumber,
num.via.id = info$id,
municipio = info$muni,
provincia = info$province,
cod.postal = info$postalCode
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sobre la eliminación de esto, creo que conviene mantener en el paquete el mapeo de nombres, para:

  • Mantener coherencia: son nombres con mismo idioma, mismo formato, no como los que devuelve directamente la API (hay inglés, español, underscored, camelCased, ...)
  • Mantener retro-compatibilidad con los usuarios actuales
  • Asegurar futura compatibilidad: si se lanza una nueva versión de la API y los campos tienen otros nombres, cambiamos el mapeo en el paquete, y la actualización será transparente para los usuarios

Dime cómo lo ves. Si estás de acuerdo, sería bueno mantener el trozo de documentación eliminado sobre los nombres

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Me parece perfecto, aunque creo que sería mejor devolver un data.frame en lugar de una lista. ¿Conservamos esa parte?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, totalmente de acuerdo, tiene más sentido ahora que está vectorizado

return(res)
cartociudad_reverse_geocode <- function(latitude, longitude, ntries = 1) {
  # Reverse geocodes each (latitude, longitude) pair with one API request per
  # pair and binds the answers into a single data frame, one row per query.
  #
  # latitude, longitude: vectors of equal length with the point coordinates.
  # ntries: forwarded to get_ntries() to control retries on connection
  #   failure.
  #
  # Queries that cannot be answered (connection failure, HTTP error or empty
  # response) raise a warning and contribute a row holding only the input
  # coordinates in `lat` / `lng`, so they remain identifiable in the output.

  stopifnot(length(latitude) == length(longitude))

  total <- length(latitude)
  if (total == 0) {
    # Nothing to query: txtProgressBar() rejects max == min, and
    # plyr::rbind.fill() returns NULL for an empty list.
    return(data.frame(lat = numeric(0), lng = numeric(0),
                      stringsAsFactors = FALSE))
  }

  url <- "http://www.cartociudad.es/services/api/geocoder/reverseGeocode"
  ua <- get_cartociudad_user_agent()
  no_select <- c("geom", "poblacion", "stateMsg", "state", "priority",
                 "countryCode")
  res_list <- vector("list", total)
  pb <- utils::txtProgressBar(min = 0, max = total, style = 3)
  # close() also emits the trailing newline, and runs even if a query errors.
  on.exit(close(pb), add = TRUE)

  for (i in seq_len(total)) {
    fallback <- data.frame(lat = latitude[i], lng = longitude[i],
                           stringsAsFactors = FALSE)
    query.parms <- list(lat = latitude[i], lon = longitude[i])
    res <- get_ntries(url, query.parms, ua, ntries)

    if (length(res) == 0) {
      # get_ntries() hands back a zero-length value when every attempt failed;
      # that value must not reach httr::http_error().
      warning("Failing to connect with server in query ", i, ".")
      res_list[[i]] <- fallback
    } else if (httr::http_error(res)) {
      warning("Error in query ", i, ": ", httr::http_status(res)$message)
      res_list[[i]] <- fallback
    } else {
      info <- httr::content(res)
      if (!is.null(names(info))) {
        # Logical indexing: `info[-which(...)]` would drop every field when
        # none of the excluded names is present.
        info <- info[!names(info) %in% no_select]
      }
      if (length(info) == 0) {
        warning("Query ", i, " produced 0 results.")
        res_list[[i]] <- fallback
      } else {
        res_list[[i]] <- as.data.frame(t(unlist(info)),
                                       stringsAsFactors = FALSE)
      }
    }
    utils::setTxtProgressBar(pb, i)
  }

  results <- plyr::rbind.fill(res_list)

  # Map API field names to the package's own column names. The lookup is
  # driven by the name map, not by the number of columns returned.
  names_old <- c("type", "tip_via", "address", "portalNumber", "id",
                 "muni", "province", "postalCode", "lat", "lng")
  names_new <- c("tipo", "tipo.via", "nombre.via", "num.via", "num.via.id",
                 "municipio", "provincia", "cod.postal", "lat", "lng")
  idx <- match(colnames(results), names_old)
  known <- !is.na(idx)
  colnames(results)[known] <- names_new[idx[known]]

  results
}
15 changes: 15 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,18 @@ jsonp_to_json <- function(text) {
text <- gsub("\\)$", "", text)
return(text)
}

get_ntries <- function(url, query, ua, tries, wait = 5) {
  # Performs httr::GET(url, query = query, ua), retrying when the call
  # signals an error (e.g. the connection cannot be established).
  #
  # url:   endpoint to request.
  # query: list of query parameters passed to httr::GET().
  # ua:    extra argument forwarded positionally to httr::GET().
  # tries: number of retries after the first failed attempt, so at most
  #        tries + 1 requests are sent. Missing, invalid or negative values
  #        are treated as 0 (a single attempt).
  # wait:  seconds to sleep before each retry.
  #
  # Returns the value of httr::GET() from the first attempt that does not
  # signal an error, or a zero-length character vector when every attempt
  # failed; callers detect failure with length(res) == 0.
  retries <- max(0L, suppressWarnings(as.integer(tries)), na.rm = TRUE)

  for (attempt in seq_len(retries + 1L)) {
    res <- tryCatch(
      httr::GET(url, query = query, ua),
      error = function(e) NULL
    )
    if (!is.null(res)) {
      return(res)
    }
    if (attempt <= retries) {
      message("Failing to connect with server: retrying...")
      Sys.sleep(wait)
    }
  }

  character()
}
53 changes: 27 additions & 26 deletions man/cartociudad_geocode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading