diff --git a/NAMESPACE b/NAMESPACE index 0fd7427..4e1fbaa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(problems) export(statcan_data) export(statcan_download_data) export(statcan_search) @@ -14,3 +15,4 @@ import(tibble) importFrom(data.table,fread) importFrom(data.table,setDF) importFrom(httr,http_error) +importFrom(readr,problems) diff --git a/R/statCanR.R b/R/statCanR.R index 8b5d497..6407009 100644 --- a/R/statCanR.R +++ b/R/statCanR.R @@ -51,7 +51,7 @@ statcan_data <- function(tableNumber, lang) urlEng <- paste0("https://www150.statcan.gc.ca/n1/en/tbl/csv/", tableNumber, "-eng.zip") - download_dir <- file.path(tempdir(), "datasetEng.zip") + download_file <- file.path(tempdir(), "datasetEng.zip") #utils::download.file(urlEng, download_dir, method = "curl") #data.table::fread(input = urlEng, file = download_dir) @@ -63,24 +63,29 @@ statcan_data <- function(tableNumber, lang) #return(NULL) } else { # network is up = proceed to download via curl message("statcanR: downloading remote table.") - with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlEng, download_dir)) + with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlEng, download_file)) # unziping the downloaded data file in English version - unzip_dir <- file.path(paste0(tempdir(), "/")) - utils::unzip(zipfile = download_dir, exdir = unzip_dir) - unlink(download_dir) + unzip_dir <- tempfile() + utils::unzip(zipfile = download_file, exdir = unzip_dir) + unlink(download_file) + on.exit(unlink(unzip_dir, recursive = TRUE)) # loading the data file in English version - csv_file <- file.path(paste0(tempdir(), "/", tableNumber, ".csv")) + csv_file <- file.path(unzip_dir, paste0(tableNumber, ".csv")) can_data <- data.table::fread(csv_file) # adding to the data.frame or data.table the Official Data Table # Indicator defined by Statistics Canada and based on metadata file. can_data$INDICATOR <- as.character(0) can_data$COORDINATE <- as.character(can_data$COORDINATE) - can_data$INDICATOR <- as.character(readr::read_csv(paste0(tempdir(), - "/", tableNumber, "_MetaData.csv"))[1, 1]) + lines <- readLines(file.path(unzip_dir, paste0(tableNumber, "_MetaData.csv")), n = 2) + # Remove terminal comma from metadata + lines[2] <- sub(",$", "", lines[2]) + metadata <- readr::read_csv(I(lines), show_col_types = FALSE) + probs <- problems(metadata) + can_data$INDICATOR <- as.character(metadata[1, 1]) if(nchar(can_data[1,1])==9){ can_data$REF_DATE <- sub(".*/", "", can_data$REF_DATE) @@ -100,9 +105,6 @@ statcan_data <- function(tableNumber, lang) can_data$REF_DATE <- as.Date(can_data$REF_DATE) } - # creating a data frame - data.table::setDF(return(can_data)) - } # /if - network up or down } @@ -114,7 +116,7 @@ statcan_data <- function(tableNumber, lang) urlFra <- paste0("https://www150.statcan.gc.ca/n1/fr/tbl/csv/", tableNumber, "-fra.zip") - download_dir <- file.path(tempdir(), "datasetFra.zip") + download_file <- file.path(tempdir(), "datasetFra.zip") #utils::download.file(urlFra, download_dir, method = "curl") #data.table::fread(urlFra, download_dir) #curl::curl_download(urlFra, download_dir) @@ -124,24 +126,26 @@ statcan_data <- function(tableNumber, lang) return(NULL) } else { # network is up = proceed to download via curl message("statcanR: downloading remote table.") - with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlFra, download_dir)) + with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlFra, download_file)) # unzipping the downloaded data file in French version - unzip_dir <- file.path(paste0(tempdir(), "/")) - utils::unzip(zipfile = download_dir, exdir = unzip_dir) - unlink(download_dir) - - # loading the data file in French version - csv_file <- file.path(paste0(tempdir(), "/", tableNumber, ".csv")) + unzip_dir <- tempfile() + utils::unzip(zipfile = download_file, exdir = unzip_dir) + unlink(download_file) + on.exit(unlink(unzip_dir, recursive = TRUE)) + + # loading the data file in French version + csv_file <- file.path(unzip_dir, paste0(tableNumber, ".csv")) can_data <- data.table::fread(csv_file) # adding to the data.frame or data.table the Official Data Table # Indicator defined by Statitics Canada and based on metadata file. can_data$INDICATOR <- as.character(0) can_data$COORDINATE <- as.character(can_data$COORDINATE) - can_data$INDICATOR <- as.character(readr::read_delim(paste0(tempdir(), - "/", tableNumber, "_MetaData.csv"), delim = ";", escape_double = FALSE, trim_ws = TRUE)[1, 1]) + metadata <- readr::read_delim(file.path(unzip_dir, paste0(tableNumber, "_MetaData.csv")), delim = ";", escape_double = FALSE, trim_ws = TRUE, n_max = 1) + probs <- problems(metadata) + can_data$INDICATOR <- as.character(metadata[1, 1]) can_data[,1] <- "REF_DATE" @@ -163,168 +167,22 @@ statcan_data <- function(tableNumber, lang) can_data$REF_DATE <- as.Date(can_data$REF_DATE) } - # creating a data frame - data.table::setDF(return(can_data)) } # /if - network up or down } + if (!is.null(probs)) + attr(can_data, "problems") <- probs - # removing the temp folder and creating a data frame - unlink(tempdir()) + can_data } -#' statcanR download data function -#' -#' @param tableNumber The table number of the Statistics Canada data table -#' @param lang The language wanted -#' -#' @return The output will be a data table and csv file representing the data associated with the chosen table number. #' @export -#' -#' -#' @import curl -#' @import tibble -#' @importFrom httr http_error -#' @importFrom data.table fread -#' @importFrom data.table setDF -#' -#' @examples -#' mydata <- statcan_data('27-10-0014-01', 'eng') +#' @rdname statcan_data -# Download function for statcan -statcan_download_data <- function(tableNumber, lang) -{ - - # identifying the table number - tableNumber <- gsub("-", "", substr(tableNumber, 1, nchar(tableNumber) - - 2)) - - # getting data in English version - if (lang == "eng") - { - # downloading the data file in English version - urlEng <- paste0("https://www150.statcan.gc.ca/n1/en/tbl/csv/", - tableNumber, "-eng.zip") - - download_dir <- file.path(tempdir(), "datasetEng.zip") - #utils::download.file(urlEng, download_dir, method = "curl") - #data.table::fread(input = urlEng, file = download_dir) - #RCurl::getURL(urlEng, ssl.verifyhost=FALSE, ssl.verifypeer=FALSE) - #curl::curl_download(urlEng, download_dir) - - if (httr::http_error(urlEng)) { # network is down = message (not an error anymore) - message("No Internet connection or incorrect statcan table number or Statistics Canada website under maintenance.") - #return(NULL) - } else { # network is up = proceed to download via curl - message("statcanR: downloading remote table.") - with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlEng, download_dir)) - - # unziping the downloaded data file in English version - unzip_dir <- file.path(paste0(tempdir(), "/")) - utils::unzip(zipfile = download_dir, exdir = unzip_dir) - unlink(download_dir) - - # loading the data file in English version - csv_file <- file.path(paste0(tempdir(), "/", tableNumber, ".csv")) - can_data <- data.table::fread(csv_file) - - # adding to the data.frame or data.table the Official Data Table - # Indicator defined by Statistics Canada and based on metadata file. - can_data$INDICATOR <- as.character(0) - can_data$COORDINATE <- as.character(can_data$COORDINATE) - can_data$INDICATOR <- as.character(readr::read_csv(paste0(tempdir(), - "/", tableNumber, "_MetaData.csv"))[1, 1]) - - if(nchar(can_data[1,1])==9){ - can_data$REF_DATE <- sub(".*/", "", can_data$REF_DATE) - can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 3, 31, sep = "-")) - can_data <- tibble::add_column(can_data, REF_PERIOD = "Fiscal year", .before = 2) - } - - else if(nchar(can_data[1,1])==7){ - can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 1, sep = "-")) - } - - else if(nchar(can_data[1,1])==4){ - can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 1, 1, sep = "-")) - } - - else{ - can_data$REF_DATE <- as.Date(can_data$REF_DATE) - } - # creating a data frame - data.table::setDF(return(can_data)) - readr::write_csv(can_data, paste0("./statcan_",tableNumber,"_", lang,".csv")) - - } # /if - network up or down - - } - - # getting data in French version - if (lang == "fra") - { - # downloading the data file in French version - urlFra <- paste0("https://www150.statcan.gc.ca/n1/fr/tbl/csv/", - tableNumber, "-fra.zip") - - download_dir <- file.path(tempdir(), "datasetFra.zip") - #utils::download.file(urlFra, download_dir, method = "curl") - #data.table::fread(urlFra, download_dir) - #curl::curl_download(urlFra, download_dir) - - if (httr::http_error(urlFra)) { # network is down = message (not an error anymore) - message("No Internet connection or incorrect statcan table number or Statistics Canada website under maintenance.") - return(NULL) - } else { # network is up = proceed to download via curl - message("statcanR: downloading remote table.") - with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlFra, download_dir)) - - - - # unzipping the downloaded data file in French version - unzip_dir <- file.path(paste0(tempdir(), "/")) - utils::unzip(zipfile = download_dir, exdir = unzip_dir) - unlink(download_dir) - - # loading the data file in French version - csv_file <- file.path(paste0(tempdir(), "/", tableNumber, ".csv")) - can_data <- data.table::fread(csv_file) - - # adding to the data.frame or data.table the Official Data Table - # Indicator defined by Statistics Canada and based on metadata file. - can_data$INDICATOR <- as.character(0) - can_data$COORDINATE <- as.character(can_data$COORDINATE) - can_data$INDICATOR <- as.character(readr::read_delim(paste0(tempdir(), - "/", tableNumber, "_MetaData.csv"), delim = ";", escape_double = FALSE, trim_ws = TRUE)[1, 1]) - - can_data[,1] <- "REF_DATE" - - if(nchar(can_data[1,1])==9){ - can_data$REF_DATE <- sub(".*/", "", can_data$REF_DATE) - can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 3, 31, sep = "-")) - can_data <- tibble::add_column(can_data, REF_PERIOD = "Fiscal year", .before = 2) - } - - else if(nchar(can_data[1,1])==7){ - can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 1, sep = "-")) - } - - else if(nchar(can_data[1,1])==4){ - can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 1, 1, sep = "-")) - } - - else{ - can_data$REF_DATE <- as.Date(can_data$REF_DATE) - } - # creating a data frame - data.table::setDF(return(can_data)) - - readr::write_csv(can_data, paste0("./statcan_",tableNumber,"_", lang,".csv")) - } # /if - network up or down - } - - # removing the temp folder and creating a data frame - unlink(tempdir()) -} \ No newline at end of file +statcan_download_data <- statcan_data + +#' @importFrom readr problems +#' @export +readr::problems diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 0000000..ed06f05 --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statCanR.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{problems} +\title{Objects exported from other packages} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. + +\describe{ + \item{readr}{\code{\link[readr]{problems}}} +}} + diff --git a/man/statcan_data.Rd b/man/statcan_data.Rd index eb61b5f..8e40a05 100644 --- a/man/statcan_data.Rd +++ b/man/statcan_data.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/statCanR.R \name{statcan_data} \alias{statcan_data} +\alias{statcan_download_data} \title{statcanR} \usage{ statcan_data(tableNumber, lang) + +statcan_download_data(tableNumber, lang) } \arguments{ \item{tableNumber}{The table number of the Statistics Canada data table} diff --git a/man/statcan_download_data.Rd b/man/statcan_download_data.Rd deleted file mode 100644 index 0f0d0ba..0000000 --- a/man/statcan_download_data.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/statCanR.R -\name{statcan_download_data} -\alias{statcan_download_data} -\title{statcanR download data function} -\usage{ -statcan_download_data(tableNumber, lang) -} -\arguments{ -\item{tableNumber}{The table number of the Statistics Canada data table} - -\item{lang}{The language wanted} -} -\value{ -The output will be a data table and csv file representing the data associated with the chosen table number. -} -\description{ -statcanR download data function -} -\examples{ -mydata <- statcan_data('27-10-0014-01', 'eng') -}