Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(problems)
export(statcan_data)
export(statcan_download_data)
export(statcan_search)
Expand All @@ -14,3 +15,4 @@ import(tibble)
importFrom(data.table,fread)
importFrom(data.table,setDF)
importFrom(httr,http_error)
importFrom(readr,problems)
210 changes: 34 additions & 176 deletions R/statCanR.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ statcan_data <- function(tableNumber, lang)
urlEng <- paste0("https://www150.statcan.gc.ca/n1/en/tbl/csv/",
tableNumber, "-eng.zip")

download_dir <- file.path(tempdir(), "datasetEng.zip")
download_file <- file.path(tempdir(), "datasetEng.zip")

#utils::download.file(urlEng, download_dir, method = "curl")
#data.table::fread(input = urlEng, file = download_dir)
Expand All @@ -63,24 +63,29 @@ statcan_data <- function(tableNumber, lang)
#return(NULL)
} else { # network is up = proceed to download via curl
message("statcanR: downloading remote table.")
with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlEng, download_dir))
with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlEng, download_file))


# unzipping the downloaded data file in English version
unzip_dir <- file.path(paste0(tempdir(), "/"))
utils::unzip(zipfile = download_dir, exdir = unzip_dir)
unlink(download_dir)
unzip_dir <- tempfile()
utils::unzip(zipfile = download_file, exdir = unzip_dir)
unlink(download_file)
on.exit(unlink(unzip_dir, recursive = TRUE))

# loading the data file in English version
csv_file <- file.path(paste0(tempdir(), "/", tableNumber, ".csv"))
csv_file <- file.path(unzip_dir, paste0(tableNumber, ".csv"))
can_data <- data.table::fread(csv_file)

# adding to the data.frame or data.table the Official Data Table
# Indicator defined by Statistics Canada and based on metadata file.
can_data$INDICATOR <- as.character(0)
can_data$COORDINATE <- as.character(can_data$COORDINATE)
can_data$INDICATOR <- as.character(readr::read_csv(paste0(tempdir(),
"/", tableNumber, "_MetaData.csv"))[1, 1])
lines <- readLines(file.path(unzip_dir, paste0(tableNumber, "_MetaData.csv")), n = 2)
# Remove terminal comma from metadata
lines[2] <- sub(",$", "", lines[2])
metadata <- readr::read_csv(I(lines), show_col_types = FALSE)
probs <- problems(metadata)
can_data$INDICATOR <- as.character(metadata[1, 1])

if(nchar(can_data[1,1])==9){
can_data$REF_DATE <- sub(".*/", "", can_data$REF_DATE)
Expand All @@ -100,9 +105,6 @@ statcan_data <- function(tableNumber, lang)
can_data$REF_DATE <- as.Date(can_data$REF_DATE)
}

# creating a data frame
data.table::setDF(return(can_data))

} # /if - network up or down

}
Expand All @@ -114,7 +116,7 @@ statcan_data <- function(tableNumber, lang)
urlFra <- paste0("https://www150.statcan.gc.ca/n1/fr/tbl/csv/",
tableNumber, "-fra.zip")

download_dir <- file.path(tempdir(), "datasetFra.zip")
download_file <- file.path(tempdir(), "datasetFra.zip")
#utils::download.file(urlFra, download_dir, method = "curl")
#data.table::fread(urlFra, download_dir)
#curl::curl_download(urlFra, download_dir)
Expand All @@ -124,24 +126,26 @@ statcan_data <- function(tableNumber, lang)
return(NULL)
} else { # network is up = proceed to download via curl
message("statcanR: downloading remote table.")
with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlFra, download_dir))
with(options(timeout = max(300, getOption("timeout"))),curl::curl_download(urlFra, download_file))


# unzipping the downloaded data file in French version
unzip_dir <- file.path(paste0(tempdir(), "/"))
utils::unzip(zipfile = download_dir, exdir = unzip_dir)
unlink(download_dir)

# loading the data file in French version
csv_file <- file.path(paste0(tempdir(), "/", tableNumber, ".csv"))
unzip_dir <- tempfile()
utils::unzip(zipfile = download_file, exdir = unzip_dir)
unlink(download_file)
on.exit(unlink(unzip_dir, recursive = TRUE))

# loading the data file in French version
csv_file <- file.path(unzip_dir, paste0(tableNumber, ".csv"))
can_data <- data.table::fread(csv_file)

# adding to the data.frame or data.table the Official Data Table
# Indicator defined by Statistics Canada and based on metadata file.
can_data$INDICATOR <- as.character(0)
can_data$COORDINATE <- as.character(can_data$COORDINATE)
can_data$INDICATOR <- as.character(readr::read_delim(paste0(tempdir(),
"/", tableNumber, "_MetaData.csv"), delim = ";", escape_double = FALSE, trim_ws = TRUE)[1, 1])
metadata <- readr::read_delim(file.path(unzip_dir, paste0(tableNumber, "_MetaData.csv")), delim = ";", escape_double = FALSE, trim_ws = TRUE, n_max = 1)
probs <- problems(metadata)
can_data$INDICATOR <- as.character(metadata[1, 1])

can_data[,1] <- "REF_DATE"

Expand All @@ -163,168 +167,22 @@ statcan_data <- function(tableNumber, lang)
can_data$REF_DATE <- as.Date(can_data$REF_DATE)
}

# creating a data frame
data.table::setDF(return(can_data))
} # /if - network up or down

}

if (!is.null(probs))
attr(can_data, "problems") <- probs

# removing the temp folder and creating a data frame
unlink(tempdir())
can_data

}

#' statcanR download data function
#'
#' @param tableNumber The table number of the Statistics Canada data table
#' @param lang The language wanted
#'
#' @return The output will be a data table and csv file representing the data associated with the chosen table number.
#' @export
#'
#'
#' @import curl
#' @import tibble
#' @importFrom httr http_error
#' @importFrom data.table fread
#' @importFrom data.table setDF
#'
#' @examples
#' mydata <- statcan_data('27-10-0014-01', 'eng')
#' @rdname statcan_data

# Download function for statcan
#
# Downloads the zipped CSV of a Statistics Canada table, loads it, tags it
# with the indicator read from the table's metadata file, converts REF_DATE
# to Date, writes the result to "./statcan_<table id>_<lang>.csv" in the
# working directory and returns the table.
#
# tableNumber: table number such as '27-10-0014-01'; the last two characters
#              and every hyphen are dropped to build the remote file id.
# lang:        "eng" for the English file, "fra" for the French file.
#
# Returns the loaded table (the object produced by data.table::fread plus the
# added columns). Returns NULL, after a message, when the table cannot be
# requested or `lang` is not one of the two supported values.
statcan_download_data <- function(tableNumber, lang)
{

  # identifying the table number
  tableNumber <- gsub("-", "", substr(tableNumber, 1, nchar(tableNumber) - 2))

  # language-specific remote location and local archive name
  if (lang == "eng") {
    url <- paste0("https://www150.statcan.gc.ca/n1/en/tbl/csv/",
                  tableNumber, "-eng.zip")
    zip_name <- "datasetEng.zip"
  } else if (lang == "fra") {
    url <- paste0("https://www150.statcan.gc.ca/n1/fr/tbl/csv/",
                  tableNumber, "-fra.zip")
    zip_name <- "datasetFra.zip"
  } else {
    message("statcanR: lang must be \"eng\" or \"fra\".")
    return(invisible(NULL))
  }

  # A failed request is reported with a message, not an error. http_error()
  # itself throws when no connection can be made, so that case is treated as
  # a failure too; both languages return NULL here.
  unreachable <- tryCatch(httr::http_error(url), error = function(e) TRUE)
  if (unreachable) {
    message("No Internet connection or incorrect statcan table number or Statistics Canada website under maintenance.")
    return(NULL)
  }

  # network is up = proceed to download via curl
  message("statcanR: downloading remote table.")

  download_file <- file.path(tempdir(), zip_name)
  # private extraction directory, so nothing is left behind in tempdir()
  unzip_dir <- tempfile()

  # Raise the timeout for the duration of this call only, and always remove
  # the archive and the extracted files, even if a later step fails.
  old_opts <- options(timeout = max(300, getOption("timeout")))
  on.exit({
    options(old_opts)
    unlink(download_file)
    unlink(unzip_dir, recursive = TRUE)
  }, add = TRUE)

  curl::curl_download(url, download_file)

  # unzipping the downloaded data file (exdir is created by unzip)
  utils::unzip(zipfile = download_file, exdir = unzip_dir)

  # loading the data file
  can_data <- data.table::fread(file.path(unzip_dir, paste0(tableNumber, ".csv")))
  metadata_file <- file.path(unzip_dir, paste0(tableNumber, "_MetaData.csv"))

  if (lang == "eng") {
    metadata <- readr::read_csv(metadata_file)
  } else {
    metadata <- readr::read_delim(metadata_file, delim = ";",
                                  escape_double = FALSE, trim_ws = TRUE)
    # Rename the first column so the REF_DATE handling below applies to it.
    # (Assigning "REF_DATE" with can_data[, 1] <- ... would overwrite every
    # value in the column instead of renaming it.)
    names(can_data)[1] <- "REF_DATE"
  }

  # NOTE(review): the French file presumably names this column differently;
  # the conversion is applied only when a COORDINATE column exists - confirm.
  if ("COORDINATE" %in% names(can_data)) {
    can_data$COORDINATE <- as.character(can_data$COORDINATE)
  }

  # adding to the data.frame or data.table the Official Data Table
  # Indicator defined by Statistics Canada and based on metadata file.
  can_data$INDICATOR <- as.character(metadata[1, 1])

  # The width of the first reference value selects the date format:
  # 9 characters (e.g. "2019/2020") is handled as a fiscal year ending
  # March 31, 7 as year-month, 4 as a year, anything else is passed to
  # as.Date() unchanged.
  ref_width <- nchar(can_data[1, 1])
  if (ref_width == 9) {
    can_data$REF_DATE <- sub(".*/", "", can_data$REF_DATE)
    can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 3, 31, sep = "-"))
    can_data <- tibble::add_column(can_data, REF_PERIOD = "Fiscal year", .before = 2)
  } else if (ref_width == 7) {
    can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 1, sep = "-"))
  } else if (ref_width == 4) {
    can_data$REF_DATE <- as.Date(paste(can_data$REF_DATE, 1, 1, sep = "-"))
  } else {
    can_data$REF_DATE <- as.Date(can_data$REF_DATE)
  }

  # Write the CSV first, then return. The table is returned as loaded and is
  # not converted with setDF(), so callers keep receiving the same class.
  readr::write_csv(can_data, paste0("./statcan_",tableNumber,"_", lang,".csv"))

  can_data
}
# Alias: statcan_download_data is bound to the same function object as
# statcan_data, so calling either name runs the same code.
statcan_download_data <- statcan_data

# Re-export of readr::problems (imported from readr and exported again by
# this package via the roxygen tags below).
#' @importFrom readr problems
#' @export
readr::problems
16 changes: 16 additions & 0 deletions man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/statcan_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 0 additions & 22 deletions man/statcan_download_data.Rd

This file was deleted.