Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some work on zen4R::download_zenodo() #35

Merged
merged 10 commits into from
Aug 13, 2020
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Authors@R: c(
person("Stephen", "Eglen", role = c("ctb"), comment = c(ORCID = "0000-0001-8607-8025")))
Maintainer: Emmanuel Blondel <[email protected]>
Depends: R (>= 3.3.0), methods
Imports: R6, httr, jsonlite, xml2, keyring, tools
Imports: R6, httr, jsonlite, xml2, keyring, tools, parallel
Suggests: testthat
Description: Provides an Interface to 'Zenodo' (<https://zenodo.org>) REST API,
including management of depositions, attribution of DOIs by 'Zenodo' and
Expand All @@ -17,5 +17,6 @@ License: MIT + file LICENSE
URL: https://github.com/eblondel/zen4R
BugReports: https://github.com/eblondel/zen4R/issues
LazyLoad: yes
RoxygenNote: 7.1.0
Roxygen: list(r6 = FALSE)
RoxygenNote: 7.1.1
Roxygen: list(r6 = FALSE)
Encoding: UTF-8
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ import(keyring)
import(methods)
import(xml2)
importFrom(R6,R6Class)
importFrom(parallel,mclapply)
importFrom(tools,file_path_as_absolute)
importFrom(tools,md5sum)
47 changes: 29 additions & 18 deletions R/ZenodoRecord.R
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@
#' \code{makeCluster} and passed as \code{cl} argument. After downloading all files, the cluster
#' will be stopped automatically.
#'
#' The logical argument \code{quiet} (default is \code{FALSE}) can be set to
#' \code{TRUE} to suppress informative messages (warnings are not suppressed).
#'
#' Additional arguments inherited from \code{parallel::mclapply} or the custom \code{parallel_handler}
#' can be added (eg. \code{mc.cores} for \code{mclapply})
#'
Expand Down Expand Up @@ -1148,58 +1151,66 @@ ZenodoRecord <- R6Class("ZenodoRecord",
},

#downloadFiles
downloadFiles = function(path = ".", parallel = FALSE, parallel_handler = NULL, cl = NULL, ...){
downloadFiles = function(path = ".", parallel = FALSE, parallel_handler = NULL, cl = NULL, quiet = FALSE, ...){
if(length(self$files)==0){
self$WARN(sprintf("No files to download for record '%s' (doi: '%s')",
self$id, self$doi))
}else{
files_summary <- sprintf("Download %s file%s from record '%s' (doi: '%s') - total size: %s",
files_summary <- sprintf("Will download %s file%s from record '%s' (doi: '%s') - total size: %s",
length(self$files), ifelse(length(self$files)>1,"s",""), self$id,
self$doi, sum(sapply(self$files, function(x){x$filesize})))

#download_file util
download_file <- function(file){
cat(sprintf("[zen4R][INFO] Downloading file '%s' from record '%s' (doi: '%s') - size: %s\n",
file$filename, self$id, self$doi, file$filesize))
if (!quiet) cat(sprintf("[zen4R][INFO] Downloading file '%s' - size: %s\n",
file$filename, file$filesize))
target_file <-file.path(path, file$filename)
download.file(url = file$links$download, destfile = target_file,
quiet = quiet)
}
#check_integrity util
check_integrity <- function(file){
target_file <-file.path(path, file$filename)
download.file(url = file$links$download, destfile = target_file)

#check md5sum
target_file_md5sum <- tools::md5sum(target_file)
if(target_file_md5sum==file$checksum){
cat(sprintf("[zen4R][INFO] File '%s' successfully downloaded at '%s' and its integrity verified (md5sum: %s)\n",
file$filename, tools::file_path_as_absolute(target_file), file$checksum))
if (!quiet) cat(sprintf("[zen4R][INFO] File '%s': integrity verified (md5sum: %s)\n",
file$filename, file$checksum))
}else{
warnMsg <- sprintf("[zen4R][WARN] Download issue: md5sum (%s) of file '%s' does not match Zenodo archive md4sum (%s)\n",
warnMsg <- sprintf("[zen4R][WARN] Download issue: md5sum (%s) of file '%s' does not match Zenodo archive md5sum (%s)\n",
target_file_md5sum, tools::file_path_as_absolute(target_file), file$checksum)
cat(warnMsg)
warning(warnMsg)
}
}


if(parallel){
self$INFO("Download in parallel mode")
if (!quiet) self$INFO("Download in parallel mode")
if(is.null(parallel_handler)){
self$INFO("Using default parallel 'mclapply' handler")
self$INFO(files_summary)
if (!quiet) self$INFO("Using default parallel 'mclapply' handler")
if (!quiet) self$INFO(files_summary)
invisible(mclapply(self$files, download_file, ...))
}else{
self$INFO("Using cluster-based parallel handler")
if (!quiet) self$INFO("Using cluster-based parallel handler")
if(is.null(cl)){
errMsg <- "No cluster object defined as 'cl' argument. Aborting file download..."
self$ERROR(errMsg)
stop(errMsg)
}
self$INFO(files_summary)
if (!quiet) self$INFO(files_summary)
invisible(parallel_handler(cl, self$files, download_file, ...))
try(stopCluster(cl))
}
}else{
self$INFO("Download in sequential mode")
self$INFO(files_summary)
if (!quiet) self$INFO("Download in sequential mode")
if (!quiet) self$INFO(files_summary)
invisible(lapply(self$files, download_file))
}
self$INFO("End of download")
if (!quiet) cat(sprintf("[zen4R][INFO] File%s downloaded at '%s'.\n",
ifelse(length(self$files)>1,"s",""), tools::file_path_as_absolute(path)))
if (!quiet) self$INFO("Verifying file integrity...")
invisible(lapply(self$files, check_integrity))
if (!quiet) self$INFO("End of download")
}
}

Expand Down
1 change: 1 addition & 0 deletions R/zen4R.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @import keyring
#' @importFrom tools file_path_as_absolute
#' @importFrom tools md5sum
#' @importFrom parallel mclapply
#'
#' @title Interface to 'Zenodo' REST API
#' @description Provides an Interface to 'Zenodo' (<https://zenodo.org>) REST API,
Expand Down
13 changes: 9 additions & 4 deletions R/zen4R_downloader.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
#' @description \code{download_zenodo} allows downloading archives attached to a Zenodo
#' record, identified by its DOI or concept DOI.
#'
#' @usage download_zenodo("10.5281/zenodo.2547036")
#' @examples
#' \dontrun{
#' download_zenodo("10.5281/zenodo.2547036")
#' }
#'
#' @param doi a Zenodo DOI or concept DOI
#' @param path the target directory in which to download files
#' @param logger a logger to print messages. The logger can be either NULL,
#' "INFO" (with minimum logs), or "DEBUG" (for complete curl http calls logs)
#' @param quiet Logical (\code{FALSE} by default).
#' If \code{TRUE}, informative messages are suppressed (warnings are not).
#' @param ... any other arguments for parallel downloading (more information at
#'\link{ZenodoManager}, \code{downloadFiles()} documentation)
#'\link{ZenodoRecord}, \code{downloadFiles()} documentation)
#'
#' @export
#'
download_zenodo = function(doi, path = ".", logger = NULL, ...){
download_zenodo = function(doi, path = ".", logger = NULL, quiet = FALSE, ...){

zenodo <- ZenodoManager$new(logger = logger)
rec <- zenodo$getRecordByDOI(doi)
Expand All @@ -27,5 +32,5 @@ download_zenodo = function(doi, path = ".", logger = NULL, ...){
}
}
#download
rec$downloadFiles(path = path, ...)
rec$downloadFiles(path = path, quiet = quiet, ...)
}
5 changes: 4 additions & 1 deletion R/zen4R_versioning.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
#' @title get_versions
#' @description \code{get_versions} allows executing a workflow
#'
#' @usage get_versions("10.5281/zenodo.2547036")
#' @examples
#' \dontrun{
#' get_versions("10.5281/zenodo.2547036")
#' }
#'
#' @param doi a Zenodo DOI or concept DOI
#' @param logger a logger to print messages. The logger can be either NULL,
Expand Down
3 changes: 3 additions & 0 deletions man/ZenodoRecord.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 11 additions & 2 deletions man/download_zenodo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion man/get_versions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.