Skip to content

Commit

Permalink
Merge pull request #35 from florisvdh/download
Browse files Browse the repository at this point in the history
Some work on zen4R::download_zenodo()
  • Loading branch information
eblondel authored Aug 13, 2020
2 parents 82552a1 + b1fc871 commit 05aafb3
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 29 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Authors@R: c(
person("Stephen", "Eglen", role = c("ctb"), comment = c(ORCID = "0000-0001-8607-8025")))
Maintainer: Emmanuel Blondel <[email protected]>
Depends: R (>= 3.3.0), methods
Imports: R6, httr, jsonlite, xml2, keyring, tools
Imports: R6, httr, jsonlite, xml2, keyring, tools, parallel
Suggests: testthat
Description: Provides an Interface to 'Zenodo' (<https://zenodo.org>) REST API,
including management of depositions, attribution of DOIs by 'Zenodo' and
Expand All @@ -17,5 +17,6 @@ License: MIT + file LICENSE
URL: https://github.com/eblondel/zen4R
BugReports: https://github.com/eblondel/zen4R/issues
LazyLoad: yes
RoxygenNote: 7.1.0
Roxygen: list(r6 = FALSE)
RoxygenNote: 7.1.1
Roxygen: list(r6 = FALSE)
Encoding: UTF-8
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ import(keyring)
import(methods)
import(xml2)
importFrom(R6,R6Class)
importFrom(parallel,mclapply)
importFrom(tools,file_path_as_absolute)
importFrom(tools,md5sum)
47 changes: 29 additions & 18 deletions R/ZenodoRecord.R
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@
#' \code{makeCluster} and passed as \code{cl} argument. After downloading all files, the cluster
#' will be stopped automatically.
#'
#' The logical argument \code{quiet} (default is \code{FALSE}) can be set to
#' \code{TRUE} to suppress informative messages (warnings are still shown).
#'
#' Additional arguments accepted by \code{parallel::mclapply} or by the custom \code{parallel_handler}
#' can be added (e.g. \code{mc.cores} for \code{mclapply}).
#'
Expand Down Expand Up @@ -1148,58 +1151,66 @@ ZenodoRecord <- R6Class("ZenodoRecord",
},

#downloadFiles
downloadFiles = function(path = ".", parallel = FALSE, parallel_handler = NULL, cl = NULL, ...){
downloadFiles = function(path = ".", parallel = FALSE, parallel_handler = NULL, cl = NULL, quiet = FALSE, ...){
if(length(self$files)==0){
self$WARN(sprintf("No files to download for record '%s' (doi: '%s')",
self$id, self$doi))
}else{
files_summary <- sprintf("Download %s file%s from record '%s' (doi: '%s') - total size: %s",
files_summary <- sprintf("Will download %s file%s from record '%s' (doi: '%s') - total size: %s",
length(self$files), ifelse(length(self$files)>1,"s",""), self$id,
self$doi, sum(sapply(self$files, function(x){x$filesize})))

#download_file util
download_file <- function(file){
cat(sprintf("[zen4R][INFO] Downloading file '%s' from record '%s' (doi: '%s') - size: %s\n",
file$filename, self$id, self$doi, file$filesize))
if (!quiet) cat(sprintf("[zen4R][INFO] Downloading file '%s' - size: %s\n",
file$filename, file$filesize))
target_file <-file.path(path, file$filename)
download.file(url = file$links$download, destfile = target_file,
quiet = quiet)
}
#check_integrity util
check_integrity <- function(file){
target_file <-file.path(path, file$filename)
download.file(url = file$links$download, destfile = target_file)

#check md5sum
target_file_md5sum <- tools::md5sum(target_file)
if(target_file_md5sum==file$checksum){
cat(sprintf("[zen4R][INFO] File '%s' successfully downloaded at '%s' and its integrity verified (md5sum: %s)\n",
file$filename, tools::file_path_as_absolute(target_file), file$checksum))
if (!quiet) cat(sprintf("[zen4R][INFO] File '%s': integrity verified (md5sum: %s)\n",
file$filename, file$checksum))
}else{
warnMsg <- sprintf("[zen4R][WARN] Download issue: md5sum (%s) of file '%s' does not match Zenodo archive md4sum (%s)\n",
warnMsg <- sprintf("[zen4R][WARN] Download issue: md5sum (%s) of file '%s' does not match Zenodo archive md5sum (%s)\n",
target_file_md5sum, tools::file_path_as_absolute(target_file), file$checksum)
cat(warnMsg)
warning(warnMsg)
}
}


if(parallel){
self$INFO("Download in parallel mode")
if (!quiet) self$INFO("Download in parallel mode")
if(is.null(parallel_handler)){
self$INFO("Using default parallel 'mclapply' handler")
self$INFO(files_summary)
if (!quiet) self$INFO("Using default parallel 'mclapply' handler")
if (!quiet) self$INFO(files_summary)
invisible(mclapply(self$files, download_file, ...))
}else{
self$INFO("Using cluster-based parallel handler")
if (!quiet) self$INFO("Using cluster-based parallel handler")
if(is.null(cl)){
errMsg <- "No cluster object defined as 'cl' argument. Aborting file download..."
self$ERROR(errMsg)
stop(errMsg)
}
self$INFO(files_summary)
if (!quiet) self$INFO(files_summary)
invisible(parallel_handler(cl, self$files, download_file, ...))
try(stopCluster(cl))
}
}else{
self$INFO("Download in sequential mode")
self$INFO(files_summary)
if (!quiet) self$INFO("Download in sequential mode")
if (!quiet) self$INFO(files_summary)
invisible(lapply(self$files, download_file))
}
self$INFO("End of download")
if (!quiet) cat(sprintf("[zen4R][INFO] File%s downloaded at '%s'.\n",
ifelse(length(self$files)>1,"s",""), tools::file_path_as_absolute(path)))
if (!quiet) self$INFO("Verifying file integrity...")
invisible(lapply(self$files, check_integrity))
if (!quiet) self$INFO("End of download")
}
}

Expand Down
1 change: 1 addition & 0 deletions R/zen4R.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @import keyring
#' @importFrom tools file_path_as_absolute
#' @importFrom tools md5sum
#' @importFrom parallel mclapply
#'
#' @title Interface to 'Zenodo' REST API
#' @description Provides an Interface to 'Zenodo' (<https://zenodo.org>) REST API,
Expand Down
13 changes: 9 additions & 4 deletions R/zen4R_downloader.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
#' @description \code{download_zenodo} downloads the archives attached to a Zenodo
#' record, identified by its DOI or concept DOI.
#'
#' @usage download_zenodo("10.5281/zenodo.2547036")
#' @examples
#' \dontrun{
#' download_zenodo("10.5281/zenodo.2547036")
#' }
#'
#' @param doi a Zenodo DOI or concept DOI
#' @param path the target directory in which to download the files
#' @param logger a logger to print messages. The logger can be either NULL,
#' "INFO" (with minimum logs), or "DEBUG" (for complete curl http calls logs)
#' @param quiet Logical (\code{FALSE} by default). If \code{TRUE}, informative
#' messages are suppressed (warnings are still shown).
#' @param ... any other arguments for parallel downloading (more information at
#'\link{ZenodoManager}, \code{downloadFiles()} documentation)
#'\link{ZenodoRecord}, \code{downloadFiles()} documentation)
#'
#' @export
#'
download_zenodo = function(doi, path = ".", logger = NULL, ...){
download_zenodo = function(doi, path = ".", logger = NULL, quiet = FALSE, ...){

zenodo <- ZenodoManager$new(logger = logger)
rec <- zenodo$getRecordByDOI(doi)
Expand All @@ -27,5 +32,5 @@ download_zenodo = function(doi, path = ".", logger = NULL, ...){
}
}
#download
rec$downloadFiles(path = path, ...)
rec$downloadFiles(path = path, quiet = quiet, ...)
}
5 changes: 4 additions & 1 deletion R/zen4R_versioning.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
#' @title get_versions
#' @description \code{get_versions} allows to execute a workflow
#'
#' @usage get_versions("10.5281/zenodo.2547036")
#' @examples
#' \dontrun{
#' get_versions("10.5281/zenodo.2547036")
#' }
#'
#' @param doi a Zenodo DOI or concept DOI
#' @param logger a logger to print messages. The logger can be either NULL,
Expand Down
3 changes: 3 additions & 0 deletions man/ZenodoRecord.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 11 additions & 2 deletions man/download_zenodo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion man/get_versions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 05aafb3

Please sign in to comment.