diff --git a/.gitignore b/.gitignore index 5509140f2..adc4efd2b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.DS_Store +.Rproj.user diff --git a/DEVELOPING.md b/DEVELOPING.md index 8a47fa490..fd79e5ba6 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -2,9 +2,31 @@ In addition to providing out-of-the-box forests for quantile regression and instrumental variables, grf provides a framework for creating forests tailored to new statistical tasks. Certain components around splitting and prediction can be swapped out, within the general infrastructure for growing and predicting on trees. -### Working with the code +## Contributing -The core forest implementation is written in C++, with an R interface powered by Rcpp. We recommend using a full-powered C++ IDE such as CLion, Xcode, or Visual Studio when working with the core code. To build the R package from source, cd into `r-package` and run the `build_package.R` script. Code style consistency is checked with the [lintr](https://github.com/jimhester/lintr) package, and in RStudio you can make sure the code is consistent with the R standard by clicking _Addins->Style_. +This repository follows the standard open source workflow with git, for which there is an abundance of existing resources to get up to speed (see, for example, the +contributing guidelines for well-known packages in other languages, like Scikit-learn, Scipy, and pandas). + +Condensed greatly, the workflow is to fork this repository, check out a branch, commit your changes (forming an ideally legible commit history), +then submit a pull request explaining your contribution, ideally referring to the issue you created, or the issue you chose to work on. + +## Working with the code + +The core forest implementation is written in C++, with an R interface powered by Rcpp. We recommend using a full-powered C++ IDE such as CLion, Xcode, or Visual Studio when working with the core code. 
+ +## R package + +To build the R package from source, cd into `r-package` and run `build_package.R`. Required development dependencies are listed there +(note: it is recommended to install the latest [lintr](https://github.com/jimhester/lintr) package with `devtools::install_github("jimhester/lintr")`). Running this script mimics the tests run when submitting a pull request. + +An alternative development workflow is to use the accompanying grf.Rproj and build and test the package with RStudio's build menu, which can be convenient +for quickly iterating on C++/R code changes. + +### Note for Windows users: + +Symlinks in the src directory point to the core C++ and R bindings. On Windows one has to clone this repository with symlinks enabled: `git clone -c core.symlinks=true https://github.com/grf-labs/grf.git` (this command needs to be run as an administrator: right-click _Command Prompt -> Run as administrator_). Caveat: the above RStudio workflow is not tested on Windows. + +## Core C++ ### Code structure diff --git a/r-package/.gitignore b/r-package/.gitignore index 4965a9b2e..38aadce35 100644 --- a/r-package/.gitignore +++ b/r-package/.gitignore @@ -23,8 +23,8 @@ # clion files .idea/ -# copied src files -grf/src/* +# Generated files +grf/src/RcppExports.cpp # documentation by roxygen *.Rd diff --git a/r-package/build_package.R b/r-package/build_package.R index 2725af452..323788b42 100755 --- a/r-package/build_package.R +++ b/r-package/build_package.R @@ -14,7 +14,6 @@ library(roxygen2) library(lintr) package.name <- "grf" -package.src <- "grf/src" # Check code style consistency linters <- with_defaults( @@ -36,21 +35,13 @@ if (!is.na(args[1]) && args[1] == "--as-cran") { write_union("grf/.Rbuildignore", "^tests/testthat/test_((?!cran).).*") } -# Copy Rcpp bindings and C++ source into the package src directory. Note that we -# don't copy in third_party/Eigen, because for the R package build we provide -# access to the library through RcppEigen. 
-unlink(package.src, recursive = TRUE) -dir.create(package.src) - -binding.files <- list.files("grf/bindings", full.names = TRUE) -file.copy(binding.files, package.src, recursive = FALSE) -file.copy("../core/src", package.src, recursive = TRUE) -file.copy("../core/third_party/optional", package.src, recursive = TRUE) - # Auto-generate documentation files roxygen2::roxygenise(package.name) # Run Rcpp and build the package. +# Symlinks in `grf/src` point to the Rcpp bindings (`grf/bindings`) and core C++ (`core/src`). +# Note: we don't link in third_party/Eigen, because for the R package build we provide +# access to the library through RcppEigen. compileAttributes(package.name) clean_dll(package.name) build(package.name) diff --git a/r-package/grf/grf.Rproj b/r-package/grf/grf.Rproj new file mode 100644 index 000000000..270314b87 --- /dev/null +++ b/r-package/grf/grf.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/r-package/grf/src/AnalysisToolsBindings.cpp b/r-package/grf/src/AnalysisToolsBindings.cpp new file mode 120000 index 000000000..dbc5136ec --- /dev/null +++ b/r-package/grf/src/AnalysisToolsBindings.cpp @@ -0,0 +1 @@ +../bindings/AnalysisToolsBindings.cpp \ No newline at end of file diff --git a/r-package/grf/src/CausalForestBindings.cpp b/r-package/grf/src/CausalForestBindings.cpp new file mode 120000 index 000000000..6e504122e --- /dev/null +++ b/r-package/grf/src/CausalForestBindings.cpp @@ -0,0 +1 @@ +../bindings/CausalForestBindings.cpp \ No newline at end of file diff --git a/r-package/grf/src/CustomForestBindings.cpp 
b/r-package/grf/src/CustomForestBindings.cpp new file mode 120000 index 000000000..0d66f6a16 --- /dev/null +++ b/r-package/grf/src/CustomForestBindings.cpp @@ -0,0 +1 @@ +../bindings/CustomForestBindings.cpp \ No newline at end of file diff --git a/r-package/grf/src/InstrumentalForestBindings.cpp b/r-package/grf/src/InstrumentalForestBindings.cpp new file mode 120000 index 000000000..2b73d5f13 --- /dev/null +++ b/r-package/grf/src/InstrumentalForestBindings.cpp @@ -0,0 +1 @@ +../bindings/InstrumentalForestBindings.cpp \ No newline at end of file diff --git a/r-package/grf/src/Makevars b/r-package/grf/src/Makevars new file mode 120000 index 000000000..da63ce738 --- /dev/null +++ b/r-package/grf/src/Makevars @@ -0,0 +1 @@ +../bindings/Makevars \ No newline at end of file diff --git a/r-package/grf/src/QuantileForestBindings.cpp b/r-package/grf/src/QuantileForestBindings.cpp new file mode 120000 index 000000000..efda927fb --- /dev/null +++ b/r-package/grf/src/QuantileForestBindings.cpp @@ -0,0 +1 @@ +../bindings/QuantileForestBindings.cpp \ No newline at end of file diff --git a/r-package/grf/src/RcppUtilities.cpp b/r-package/grf/src/RcppUtilities.cpp new file mode 120000 index 000000000..a07b58325 --- /dev/null +++ b/r-package/grf/src/RcppUtilities.cpp @@ -0,0 +1 @@ +../bindings/RcppUtilities.cpp \ No newline at end of file diff --git a/r-package/grf/src/RcppUtilities.h b/r-package/grf/src/RcppUtilities.h new file mode 120000 index 000000000..06e4328c5 --- /dev/null +++ b/r-package/grf/src/RcppUtilities.h @@ -0,0 +1 @@ +../bindings/RcppUtilities.h \ No newline at end of file diff --git a/r-package/grf/src/RegressionForestBindings.cpp b/r-package/grf/src/RegressionForestBindings.cpp new file mode 120000 index 000000000..e4ce3477b --- /dev/null +++ b/r-package/grf/src/RegressionForestBindings.cpp @@ -0,0 +1 @@ +../bindings/RegressionForestBindings.cpp \ No newline at end of file diff --git a/r-package/grf/src/optional b/r-package/grf/src/optional new file mode 120000 
index 000000000..8bdaaa889 --- /dev/null +++ b/r-package/grf/src/optional @@ -0,0 +1 @@ +../../../core/third_party/optional/ \ No newline at end of file diff --git a/r-package/grf/src/src b/r-package/grf/src/src new file mode 120000 index 000000000..9baf1e2bc --- /dev/null +++ b/r-package/grf/src/src @@ -0,0 +1 @@ +../../../core/src/ \ No newline at end of file