From 7d20334b0d8261276b182bd1615d8930cf668e5e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 19 Nov 2023 11:44:16 -0800 Subject: [PATCH] New one_call_pipe_linter (#2294) * New one_call_pipe_linter * examples * Skip native pipe tests on old R * confirm logic works with native placeholder, simplify, add tests * test metadata * fix bad merge * re-use 'magrittr_pipes' * multi-line case for robustness * finish merge --- DESCRIPTION | 1 + NAMESPACE | 1 + R/one_call_pipe_linter.R | 78 +++++++++++++ inst/lintr/linters.csv | 1 + man/linters.Rd | 5 +- man/one_call_pipe_linter.Rd | 51 +++++++++ man/readability_linters.Rd | 1 + man/style_linters.Rd | 1 + tests/testthat/test-one_call_pipe_linter.R | 123 +++++++++++++++++++++ 9 files changed, 260 insertions(+), 2 deletions(-) create mode 100644 R/one_call_pipe_linter.R create mode 100644 man/one_call_pipe_linter.Rd create mode 100644 tests/testthat/test-one_call_pipe_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index 84fc96efa..7c4b578f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -144,6 +144,7 @@ Collate: 'object_length_linter.R' 'object_name_linter.R' 'object_usage_linter.R' + 'one_call_pipe_linter.R' 'outer_negation_linter.R' 'package_hooks_linter.R' 'paren_body_linter.R' diff --git a/NAMESPACE b/NAMESPACE index c4e31583e..245d0849c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,6 +106,7 @@ export(nzchar_linter) export(object_length_linter) export(object_name_linter) export(object_usage_linter) +export(one_call_pipe_linter) export(open_curly_linter) export(outer_negation_linter) export(package_hooks_linter) diff --git a/R/one_call_pipe_linter.R b/R/one_call_pipe_linter.R new file mode 100644 index 000000000..4ee4c094d --- /dev/null +++ b/R/one_call_pipe_linter.R @@ -0,0 +1,78 @@ +#' Block single-call magrittr pipes +#' +#' Prefer using a plain call instead of a pipe with only one call, +#' i.e. `1:10 %>% sum()` should instead be `sum(1:10)`. Note that +#' calls in the first `%>%` argument count. `rowSums(x) %>% max()` is OK +#' because there are two total calls (`rowSums()` and `max()`). +#' +#' Note also that un-"called" steps are *not* counted, since they should +#' be calls (see [pipe_call_linter()]). +#' +#' @examples +#' # will produce lints +#' lint( +#' text = "(1:10) %>% sum()", +#' linters = one_call_pipe_linter() +#' ) +#' +#' lint( +#' text = "DT %>% .[grp == 'a', sum(v)]", +#' linters = one_call_pipe_linter() +#' ) +#' +#' # okay +#' lint( +#' text = "rowSums(x) %>% mean()", +#' linters = one_call_pipe_linter() +#' ) +#' +#' lint( +#' text = "DT[src == 'a', .N, by = grp] %>% .[N > 10]", +#' linters = one_call_pipe_linter() +#' ) +#' +#' @evalRd rd_tags("one_call_pipe_linter") +#' @seealso +#' - [linters] for a complete list of linters available in lintr. +#' - +#' @export +one_call_pipe_linter <- function() { + pipes_cond <- xp_text_in_table(magrittr_pipes) + + # preceding-sibling::SPECIAL: if there are ever two pipes, don't lint + # OP-LEFT-BRACKET/LBB: accept DT[...] %>% .[...] as a two-call pipe, + # (but not DT %>% .[...]) + # parent::expr/SPECIAL: make sure we are at the top of a pipeline + # count(): any call anywhere else in the AST within the pipe expression + xpath <- glue(" + (//SPECIAL[{pipes_cond}] | //PIPE)[ + not(preceding-sibling::expr[1]/*[self::SPECIAL[{pipes_cond}] or self::PIPE]) + and ( + not(following-sibling::expr[OP-LEFT-BRACKET or LBB]) + or not(preceding-sibling::expr[OP-LEFT-BRACKET or LBB]) + ) + ] + /parent::expr[ + not(parent::expr/*[self::SPECIAL[{ pipes_cond }] or self::PIPE]) + and count(.//SYMBOL_FUNCTION_CALL) <= 1 + ] + ") + + Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + bad_expr <- xml_find_all(xml, xpath) + pipe <- xml_find_chr(bad_expr, "string(SPECIAL | PIPE)") + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = paste0("Expressions with only a single call shouldn't use pipe ", pipe, "."), + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 507259837..03c1968d8 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -63,6 +63,7 @@ nzchar_linter,efficiency best_practices consistency object_length_linter,style readability default configurable executing object_name_linter,style consistency default configurable executing object_usage_linter,style readability correctness default executing configurable +one_call_pipe_linter,style readability open_curly_linter,defunct outer_negation_linter,readability efficiency best_practices package_hooks_linter,style correctness package_development diff --git a/man/linters.Rd b/man/linters.Rd index fcd9e2b37..66fee2b8f 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -28,10 +28,10 @@ The following tags exist: \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (59 linters)} +\item{\link[=readability_linters]{readability} (60 linters)} \item{\link[=regex_linters]{regex} (4 linters)} \item{\link[=robustness_linters]{robustness} (16 linters)} -\item{\link[=style_linters]{style} (38 linters)} +\item{\link[=style_linters]{style} (39 linters)} } } \section{Linters}{ @@ -98,6 +98,7 @@ The following linters exist: \item{\code{\link{object_length_linter}} (tags: configurable, default, executing, readability, style)} \item{\code{\link{object_name_linter}} (tags: configurable, consistency, default, executing, style)} \item{\code{\link{object_usage_linter}} (tags: configurable, correctness, default, executing, readability, style)} +\item{\code{\link{one_call_pipe_linter}} (tags: readability, style)} \item{\code{\link{outer_negation_linter}} (tags: best_practices, efficiency, readability)} \item{\code{\link{package_hooks_linter}} (tags: correctness, package_development, style)} \item{\code{\link{paren_body_linter}} (tags: default, readability, style)} diff --git a/man/one_call_pipe_linter.Rd b/man/one_call_pipe_linter.Rd new file mode 100644 index 000000000..a20c19efd --- /dev/null +++ b/man/one_call_pipe_linter.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/one_call_pipe_linter.R +\name{one_call_pipe_linter} +\alias{one_call_pipe_linter} +\title{Block single-call magrittr pipes} +\usage{ +one_call_pipe_linter() +} +\description{ +Prefer using a plain call instead of a pipe with only one call, +i.e. \code{1:10 \%>\% sum()} should instead be \code{sum(1:10)}. Note that +calls in the first \verb{\%>\%} argument count. \code{rowSums(x) \%>\% max()} is OK +because there are two total calls (\code{rowSums()} and \code{max()}). +} +\details{ +Note also that un-"called" steps are \emph{not} counted, since they should +be calls (see \code{\link[=pipe_call_linter]{pipe_call_linter()}}). +} +\examples{ +# will produce lints +lint( + text = "(1:10) \%>\% sum()", + linters = one_call_pipe_linter() +) + +lint( + text = "DT \%>\% .[grp == 'a', sum(v)]", + linters = one_call_pipe_linter() +) + +# okay +lint( + text = "rowSums(x) \%>\% mean()", + linters = one_call_pipe_linter() +) + +lint( + text = "DT[src == 'a', .N, by = grp] \%>\% .[N > 10]", + linters = one_call_pipe_linter() +) + +} +\seealso{ +\itemize{ +\item \link{linters} for a complete list of linters available in lintr. +\item \url{https://style.tidyverse.org/pipes.html#short-pipes} +} +} +\section{Tags}{ +\link[=readability_linters]{readability}, \link[=style_linters]{style} +} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index ec6b71ee9..0ce6547e9 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -45,6 +45,7 @@ The following linters are tagged with 'readability': \item{\code{\link{numeric_leading_zero_linter}}} \item{\code{\link{object_length_linter}}} \item{\code{\link{object_usage_linter}}} +\item{\code{\link{one_call_pipe_linter}}} \item{\code{\link{outer_negation_linter}}} \item{\code{\link{paren_body_linter}}} \item{\code{\link{pipe_call_linter}}} diff --git a/man/style_linters.Rd b/man/style_linters.Rd index 37928ffa7..47c78fc2c 100644 --- a/man/style_linters.Rd +++ b/man/style_linters.Rd @@ -32,6 +32,7 @@ The following linters are tagged with 'style': \item{\code{\link{object_length_linter}}} \item{\code{\link{object_name_linter}}} \item{\code{\link{object_usage_linter}}} +\item{\code{\link{one_call_pipe_linter}}} \item{\code{\link{package_hooks_linter}}} \item{\code{\link{paren_body_linter}}} \item{\code{\link{pipe_call_linter}}} diff --git a/tests/testthat/test-one_call_pipe_linter.R b/tests/testthat/test-one_call_pipe_linter.R new file mode 100644 index 000000000..50be3a38b --- /dev/null +++ b/tests/testthat/test-one_call_pipe_linter.R @@ -0,0 +1,123 @@ +test_that("one_call_pipe_linter skips allowed usages", { + linter <- one_call_pipe_linter() + + # two pipe steps is OK + expect_lint("x %>% foo() %>% bar()", NULL, linter) + # call in first step --> OK + expect_lint("foo(x) %>% bar()", NULL, linter) + # both calls in second step --> OK + expect_lint("x %>% foo(bar(.))", NULL, linter) +}) + +test_that("one_call_pipe_linter blocks simple disallowed usages", { + linter <- one_call_pipe_linter() + lint_msg <- rex::rex("Expressions with only a single call shouldn't use pipe %>%.") + + expect_lint("x %>% foo()", lint_msg, linter) + + # new lines don't matter + expect_lint("x %>%\n foo()", lint_msg, linter) + + # catch the "inner" pipe chain, not the "outer" one + # TODO(michaelchirico): actually, this should lint twice -- we're too aggressive + # in counting _all_ nested calls. + expect_lint("x %>% inner_join(y %>% filter(is_treatment))", lint_msg, linter) +}) + +test_that("one_call_pipe_linter skips data.table chains", { + linter <- one_call_pipe_linter() + lint_msg <- rex::rex("Expressions with only a single call shouldn't use pipe %>%.") + + expect_lint("DT[x > 5, sum(y), by = keys] %>% .[, .SD[1], by = key1]", NULL, linter) + + # lint here: instead of a pipe, use DT[x > 5, sum(y), by = keys] + expect_lint("DT %>% .[x > 5, sum(y), by = keys]", lint_msg, linter) + + # ditto for [[ + expect_lint("DT %>% rowSums() %>% .[[idx]]", NULL, linter) + + expect_lint("DT %>% .[[idx]]", lint_msg, linter) +}) + +test_that("one_call_pipe_linter treats all pipes equally", { + linter <- one_call_pipe_linter() + + expect_lint("foo %>% bar() %$% col", NULL, linter) + expect_lint( + "x %T>% foo()", + rex::rex("Expressions with only a single call shouldn't use pipe %T>%."), + linter + ) + expect_lint( + "x %$%\n foo()", + rex::rex("Expressions with only a single call shouldn't use pipe %$%."), + linter + ) + expect_lint( + 'data %>% filter(type == "console") %$% obscured_gaia_id %>% unique()', + NULL, + linter + ) +}) + +test_that("multiple lints are generated correctly", { + expect_lint( + trim_some("{ + a %>% b() + c %$% d() + e %T>% + f() + }"), + list( + list(rex::rex("pipe %>%"), line_number = 2L), + list(rex::rex("pipe %$%"), line_number = 3L), + list(rex::rex("pipe %T>%"), line_number = 4L) + ), + one_call_pipe_linter() + ) +}) + +test_that("Native pipes are handled as well", { + skip_if_not_r_version("4.1.0") + + linter <- one_call_pipe_linter() + + expect_lint( + "x |> foo()", + rex::rex("Expressions with only a single call shouldn't use pipe |>."), + linter + ) + + # mixed pipes + expect_lint("x |> foo() %>% bar()", NULL, linter) + expect_lint("x %>% foo() |> bar()", NULL, linter) + + expect_lint( + trim_some("{ + a %>% b() + c |> d() + }"), + list( + list(message = "pipe %>%"), + list(message = "pipe |>") + ), + linter + ) +}) + +test_that("one_call_pipe_linter skips data.table chains with native pipe", { + skip_if_not_r_version("4.3.0") + + linter <- one_call_pipe_linter() + lint_msg <- rex::rex("Expressions with only a single call shouldn't use pipe |>.") + + expect_lint("DT[x > 5, sum(y), by = keys] |> _[, .SD[1], by = key1]", NULL, linter) + + # lint here: instead of a pipe, use DT[x > 5, sum(y), by = keys] + expect_lint("DT |> _[x > 5, sum(y), by = keys]", lint_msg, linter) + + # ditto for [[ + expect_lint("DT |> rowSums() |> _[[idx]]", NULL, linter) + + expect_lint("DT |> _[[idx]]", lint_msg, linter) +})