From e3b0375dcf2523736dafe8f5a6a2debf777c069f Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Mon, 13 Mar 2023 14:56:10 -0700 Subject: [PATCH 1/8] link back to assets in instructor pages Instructor view copies over the HTML pages only and leaves the assets in the top-level directory. At the moment, it only checks for non-html resources, but it _is_ possible to include html resources in `episodes/files`, so I should modify this to also check for path length. --- R/utils-xml.R | 15 +++++++++------ tests/testthat/_snaps/utils-xml.md | 26 ++++++++++++++++++++++++++ tests/testthat/test-utils-xml.R | 17 +++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/_snaps/utils-xml.md diff --git a/R/utils-xml.R b/R/utils-xml.R index 19cbc24e3..46654514e 100644 --- a/R/utils-xml.R +++ b/R/utils-xml.R @@ -40,12 +40,12 @@ fix_codeblocks <- function(nodes = NULL) { add_code_heading <- function(codes = NULL, labels = "OUTPUT") { if (length(codes) == 0) return(codes) xml2::xml_set_attr(codes, "tabindex", "0") - heads <- xml2::xml_add_sibling(codes, "h3", labels, class = "code-label", + heads <- xml2::xml_add_sibling(codes, "h3", labels, class = "code-label", .where = "before") for (head in heads) { - xml2::xml_add_child(head, "i", + xml2::xml_add_child(head, "i", "aria-hidden" = "true", "data-feather" = "chevron-left") - xml2::xml_add_child(head, "i", + xml2::xml_add_child(head, "i", "aria-hidden" = "true", "data-feather" = "chevron-right") } invisible(codes) @@ -97,7 +97,7 @@ fix_setup_link <- function(nodes = NULL) { if (length(nodes) == 0) return(nodes) links <- xml2::xml_find_all(nodes, ".//a") hrefs <- xml2::url_parse(xml2::xml_attr(links, "href")) - setup_links <- hrefs$scheme == "" & + setup_links <- hrefs$scheme == "" & hrefs$server == "" & hrefs$path == "setup.html" xml2::xml_set_attr(links[setup_links], "href", "index.html#setup") @@ -115,9 +115,12 @@ use_learner <- function(nodes = NULL) { use_instructor <- function(nodes = NULL) { if (length(nodes) == 0) return(nodes) copy <- xml2::read_html(as.character(nodes)) - # lnk <- xml2::xml_find_all(copy, ".//a[not(starts-with(@href, 'http'))]") + lnk <- xml2::xml_find_all(copy, ".//a[not(starts-with(@href, 'http'))]") + lnk_hrefs <- xml2::xml_attr(lnk, "href") + is_above <- fs::path_ext(lnk_hrefs) != "html" + lnk_hrefs[is_above] <- fs::path("../", lnk_hrefs[is_above]) + xml2::xml_set_attr(lnk, "href", lnk_hrefs) img <- xml2::xml_find_all(copy, ".//img[not(starts-with(@src, 'http'))]") - # xml2::xml_set_attr(lnk, "href", fs::path("instructor/", xml2::xml_attr(lnk, "href"))) xml2::xml_set_attr(img, "src", fs::path("../", xml2::xml_attr(img, "src"))) as.character(copy) } diff --git a/tests/testthat/_snaps/utils-xml.md b/tests/testthat/_snaps/utils-xml.md new file mode 100644 index 000000000..8579c10c5 --- /dev/null +++ b/tests/testthat/_snaps/utils-xml.md @@ -0,0 +1,26 @@ +# paths in instructor view that are not HTML get diverted + + Code + xml2::xml_find_all(html_test, ".//a") + Output + {xml_nodeset (6)} + [1] a + [2] b + [3] c + [4] d + [5] e + [6] f + +--- + + Code + xml2::xml_find_all(res, ".//a") + Output + {xml_nodeset (6)} + [1] a + [2] b + [3] c + [4] d + [5] e + [6] f + diff --git a/tests/testthat/test-utils-xml.R b/tests/testthat/test-utils-xml.R index 018b80120..fb736ba16 100644 --- a/tests/testthat/test-utils-xml.R +++ b/tests/testthat/test-utils-xml.R @@ -1,4 +1,21 @@ + +test_that("paths in instructor view that are not HTML get diverted", { + html_test <- xml2::read_html(commonmark::markdown_html(c( + "[a](index.html)", + "[b](./index.html)", + "[c](fig/thing.png)", + "[d](./fig/thang.jpg)", + "[e](data/thing.csv)", + "[f](files/papers/thing.pdf)" + ))) + res <- xml2::read_html(use_instructor(html_test)) + expect_snapshot(xml2::xml_find_all(html_test, ".//a")) + expect_snapshot(xml2::xml_find_all(res, ".//a")) +}) + + + test_that("empty args result in nothing happening", { expect_null(fix_nodes()) expect_null(fix_setup_link()) From dba4d70db4cab5fc312b458e416a3820e68ca6be Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Mon, 13 Mar 2023 15:12:43 -0700 Subject: [PATCH 2/8] add test for existing HTML files --- tests/testthat/test-utils-xml.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-utils-xml.R b/tests/testthat/test-utils-xml.R index fb736ba16..4bef928d4 100644 --- a/tests/testthat/test-utils-xml.R +++ b/tests/testthat/test-utils-xml.R @@ -7,9 +7,14 @@ test_that("paths in instructor view that are not HTML get diverted", { "[c](fig/thing.png)", "[d](./fig/thang.jpg)", "[e](data/thing.csv)", - "[f](files/papers/thing.pdf)" + "[f](files/papers/thing.pdf)", + "[g](files/confirmation.html)" ))) res <- xml2::read_html(use_instructor(html_test)) + # there are fo + refs <- xml2::xml_text(xml2::xml_find_all(res, ".//@href")) + expect_equal(startsWith(refs, "../"), + c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE)) expect_snapshot(xml2::xml_find_all(html_test, ".//a")) expect_snapshot(xml2::xml_find_all(res, ".//a")) }) From 295f38e06a981e7e613ef3cac4944445182a331e Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Mon, 13 Mar 2023 15:22:37 -0700 Subject: [PATCH 3/8] look for nested paths --- R/utils-xml.R | 8 +++++++- tests/testthat/_snaps/utils-xml.md | 6 ++++-- tests/testthat/test-utils-xml.R | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/R/utils-xml.R b/R/utils-xml.R index 46654514e..b42c6910d 100644 --- a/R/utils-xml.R +++ b/R/utils-xml.R @@ -115,11 +115,17 @@ use_learner <- function(nodes = NULL) { use_instructor <- function(nodes = NULL) { if (length(nodes) == 0) return(nodes) copy <- xml2::read_html(as.character(nodes)) + # find all local links and transform non-html and nested links --------- lnk <- xml2::xml_find_all(copy, ".//a[not(starts-with(@href, 'http'))]") lnk_hrefs <- xml2::xml_attr(lnk, "href") - is_above <- fs::path_ext(lnk_hrefs) != "html" + # links without HTML extension + not_html <- fs::path_ext(lnk_hrefs) != "html" + # links that are not in the root directory (e.g. files/a.html, but not ./a.html) + is_nested <- lengths(strsplit(sub("^[.][/]", "", lnk_hrefs), "/")) > 1 + is_above <- not_html | is_nested lnk_hrefs[is_above] <- fs::path("../", lnk_hrefs[is_above]) xml2::xml_set_attr(lnk, "href", lnk_hrefs) + # find all images and refer back to source img <- xml2::xml_find_all(copy, ".//img[not(starts-with(@src, 'http'))]") xml2::xml_set_attr(img, "src", fs::path("../", xml2::xml_attr(img, "src"))) as.character(copy) diff --git a/tests/testthat/_snaps/utils-xml.md b/tests/testthat/_snaps/utils-xml.md index 8579c10c5..538e6a9c4 100644 --- a/tests/testthat/_snaps/utils-xml.md +++ b/tests/testthat/_snaps/utils-xml.md @@ -3,24 +3,26 @@ Code xml2::xml_find_all(html_test, ".//a") Output - {xml_nodeset (6)} + {xml_nodeset (7)} [1] a [2] b [3] c [4] d [5] e [6] f + [7] g --- Code xml2::xml_find_all(res, ".//a") Output - {xml_nodeset (6)} + {xml_nodeset (7)} [1] a [2] b [3] c [4] d [5] e [6] f + [7] g diff --git a/tests/testthat/test-utils-xml.R b/tests/testthat/test-utils-xml.R index 4bef928d4..b5a2a92d7 100644 --- a/tests/testthat/test-utils-xml.R +++ b/tests/testthat/test-utils-xml.R @@ -1,6 +1,6 @@ -test_that("paths in instructor view that are not HTML get diverted", { +test_that("paths in instructor view that are nested or not HTML get diverted", { html_test <- xml2::read_html(commonmark::markdown_html(c( "[a](index.html)", "[b](./index.html)", @@ -11,7 +11,7 @@ test_that("paths in instructor view that are not HTML get diverted", { "[g](files/confirmation.html)" ))) res <- xml2::read_html(use_instructor(html_test)) - # there are fo + # All but two refs are transformed according to our rules refs <- xml2::xml_text(xml2::xml_find_all(res, ".//@href")) expect_equal(startsWith(refs, "../"), c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE)) From 5b1e202e8aab97ef08c96c74c76e4a7a9b272faf Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Mon, 13 Mar 2023 15:24:26 -0700 Subject: [PATCH 4/8] update NEWS --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 9d61d6aaa..3cc55a1e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # sandpaper 0.11.9 (in development) +BUG FIX +------- + +* Links to assets in instructor view no longer render a 404. (reported: + @sarahmbrown, #404; fixed: @zkamvar, #408) + CONTINUOUS INTEGRATION ---------------------- From 72b58a5a5fad6d49946b8929ed50373a9ce1727b Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Mon, 13 Mar 2023 15:31:31 -0700 Subject: [PATCH 5/8] update snapshot --- tests/testthat/_snaps/utils-xml.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/utils-xml.md b/tests/testthat/_snaps/utils-xml.md index 538e6a9c4..9e08cf8ce 100644 --- a/tests/testthat/_snaps/utils-xml.md +++ b/tests/testthat/_snaps/utils-xml.md @@ -1,4 +1,4 @@ -# paths in instructor view that are not HTML get diverted +# paths in instructor view that are nested or not HTML get diverted Code xml2::xml_find_all(html_test, ".//a") From b1ca39b95a01e049c4e1a9ac84473a24798d18d2 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 14 Mar 2023 07:09:47 -0700 Subject: [PATCH 6/8] update instructor link predicate to check for href This will also omit anchors from the search --- R/utils-xml.R | 4 +++- tests/testthat/_snaps/utils-xml.md | 10 ++++++---- tests/testthat/test-utils-xml.R | 10 ++++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/R/utils-xml.R b/R/utils-xml.R index b42c6910d..3c3849a42 100644 --- a/R/utils-xml.R +++ b/R/utils-xml.R @@ -116,7 +116,9 @@ use_instructor <- function(nodes = NULL) { if (length(nodes) == 0) return(nodes) copy <- xml2::read_html(as.character(nodes)) # find all local links and transform non-html and nested links --------- - lnk <- xml2::xml_find_all(copy, ".//a[not(starts-with(@href, 'http'))]") + lnk <- xml2::xml_find_all(copy, + ".//a[@href][not(contains(@href, '://')) and not(starts-with(@href, '#'))]" + ) lnk_hrefs <- xml2::xml_attr(lnk, "href") # links without HTML extension not_html <- fs::path_ext(lnk_hrefs) != "html" diff --git a/tests/testthat/_snaps/utils-xml.md b/tests/testthat/_snaps/utils-xml.md index 9e08cf8ce..23ffb5db9 100644 --- a/tests/testthat/_snaps/utils-xml.md +++ b/tests/testthat/_snaps/utils-xml.md @@ -1,9 +1,9 @@ # paths in instructor view that are nested or not HTML get diverted Code - xml2::xml_find_all(html_test, ".//a") + xml2::xml_find_all(html_test, ".//a[@href]") Output - {xml_nodeset (7)} + {xml_nodeset (8)} [1] a [2] b [3] c @@ -11,13 +11,14 @@ [5] e [6] f [7] g + [8] h --- Code - xml2::xml_find_all(res, ".//a") + xml2::xml_find_all(res, ".//a[@href]") Output - {xml_nodeset (7)} + {xml_nodeset (8)} [1] a [2] b [3] c @@ -25,4 +26,5 @@ [5] e [6] f [7] g + [8] h diff --git a/tests/testthat/test-utils-xml.R b/tests/testthat/test-utils-xml.R index b5a2a92d7..302462a19 100644 --- a/tests/testthat/test-utils-xml.R +++ b/tests/testthat/test-utils-xml.R @@ -2,21 +2,23 @@ test_that("paths in instructor view that are nested or not HTML get diverted", { html_test <- xml2::read_html(commonmark::markdown_html(c( + "

h

\n", "[a](index.html)", "[b](./index.html)", "[c](fig/thing.png)", "[d](./fig/thang.jpg)", "[e](data/thing.csv)", "[f](files/papers/thing.pdf)", - "[g](files/confirmation.html)" + "[g](files/confirmation.html)", + "[h](#what-the)" ))) res <- xml2::read_html(use_instructor(html_test)) # All but two refs are transformed according to our rules refs <- xml2::xml_text(xml2::xml_find_all(res, ".//@href")) expect_equal(startsWith(refs, "../"), - c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE)) - expect_snapshot(xml2::xml_find_all(html_test, ".//a")) - expect_snapshot(xml2::xml_find_all(res, ".//a")) + c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)) + expect_snapshot(xml2::xml_find_all(html_test, ".//a[@href]")) + expect_snapshot(xml2::xml_find_all(res, ".//a[@href]")) }) From 2898f849852675aad9b4cead727bb6b134a7acd7 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 14 Mar 2023 07:26:21 -0700 Subject: [PATCH 7/8] instructor view parse the URL before checking --- R/utils-xml.R | 5 ++-- tests/testthat/_snaps/utils-xml.md | 40 ++++++++++++++++-------------- tests/testthat/test-utils-xml.R | 18 ++++++++------ 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/R/utils-xml.R b/R/utils-xml.R index 3c3849a42..f8be4d5ce 100644 --- a/R/utils-xml.R +++ b/R/utils-xml.R @@ -120,10 +120,11 @@ use_instructor <- function(nodes = NULL) { ".//a[@href][not(contains(@href, '://')) and not(starts-with(@href, '#'))]" ) lnk_hrefs <- xml2::xml_attr(lnk, "href") + lnk_paths <- xml2::url_parse(lnk_hrefs)$path # links without HTML extension - not_html <- fs::path_ext(lnk_hrefs) != "html" + not_html <- !fs::path_ext(lnk_paths) %in% c("html", "") # links that are not in the root directory (e.g. files/a.html, but not ./a.html) - is_nested <- lengths(strsplit(sub("^[.][/]", "", lnk_hrefs), "/")) > 1 + is_nested <- lengths(strsplit(sub("^[.][/]", "", lnk_paths), "/")) > 1 is_above <- not_html | is_nested lnk_hrefs[is_above] <- fs::path("../", lnk_hrefs[is_above]) xml2::xml_set_attr(lnk, "href", lnk_hrefs) diff --git a/tests/testthat/_snaps/utils-xml.md b/tests/testthat/_snaps/utils-xml.md index 23ffb5db9..b6fbe9db0 100644 --- a/tests/testthat/_snaps/utils-xml.md +++ b/tests/testthat/_snaps/utils-xml.md @@ -3,28 +3,32 @@ Code xml2::xml_find_all(html_test, ".//a[@href]") Output - {xml_nodeset (8)} - [1] a - [2] b - [3] c - [4] d - [5] e - [6] f - [7] g - [8] h + {xml_nodeset (10)} + [1] a + [2] b + [3] c + [4] d + [5] e + [6] f + [7] g + [8] h + [9] i + [10] j --- Code xml2::xml_find_all(res, ".//a[@href]") Output - {xml_nodeset (8)} - [1] a - [2] b - [3] c - [4] d - [5] e - [6] f - [7] g - [8] h + {xml_nodeset (10)} + [1] a + [2] b + [3] c + [4] d + [5] e + [6] f + [7] g + [8] h + [9] i + [10] j diff --git a/tests/testthat/test-utils-xml.R b/tests/testthat/test-utils-xml.R index 302462a19..30e8d60b1 100644 --- a/tests/testthat/test-utils-xml.R +++ b/tests/testthat/test-utils-xml.R @@ -5,18 +5,20 @@ test_that("paths in instructor view that are nested or not HTML get diverted", { "

h

\n", "[a](index.html)", "[b](./index.html)", - "[c](fig/thing.png)", - "[d](./fig/thang.jpg)", - "[e](data/thing.csv)", - "[f](files/papers/thing.pdf)", - "[g](files/confirmation.html)", - "[h](#what-the)" + "[c](fig/thing.png)", # asset + "[d](./fig/thang.jpg)", # asset + "[e](data/thing.csv)", # asset + "[f](files/papers/thing.pdf)", # asset + "[g](files/confirmation.html)", # asset + "[h](#what-the)", + "[i](other-page.html#section)", + "[j](other-page)" ))) res <- xml2::read_html(use_instructor(html_test)) - # All but two refs are transformed according to our rules + # refs are transformed according to our rules refs <- xml2::xml_text(xml2::xml_find_all(res, ".//@href")) expect_equal(startsWith(refs, "../"), - c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)) + c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE)) expect_snapshot(xml2::xml_find_all(html_test, ".//a[@href]")) expect_snapshot(xml2::xml_find_all(res, ".//a[@href]")) }) From 24657564c21f7e9de009c4dd22aed761ce2f9a50 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 14 Mar 2023 07:34:59 -0700 Subject: [PATCH 8/8] fix credit --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3cc55a1e2..48c095fa8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ BUG FIX ------- * Links to assets in instructor view no longer render a 404. (reported: - @sarahmbrown, #404; fixed: @zkamvar, #408) + @brownsarahm, #404; fixed: @zkamvar, #408) CONTINUOUS INTEGRATION ----------------------