From a0b505e262ebca2b71f674e380e5cb950f57267a Mon Sep 17 00:00:00 2001 From: David Zero Date: Mon, 11 Sep 2023 20:44:40 -0400 Subject: [PATCH] deps: Remove ICU data dep Builds ICU data into a static library as part of the build instead of fetching a binary data release archive, and links the library into the URL lib. --- .bazelrc | 4 +- .github/workflows/ci.yaml | 4 +- WORKSPACE | 16 ++-- third_party/icu.BUILD | 161 +++++++++++++++++++++++++++++++++++++- url/BUILD | 19 ++++- url/url.cpp | 35 --------- 6 files changed, 184 insertions(+), 55 deletions(-) diff --git a/.bazelrc b/.bazelrc index 47d3bb3a7..9e0359290 100644 --- a/.bazelrc +++ b/.bazelrc @@ -2,7 +2,6 @@ # ========================================================= build --enable_platform_specific_config -build --test_env=HASTUR_ICU_DATA=external/icu-data/ coverage --combined_report=lcov test --test_output=errors test --test_summary=terse @@ -32,11 +31,14 @@ build:linux --cxxopt='-fno-rtti' build:linux --copt='-gdwarf-4' build:windows --enable_runfiles +build:windows --action_env=LOCALAPPDATA # Quirk for running vswhere, remove when icu no-longer needed +build:windows --action_env=ProgramData # Quirk for running vswhere, remove when icu no-longer needed build:windows --cxxopt='/std:c++latest' build:windows --cxxopt='/GR-' # Disable rtti. build:windows --copt='/permissive-' # Conform to the standard. build:windows --copt='/Zc:__cplusplus' # Report the real supported C++ version, not just C++98. build:windows --copt='-utf-8' # Use UTF-8 as the source and execution character sets. +build:windows --host_copt='-utf-8' # Use UTF-8 as the source and execution character sets. # Special build options # ========================================================= diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9b9cbe963..fc4df2341 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -173,7 +173,7 @@ jobs: # Include all targets except for # * py_test targets: not fully statically linked # * targets that depend on sfml: it pulls in host dependencies. - - run: ./bazelisk test -- $(bazel query '... except (kind("py_test", ...) union rdeps(..., @sfml//:window))') + - run: ./bazelisk test -- $(bazel query '... except (kind("py_test", ...) union rdeps(..., @sfml//:window) union rdeps(..., @icu//:common))') - name: Run tui run: | echo "

Example

This is an example page.

" >example.html @@ -181,7 +181,7 @@ jobs: windows-msvc: runs-on: windows-2022 - timeout-minutes: 30 + timeout-minutes: 32 defaults: run: shell: bash diff --git a/WORKSPACE b/WORKSPACE index 9d36845a6..50cd0f671 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -128,18 +128,12 @@ http_archive( patch_cmds = [ "rm source/common/BUILD.bazel", "rm source/stubdata/BUILD.bazel", + "rm source/tools/toolutil/BUILD.bazel", + "rm source/i18n/BUILD.bazel", ], - sha256 = "818a80712ed3caacd9b652305e01afc7fa167e6f2e94996da44b90c2ab604ce1", - strip_prefix = "icu", - url = "https://github.com/unicode-org/icu/releases/download/release-73-2/icu4c-73_2-src.tgz", -) - -# https://github.com/unicode-org/icu -http_archive( - name = "icu-data", # Unicode-DFS-2016 - build_file_content = """exports_files(["icudt73l.dat"])""", - sha256 = "2657bd18c23b930ddf63f466192832cc083256515e07b5a5e7d79c5c1db058a1", - url = "https://github.com/unicode-org/icu/releases/download/release-73-2/icu4c-73_2-data-bin-l.zip", + sha256 = "4b6c4a79b0648d228d505601e58780a59e9ad4eaad54be75cc637bd635aa46d6", + strip_prefix = "icu-release-73-2/icu4c", + url = "https://github.com/unicode-org/icu/archive/refs/tags/release-73-2.zip", ) # https://github.com/ocornut/imgui diff --git a/third_party/icu.BUILD b/third_party/icu.BUILD index bbec5c462..894f72b41 100644 --- a/third_party/icu.BUILD +++ b/third_party/icu.BUILD @@ -1,4 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") cc_library( name = "common", @@ -9,6 +9,7 @@ cc_library( "source/stubdata/*.cpp", ]), hdrs = glob([ + "source/common/*.h", "source/common/unicode/*.h", ]), copts = select({ @@ -33,10 +34,8 @@ cc_library( }), defines = [ "U_STATIC_IMPLEMENTATION", - "U_COMMON_IMPLEMENTATION", "U_CHARSET_IS_UTF8=1", "U_HIDE_OBSOLETE_UTF_OLD_H=1", - "UCONFIG_NO_CONVERSION=1", ], linkopts = select({ "@platforms//os:windows": [ @@ -45,6 +44,162 @@ cc_library( "//conditions:default": ["-ldl"], }), linkstatic = True, + local_defines = [ + "U_COMMON_IMPLEMENTATION", + ], strip_include_prefix = "source/common/", visibility = ["//visibility:public"], ) + +cc_library( + name = "toolutil", + srcs = glob(["source/tools/toolutil/*.cpp"]), + hdrs = glob(["source/tools/toolutil/*.h"]), + copts = select({ + "@platforms//os:windows": [ + "/GR", + ], + "//conditions:default": [ + "-frtti", + ], + }), + linkstatic = True, + local_defines = ["U_TOOLUTIL_IMPLEMENTATION"] + select({ + "@platforms//os:windows": [], + "//conditions:default": [ + "U_ELF", + ], + }), + strip_include_prefix = "source/tools/toolutil", + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ], +) + +cc_library( + name = "i18n", + srcs = glob(["source/i18n/*.cpp"]), + hdrs = glob([ + "source/i18n/*.h", + "source/i18n/unicode/*.h", + ]), + copts = select({ + "@platforms//os:windows": [ + "/GR", + ], + "//conditions:default": [ + "-frtti", + ], + }), + linkstatic = True, + local_defines = [ + "U_I18N_IMPLEMENTATION", + ], + strip_include_prefix = "source/i18n", + visibility = ["//visibility:private"], + deps = [":common"], +) + +cc_binary( + name = "gensprep", + srcs = glob(["source/tools/gensprep/*.c"]) + ["source/tools/gensprep/gensprep.h"], + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ":toolutil", + ], +) + +SPREP_DATA = glob(["source/data/sprep/*.txt"]) + +SPREP_DATA_COMPILED = [s.replace("txt", "spp").rpartition("/")[2] for s in SPREP_DATA] + +filegroup( + name = "normalizations", + srcs = ["source/data/unidata/NormalizationCorrections.txt"], +) + +[genrule( + name = "run_sprep_" + input.replace(".txt", "").rpartition("/")[2], + srcs = [input], + outs = [input.replace("txt", "spp").rpartition("/")[2]], + cmd = "./$(location gensprep) --destdir $(RULEDIR) --bundle-name " + input.replace(".txt", "").rpartition("/")[2] + " --norm-correction external/icu/source/data/unidata/ --unicode 15.0.0 $<", + tools = [ + ":gensprep", + ":normalizations", + ], + visibility = ["//visibility:private"], +) for input in SPREP_DATA] + +genrule( + name = "create_pkgdata_lst", + srcs = SPREP_DATA_COMPILED, + outs = ["pkgdata.lst"], + cmd = "echo -e \"" + "\\n".join(SPREP_DATA_COMPILED) + "\" > $(RULEDIR)/pkgdata.lst && echo uts46.nrm >> $(RULEDIR)/pkgdata.lst", +) + +genrule( + name = "move uts46.nrm", + srcs = ["source/data/in/uts46.nrm"], + outs = ["uts46.nrm"], + cmd = "cp $< $(RULEDIR)", +) + +cc_binary( + name = "icupkg", + srcs = ["source/tools/icupkg/icupkg.cpp"], + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ":toolutil", + ], +) + +cc_binary( + name = "pkgdata", + srcs = [ + "source/tools/pkgdata/pkgdata.cpp", + "source/tools/pkgdata/pkgtypes.c", + "source/tools/pkgdata/pkgtypes.h", + ], + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ":toolutil", + ], +) + +genrule( + name = "run_pkgdata", + srcs = [ + "pkgdata.lst", + "uts46.nrm", + ] + SPREP_DATA_COMPILED, + outs = ["libicudt73l.a"], + cmd = r"""srcs=($(SRCS)); export PATH=$$PATH:$(location icupkg); $(location pkgdata) --entrypoint icudt73 --sourcedir $(RULEDIR) --destdir $(RULEDIR) --name icudt73l --mode static $${srcs[0]}""", + tools = [ + ":icupkg", + ":pkgdata", + ], + visibility = ["//visibility:public"], +) + +genrule( + name = "run_pkgdata_windows", + srcs = [ + "pkgdata.lst", + "uts46.nrm", + ] + SPREP_DATA_COMPILED, + outs = ["sicudt73l.lib"], + cmd = r"""srcs=($(SRCS)); export PATH=$$PATH:$(location icupkg):"/$$('C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe' -latest -prerelease -find '**\lib.exe' | grep x64 | grep -v llvm | head -n1 | awk -F '\' 'BEGIN{OFS=FS} {$$NF=""; print}' | tr -d ':' | tr '\134' '/')"; $(location pkgdata) --entrypoint icudt73 --sourcedir $(RULEDIR) --destdir $(RULEDIR) --name icudt73l --mode static $${srcs[0]}""", + tools = [ + ":icupkg", + ":pkgdata", + ], + visibility = ["//visibility:public"], +) diff --git a/url/BUILD b/url/BUILD index df4e278e9..80123cf04 100644 --- a/url/BUILD +++ b/url/BUILD @@ -1,7 +1,17 @@ -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library", "cc_test") load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test") load("//bzl:copts.bzl", "HASTUR_COPTS", "HASTUR_FUZZ_PLATFORMS") +cc_import( + name = "icudata", + static_library = select({ + "@platforms//os:windows": "@icu//:sicudt73l.lib", + "//conditions:default": "@icu//:libicudt73l.a", + }), + visibility = ["//visibility:private"], + alwayslink = True, +) + cc_library( name = "rtti_hack", srcs = ["rtti_hack.cpp"], @@ -15,18 +25,21 @@ cc_library( cc_library( name = "url", - srcs = ["url.cpp"], + srcs = [ + "url.cpp", + ], hdrs = ["url.h"], copts = HASTUR_COPTS, - data = ["@icu-data//:icudt73l.dat"], visibility = ["//visibility:public"], deps = [ + ":icudata", ":rtti_hack", "//util:base_parser", "//util:string", "//util:unicode", "//util:uuid", "@icu//:common", + #"@icu//:icudata", "@spdlog", ], ) diff --git a/url/url.cpp b/url/url.cpp index 3b988f9be..9eee0bd71 100644 --- a/url/url.cpp +++ b/url/url.cpp @@ -118,39 +118,6 @@ struct PercentEncodeSet { static constexpr bool component(char c) { return userinfo(c) || (c >= '$' && c <= '&') || c == '+' || c == ','; } }; -void icu_init() { - static std::atomic called_once = false; - - if (called_once.exchange(true)) { - return; - } - - // NOLINTNEXTLINE(concurrency-mt-unsafe): This is going away soon. - char *data = std::getenv("HASTUR_ICU_DATA"); - - if (data != nullptr) { - std::filesystem::path env_path{data}; - - if (std::filesystem::is_directory(env_path)) { - u_setDataDirectory(env_path.string().c_str()); - } - } else { - // Use current working directory as a last resort. - // TODO(zero-one): Look at engine config for paths. - u_setDataDirectory(std::filesystem::current_path().string().c_str()); - } - - UErrorCode err = U_ZERO_ERROR; - - std::uint32_t opts = - UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_USE_STD3_RULES; - - [[maybe_unused]] auto *uts = icu::IDNA::createUTS46Instance(opts, err); - - assert(!U_FAILURE(err)); - - delete uts; -} } // namespace void icu_cleanup() { @@ -1189,8 +1156,6 @@ void UrlParser::state_fragment() { // https://url.spec.whatwg.org/#concept-domain-to-ascii std::optional UrlParser::domain_to_ascii(std::string_view domain, bool be_strict) const { - icu_init(); - std::string ascii_domain; icu::StringByteSink tmp{&ascii_domain};