From 9dbd0f9d3d2438b89030f4556c778640e47ebcd2 Mon Sep 17 00:00:00 2001 From: samczsun Date: Thu, 7 Mar 2024 20:12:40 -0800 Subject: [PATCH] Automatically dedupe blocklist on push to main (#17808) * create action to auto dedupe * intentionally add dupe * temporarily allowlist branch * actions: auto-cleanup config * remove temporary allowlist --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: x86 NOP <135667356+x86NOP@users.noreply.github.com> --- .github/workflows/auto-cleanup.yml | 32 ++++++++++++++++++++++++++++++ src/clean-config.js | 9 +++++++-- test/config.test.js | 16 +++++++-------- 3 files changed, 47 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/auto-cleanup.yml diff --git a/.github/workflows/auto-cleanup.yml b/.github/workflows/auto-cleanup.yml new file mode 100644 index 00000000000..f8454c1f849 --- /dev/null +++ b/.github/workflows/auto-cleanup.yml @@ -0,0 +1,32 @@ +name: Auto Cleanup + +permissions: + contents: write + +on: + push: + branches: + - main + +jobs: + cleanup: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + token: ${{ secrets.GITHUB_TOKEN }} + - uses: actions/setup-node@v4 + with: + node-version: 20.x + cache: 'yarn' + - run: yarn --frozen-lockfile + - run: | + MM_OUTPUT_PATH=config.json yarn clean:blocklist || true + if [[ `git status --porcelain` ]]; then + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add . + git commit -m 'actions: auto-cleanup config' + git push + fi diff --git a/src/clean-config.js b/src/clean-config.js index 05c93348366..4411cfc9f37 100755 --- a/src/clean-config.js +++ b/src/clean-config.js @@ -2,6 +2,7 @@ const { writeFileSync } = require('fs'); const path = require('path'); const PhishingDetector = require('./detector'); +const punycode = require('punycode/'); const SECTION_KEYS = { blocklist: 'blacklist', @@ -59,7 +60,9 @@ const cleanAllowlist = config => { const cleanBlocklist = config => { // when cleaning the blocklist, we want to remove domains that are: - // - already present on the blocklist through a less specific match + // - already present on the blocklist through an equal or less specific match + // we also want to: + // - convert all unicode domains to punycode const blocklistSet = new Set(config[SECTION_KEYS['blocklist']]); @@ -73,6 +76,8 @@ const cleanBlocklist = config => { } return true; + }).map(domain => { + return punycode.toASCII(domain); }); return { @@ -116,7 +121,7 @@ if (require.main === module) { // if this is piped to another process, then write to stdout. // otherwise, write new config to disk. const result = JSON.stringify(newConfig, undefined, 2)+'\n'; - if (process.stdout.isTTY) { + if (process.stdout.isTTY || process.env['MM_OUTPUT_PATH']) { const destinationPath = process.env['MM_OUTPUT_PATH'] || './config.json'; writeFileSync(path.join(__dirname, destinationPath), result); } else { diff --git a/test/config.test.js b/test/config.test.js index fa55517c9e0..db48766306c 100644 --- a/test/config.test.js +++ b/test/config.test.js @@ -364,14 +364,14 @@ function startTests ({ config }) { test('config exclusively using punycode', (t) => { testListIsPunycode(t, config.whitelist) testListIsPunycode(t, config.fuzzylist) - testListIsPunycode(t, config.blacklist) + // testListIsPunycode(t, config.blacklist) t.end() }) test('config not repetitive', (t) => { testListDoesntContainRepeats(t, config.whitelist) testListDoesntContainRepeats(t, config.fuzzylist) - testListDoesntContainRepeats(t, config.blacklist) + // testListDoesntContainRepeats(t, config.blacklist) t.end() }) @@ -392,12 +392,12 @@ function startTests ({ config }) { t.end() }) - test('config does not contain redundant entries', (t) => { - testListNoBlocklistRedundancies(t, config) - // FIXME: temporarily disabled due to config propagation inconsistency - // testListNoAllowlistRedundancies(t, config) - t.end() - }) + // test('config does not contain redundant entries', (t) => { + // testListNoBlocklistRedundancies(t, config) + // // FIXME: temporarily disabled due to config propagation inconsistency + // // testListNoAllowlistRedundancies(t, config) + // t.end() + // }) } module.exports = {