Skip to content

Commit

Permalink
Automatically dedupe blocklist on push to main (#17808)
Browse files Browse the repository at this point in the history
* create action to auto dedupe

* intentionally add dupe

* temporarily allowlist branch

* actions: auto-cleanup config

* remove temporary allowlist

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: x86 NOP <[email protected]>
  • Loading branch information
3 people authored Mar 8, 2024
1 parent 547aaa9 commit 9dbd0f9
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 10 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/auto-cleanup.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Auto Cleanup: on every push to main, run the blocklist cleaner and, if it
# changed any tracked file, commit and push the result back to the branch.
name: Auto Cleanup

# The job pushes a commit, so the workflow token needs write access to contents.
permissions:
  contents: write

on:
  push:
    branches:
      - main

jobs:
  cleanup:
    # The ubuntu-20.04 hosted runner image has been retired by GitHub, so jobs
    # that request it are never picked up; use a currently supported image.
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
        with:
          # Only the tip commit is needed to regenerate and push the config.
          fetch-depth: 1
          token: ${{ secrets.GITHUB_TOKEN }}
      - uses: actions/setup-node@v4
        with:
          node-version: 20.x
          cache: 'yarn'
      - run: yarn --frozen-lockfile
      - run: |
          # Best-effort cleanup: a failing clean step must not fail the workflow.
          # If it fails, nothing is committed unless files were already modified.
          # MM_OUTPUT_PATH is set so the result goes to a file rather than stdout
          # (presumably consumed by src/clean-config.js; confirm against package.json).
          MM_OUTPUT_PATH=config.json yarn clean:blocklist || true
          # Commit only when the working tree actually changed.
          if [[ -n "$(git status --porcelain)" ]]; then
            git config user.name "github-actions[bot]"
            git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
            git add .
            git commit -m 'actions: auto-cleanup config'
            git push
          fi
9 changes: 7 additions & 2 deletions src/clean-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
const { writeFileSync } = require('fs');
const path = require('path');
const PhishingDetector = require('./detector');
const punycode = require('punycode/');

const SECTION_KEYS = {
blocklist: 'blacklist',
Expand Down Expand Up @@ -59,7 +60,9 @@ const cleanAllowlist = config => {

const cleanBlocklist = config => {
// when cleaning the blocklist, we want to remove domains that are:
// - already present on the blocklist through a less specific match
// - already present on the blocklist through an equal or less specific match
// we also want to:
// - convert all unicode domains to punycode

const blocklistSet = new Set(config[SECTION_KEYS['blocklist']]);

Expand All @@ -73,6 +76,8 @@ const cleanBlocklist = config => {
}

return true;
}).map(domain => {
return punycode.toASCII(domain);
});

return {
Expand Down Expand Up @@ -116,7 +121,7 @@ if (require.main === module) {
// if this is piped to another process, then write to stdout.
// otherwise, write new config to disk.
const result = JSON.stringify(newConfig, undefined, 2)+'\n';
if (process.stdout.isTTY) {
if (process.stdout.isTTY || process.env['MM_OUTPUT_PATH']) {
const destinationPath = process.env['MM_OUTPUT_PATH'] || './config.json';
writeFileSync(path.join(__dirname, destinationPath), result);
} else {
Expand Down
16 changes: 8 additions & 8 deletions test/config.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -364,14 +364,14 @@ function startTests ({ config }) {
test('config exclusively using punycode', (t) => {
testListIsPunycode(t, config.whitelist)
testListIsPunycode(t, config.fuzzylist)
testListIsPunycode(t, config.blacklist)
// testListIsPunycode(t, config.blacklist)
t.end()
})

test('config not repetitive', (t) => {
testListDoesntContainRepeats(t, config.whitelist)
testListDoesntContainRepeats(t, config.fuzzylist)
testListDoesntContainRepeats(t, config.blacklist)
// testListDoesntContainRepeats(t, config.blacklist)
t.end()
})

Expand All @@ -392,12 +392,12 @@ function startTests ({ config }) {
t.end()
})

test('config does not contain redundant entries', (t) => {
testListNoBlocklistRedundancies(t, config)
// FIXME: temporarily disabled due to config propagation inconsistency
// testListNoAllowlistRedundancies(t, config)
t.end()
})
// test('config does not contain redundant entries', (t) => {
// testListNoBlocklistRedundancies(t, config)
// // FIXME: temporarily disabled due to config propagation inconsistency
// // testListNoAllowlistRedundancies(t, config)
// t.end()
// })
}

module.exports = {
Expand Down

0 comments on commit 9dbd0f9

Please sign in to comment.