From 8c02f86c30872090d9f6ab67a4a662af56292dee Mon Sep 17 00:00:00 2001 From: Loubna Ben Allal <44069155+loubnabnl@users.noreply.github.com> Date: Tue, 28 Mar 2023 12:29:30 +0200 Subject: [PATCH] Update README.md --- pii/ner/pii_redaction/README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pii/ner/pii_redaction/README.md b/pii/ner/pii_redaction/README.md index 596413b..370b38a 100644 --- a/pii/ner/pii_redaction/README.md +++ b/pii/ner/pii_redaction/README.md @@ -1,13 +1,12 @@ # PII redaction - +To run PII redaction on a dataset that went through PII detection with this [NER model](https://huggingface.co/bigcode/bigcode-encoder-pii-ner-v2): ```bash -LANG=jupyter-scripts-dedup-filtered -python main_redact.py --dataset_name /fsx/leandro/data/pii_result/$LANG --target_dataset $LANG-no-pii --save_path_disk $LANG-no-pii-local +LANG=python +python main_redact.py --dataset_name $DATA_PATH --target_dataset $LANG-no-pii --save_path_disk $LANG-no-pii-local ``` -/fsx/leandro/data/pii_result/jupyter-scripts-dedup-filtered -/fsx/leandro/data/pii_result/jupyter-structured-clean-dedup -/fsx/leandro/data/pii_result/github-issues-filtered-structured + +To run multiple `slurm` jobs for each programming language: ```bash -python main_redact.py --dataset_name /fsx/leandro/data/pii_result/$LANG --target_dataset $LANG-no-pii --save_path_disk $LANG-no-pii-local -``` \ No newline at end of file +python run_pii_slurm.py --start 0 --end 88 +```