From 52a15e8047004156a49717695bf272a5cefa1c37 Mon Sep 17 00:00:00 2001 From: Loubna Ben Allal <44069155+loubnabnl@users.noreply.github.com> Date: Tue, 28 Mar 2023 12:30:20 +0200 Subject: [PATCH] Update utils.py --- pii/ner/pii_redaction/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pii/ner/pii_redaction/utils.py b/pii/ner/pii_redaction/utils.py index 9927596..6ecdd27 100644 --- a/pii/ner/pii_redaction/utils.py +++ b/pii/ner/pii_redaction/utils.py @@ -46,7 +46,7 @@ def is_key(matched_str): # download the training corpora from https://raw.githubusercontent.com/domanchi/gibberish-detector/master/examples/big.txt # run gibberish-detector train big.txt > big.model to generate the model (it takes 3 seconds) Detector = detector.create_from_model( - "/fsx/loubna/code/bigcode-dataset/pii/gibberish_data/big.model" + "/bigcode-dataset/pii/gibberish_data/big.model" ) is_gibberish = Detector.is_gibberish(matched_str.lower()) return is_gibberish and len(matched_str) > 8