Skip to content

Commit

Permalink
change gibberish model path
Browse files Browse the repository at this point in the history
  • Loading branch information
loubnabnl committed Aug 24, 2023
1 parent b824c1a commit 7dbe1c6
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pii/utils/emails_ip_addresses_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from gibberish_detector import detector

# Regexes for PII detection
GIBBERISH_MODEL_PATH = './gibberish_data/big.model'

year_patterns = [
regex.compile(
Expand Down Expand Up @@ -149,7 +150,7 @@ def is_gibberish(matched_str):
# pip install gibberish-detector
# download the training corpora from https://raw.githubusercontent.com/domanchi/gibberish-detector/master/examples/big.txt
# run gibberish-detector train big.txt > big.model to generate the model (it takes 3 seconds)
Detector = detector.create_from_model('gibberish_data/big.model')
Detector = detector.create_from_model(GIBBERISH_MODEL_PATH)
return Detector.is_gibberish(matched_str.lower())


Expand Down

0 comments on commit 7dbe1c6

Please sign in to comment.