From bb784e800094a9d2992af5c6c0c2b171569ee75d Mon Sep 17 00:00:00 2001 From: RainRat Date: Tue, 2 Jan 2024 03:23:19 -0800 Subject: [PATCH] fix typos --- data/datasets/safety_directory/child_help/child_help.py | 6 +++--- model/pretokenizer/README.md | 2 +- notebooks/data-augmentation/unified-qa/unified-qa.ipynb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/data/datasets/safety_directory/child_help/child_help.py b/data/datasets/safety_directory/child_help/child_help.py index 62954a22a3..eccca36cf8 100644 --- a/data/datasets/safety_directory/child_help/child_help.py +++ b/data/datasets/safety_directory/child_help/child_help.py @@ -195,7 +195,7 @@ "Ligne Verte 147 Madagascar": { "region": "Madagascar", "page": "https://childhelplineinternational.org/madagascar-ligne-verte-147-madagascar/", - "description": "Ligne Verte 147 is a child helpline for reporting cases of mistreatment, violence, abuse and exploitation against children and is is free, available 24/7 and accessible everywhere in Madagascar.", + "description": "Ligne Verte 147 is a child helpline for reporting cases of mistreatment, violence, abuse and exploitation against children and is free, available 24/7 and accessible everywhere in Madagascar.", "contacts": { "Website": {"type": "website", "link": "https://arozaza.mg/"}, "147": {"type": "phone", "link": "tel:147"}, @@ -529,7 +529,7 @@ "Línea Libre": { "region": "Chile", "page": "https://childhelplineinternational.org/chile-linea-libre/", - "description": "Línea Libre is is a psychological support channel aimed at girls, boys and young people, which is attended directly by psychologists trained to contain, guide, intervene in crises, and address mental health concerns or rights violations. It is available Monday to Saturday from 10:00 a.m. to 10:00 p.m. 
through three channels: phone email, and chat via our app.", + "description": "Línea Libre is a psychological support channel aimed at girls, boys and young people, which is attended directly by psychologists trained to contain, guide, intervene in crises, and address mental health concerns or rights violations. It is available Monday to Saturday from 10:00 a.m. to 10:00 p.m. through three channels: phone, email, and chat via our app.", "contacts": { "Website": {"type": "website", "link": "http://www.linealibre.cl/"}, "1515": {"type": "phone", "link": "tel:1515"}, @@ -2110,7 +2110,7 @@ "Hotline 919": { "region": "Qatar", "page": "https://childhelplineinternational.org/qatar-hotline-919/", - "description": "Hotline 919 provides provides free confidential consultations (social, psychological and legal) for women and children and also provides support to protect and rehabilitate children and women who are victims of violence and family breakdown.", + "description": "Hotline 919 provides free confidential consultations (social, psychological and legal) for women and children and also provides support to protect and rehabilitate children and women who are victims of violence and family breakdown.", "contacts": { "Website": {"type": "website", "link": "http://www.aman.org.qa/"}, "919": {"type": "phone", "link": "tel:919"}, diff --git a/model/pretokenizer/README.md b/model/pretokenizer/README.md index 6c89e8da51..9429e652d4 100644 --- a/model/pretokenizer/README.md +++ b/model/pretokenizer/README.md @@ -19,7 +19,7 @@ python -m pip install ../../oasst-data/ ### Configuration -The datamix to proces can be configured with one or multiple sections in the +The datamix to process can be configured with one or multiple sections in the `configs/pretokenize.yaml` file. 
### Example usage diff --git a/notebooks/data-augmentation/unified-qa/unified-qa.ipynb b/notebooks/data-augmentation/unified-qa/unified-qa.ipynb index ce9cbed6e8..95995d5e7e 100644 --- a/notebooks/data-augmentation/unified-qa/unified-qa.ipynb +++ b/notebooks/data-augmentation/unified-qa/unified-qa.ipynb @@ -1025,7 +1025,7 @@ "def convert_unified_qa(dataset_url):\n", " # download using pandas\n", " ds = pd.read_csv(dataset_url, on_bad_lines=\"skip\", names=[\"Question\", \"Answer\"], sep=\"\\t\")\n", - " # get name for metatdata\n", + " # get name for metadata\n", " ds_name = dataset_url.split(\"/unifiedqa/data/\")[1].split(\"/\")[0]\n", " # get conversation templates list\n", " conv_funcs = converter_functions[ds_name]\n",