From ef46dbf05400abb4afae0d84b7fc8922ab9292cd Mon Sep 17 00:00:00 2001 From: Amir Pourmand Date: Fri, 4 Aug 2023 09:34:44 +0000 Subject: [PATCH 1/3] add alpaca --- data/datasets/__init__.py | 1 + model/model_training/custom_datasets/instruction.py | 1 + 2 files changed, 2 insertions(+) diff --git a/data/datasets/__init__.py b/data/datasets/__init__.py index 7b2c077ea4..721c29bdab 100644 --- a/data/datasets/__init__.py +++ b/data/datasets/__init__.py @@ -34,6 +34,7 @@ "reasoning_bg_oa": "0x22almostEvil/reasoning_bg_oa", "reasoning_gsm_qna_oa": "0x22almostEvil/reasoning-gsm-qna-oa", "semantics_ws_qna_oa": "0x22almostEvil/semantics-ws-qna-oa", + "alpaca-fa-instruction": "pourmand1376/alpaca-fa-instruction", } SAFETY_DATASETS = { diff --git a/model/model_training/custom_datasets/instruction.py b/model/model_training/custom_datasets/instruction.py index 7b6ad39787..a746940dc0 100644 --- a/model/model_training/custom_datasets/instruction.py +++ b/model/model_training/custom_datasets/instruction.py @@ -32,6 +32,7 @@ "evol_instruct_code": "nickrosh/Evol-Instruct-Code-80k-v1", "evol-codealpaca-v1": "theblackcat102/evol-codealpaca-v1", "cot_submix_original": "conceptofmind/cot_submix_original", + "alpaca-fa-instruction": "pourmand1376/alpaca-fa-instruction", } From 3d269e4b78b67d942180d2c13535638e8c089971 Mon Sep 17 00:00:00 2001 From: Amir Pourmand Date: Fri, 4 Aug 2023 09:34:59 +0000 Subject: [PATCH 2/3] add alpaca --- data/datasets/alpaca-fa-instruction/README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 data/datasets/alpaca-fa-instruction/README.md diff --git a/data/datasets/alpaca-fa-instruction/README.md b/data/datasets/alpaca-fa-instruction/README.md new file mode 100644 index 0000000000..ba76e71d10 --- /dev/null +++ b/data/datasets/alpaca-fa-instruction/README.md @@ -0,0 +1,2 @@ +This is a Persian instruction dataset. The dataset is uploaded +[here](https://huggingface.co/datasets/pourmand1376/alpaca-fa-instruction). 
From c2166e4a7cf51851a16fc4a760316be1d7ce42f2 Mon Sep 17 00:00:00 2001 From: Amir Pourmand Date: Fri, 4 Aug 2023 09:38:06 +0000 Subject: [PATCH 3/3] add alpaca multi --- data/datasets/alpaca-fa-multi/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 data/datasets/alpaca-fa-multi/README.md diff --git a/data/datasets/alpaca-fa-multi/README.md b/data/datasets/alpaca-fa-multi/README.md new file mode 100644 index 0000000000..8128647b41 --- /dev/null +++ b/data/datasets/alpaca-fa-multi/README.md @@ -0,0 +1,4 @@ +This is a multi-turn Persian dataset which is in +[orca-chat](https://huggingface.co/datasets/shahules786/orca-chat) format. It is +published on +[Hugging Face](https://huggingface.co/datasets/pourmand1376/alpaca-fa-multi).