Skip to content

Commit

Permalink
hf-dataset-repoをjob_specの定義にも使うようにした
Browse files: browse the repository at this point in the history
  • Loading branch information
neet committed Jul 4, 2024
1 parent 40a65c6 commit b5915a8
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/ainu_lm_pipeline/components/get_mt5_training_job_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
def get_mt5_training_job_spec(
train_image_uri: str,
push_to_hub: bool,
dataset_name: str,
dataset_revision: str,
) -> list:
worker_pool_specs = [
Expand All @@ -13,7 +14,7 @@ def get_mt5_training_job_spec(
"image_uri": train_image_uri,
"args": [
"mt5",
"--dataset-name=aynumosir/ainu-corpora-normalized",
f"--dataset-name={dataset_name}",
"--dataset-split=train",
f"--dataset-revision={dataset_revision}",
f"--push-to-hub={push_to_hub}",
Expand Down
1 change: 1 addition & 0 deletions src/ainu_lm_pipeline/pipelines/ainu_mt5_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def ainu_mt5_pipeline(
get_mt5_training_job_spec(
train_image_uri=train_image_uri,
push_to_hub=push_to_hub,
dataset_name=hf_dataset_repo,
dataset_revision=get_dataset_revision_op.output,
)
.after(build_custom_train_image_op)
Expand Down

0 comments on commit b5915a8

Please sign in to comment.