Commit 1ae2fa2

Translate task template to Catalan and Galician and fix typos (#506)
mariagrandury authored Jan 22, 2025
1 parent 0140578 commit 1ae2fa2
Showing 9 changed files with 67 additions and 23 deletions.

src/lighteval/tasks/default_prompts.py (5 additions, 5 deletions)

@@ -120,7 +120,7 @@ def asdiv(line, task_name: str = None):

def babi_qa(line, task_name: str = None): # HELM
def process_path(path: str) -> str:
"""Turn a path string (task 19) from the original format 's,w' to a verbal model-friendly format 'south west'"""
"""Turn a path string (task 19) from the original format 's,w' into a verbal model-friendly format 'south west'"""
steps = path.split(",")
directions = {"s": "south", "n": "north", "e": "east", "w": "west"}
path = " ".join([directions[step] for step in steps])
@@ -281,7 +281,7 @@ def bbh_logical_deduction_three_objects(line, task_name: str = None):
def bbh_movie_recommendation(line, task_name: str = None):
if line["target"] == "Monsters, Inc": # this line is not correctly formatted
logger.warning(
"One sample removed from task bbh:movie_recommentation because its line is incorrectly formatted."
"One sample removed from task bbh:movie_recommendation because its line is incorrectly formatted."
)
return []
instruction = "Recommend movies similar to the given list of movies.\n\n"
@@ -500,7 +500,7 @@ def civil_comments(line, task_name: str = None):
def cnn_dm(line, task_name: str = None):
return Doc(
task_name=task_name,
query=f"###\nArticle:{line['article']}\n\nSummarize the above article in 3 sentence.\n",
query=f"###\nArticle:{line['article']}\n\nSummarize the above article in 3 sentences.\n",
choices=[str(line["summary"])],
gold_index=0,
specific={"text": line["article"]},
@@ -730,7 +730,7 @@ def gpqa(line, task_name: str = None):


def gsm8k(line, task_name: str = None):
-# Has special analysis in metric for number decomposiition
+# Has special analysis in metric for number decomposition
return Doc(
task_name=task_name,
query=f"Question: {line['question']}\nAnswer:",
@@ -2076,7 +2076,7 @@ def rte(line, task_name: str = None):
return Doc(
task_name=task_name,
query=f"{line['sentence1']}\nQuestion: {line['sentence2']} True or False?\nAnswer:",
choices=[" True", " False"], # 0 = entailement, 1 = not entailment
choices=[" True", " False"], # 0 = entailment, 1 = not entailment
gold_index=int(line["label"]),
# "metric": "choices_loglikelihood",
)
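
Note: the `process_path` helper touched above is small enough to sanity-check in isolation. A minimal sketch, lifted from the hunk (the surrounding `babi_qa`/Doc plumbing is elided):

    def process_path(path: str) -> str:
        """Turn a path string (task 19) from the original format 's,w' into a verbal model-friendly format 'south west'."""
        directions = {"s": "south", "n": "north", "e": "east", "w": "west"}
        return " ".join(directions[step] for step in path.split(","))

    assert process_path("s,w") == "south west"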

src/lighteval/tasks/lighteval_task.py (2 additions, 2 deletions)

@@ -621,7 +621,7 @@ def create_requests_from_tasks( # noqa: C901
n_samples = min(max_samples, len(task_docs)) if max_samples else len(task_docs)
evaluation_tracker.task_config_logger.log_num_docs(task_name, len(task_docs), n_samples)

-# logs out the diferent versions of the tasks for every few shot
+# logs out the different versions of the tasks for every few shot
for num_fewshot, _ in fewshot_dict[task_name]:
cur_task_name = f"{task_name}|{num_fewshot}"
evaluation_tracker.versions_logger.log(cur_task_name, task.version)
@@ -633,7 +633,7 @@ def create_requests_from_tasks( # noqa: C901
prompt_manager = PromptManager(lm=lm, task=task)
seeds = prompt_manager.few_shot_sampler.get_fewshot_seeds(num_fewshot_seeds)

-# We can do several round of fewshots sampling to get some variance informations
+# We can do several round of fewshots sampling to get some variance information
for seed in seeds:
for doc_id in range(n_samples):
doc_id_seed = f"{doc_id}_{seed}" # if we do several rounds of few shot sampling we have several seeds
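
For context, the loop fixed above samples each document once per few-shot seed; a toy sketch of how `doc_id_seed` is composed (hypothetical seed values, the real ones come from `few_shot_sampler.get_fewshot_seeds`):

    # Two hypothetical sampling seeds over three documents.
    seeds = [0, 1]
    n_samples = 3
    for seed in seeds:
        for doc_id in range(n_samples):
            doc_id_seed = f"{doc_id}_{seed}"  # "0_0", "1_0", "2_0", "0_1", ...
            print(doc_id_seed)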

src/lighteval/tasks/prompt_manager.py (1 addition, 1 deletion)

@@ -132,7 +132,7 @@ def _multi_turn_contexts(self, doc: Doc, use_chat_template: bool, system_prompt:
Multi turn tasks need use chat templating.
Args:
-doc (Doc): Formated document.
+doc (Doc): Formatted document.
use_chat_template (bool): wether or not to use chat template. Will fail if false.
system_prompt (Optional[str]): The system prompt to use
tokenizer (PreTrainedTokenizer): The tokenizer used for the chat template

src/lighteval/tasks/registry.py (1 addition, 1 deletion)

@@ -166,7 +166,7 @@ def _task_superset_dict(self):
"lighteval|mmlu" -> ["lighteval|mmlu:abstract_algebra", "lighteval|mmlu:college_biology", ...]
}
"""
-# Note: sorted before groupby is imporant as the python implementation of groupby does not
+# Note: sorted before groupby is important as the python implementation of groupby does not
# behave like sql groupby. For more info see the docs of itertools.groupby
superset_dict = {k: list(v) for k, v in groupby(sorted(self.task_registry.keys()), lambda x: x.split(":")[0])}
# Only consider supersets with more than one task
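
The comment corrected above points at a real pitfall: `itertools.groupby` only merges *adjacent* equal keys, unlike SQL's GROUP BY. A small self-contained demonstration of why the sort matters:

    from itertools import groupby

    tasks = [
        "lighteval|mmlu:college_biology",
        "lighteval|other",
        "lighteval|mmlu:abstract_algebra",
    ]
    key = lambda t: t.split(":")[0]

    # Unsorted input splits "lighteval|mmlu" into two separate groups:
    print([k for k, _ in groupby(tasks, key)])
    # ['lighteval|mmlu', 'lighteval|other', 'lighteval|mmlu']

    # Sorting first yields the superset grouping _task_superset_dict expects:
    print({k: list(v) for k, v in groupby(sorted(tasks), key)})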

src/lighteval/tasks/templates/continuation.py (1 addition, 1 deletion)

@@ -112,7 +112,7 @@ def get_continuation_prompt_function(
C. Continuation 3
Answer: A/B/C
-This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and don't use the anchor labels (Question/Answer)
+This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and doesn't use the anchor labels (Question/Answer)
Args:
language (Language): The language of the Continuation task.

src/lighteval/tasks/templates/copa.py (3 additions, 3 deletions)

@@ -86,17 +86,17 @@ def get_copa_prompt_function(
Format:
*CF*
-Context Premise thefore/cause | (Continuation 1, Continuation 2, Continuation 3)
+Context Premise therefore/cause | (Continuation 1, Continuation 2, Continuation 3)
*Hybrid*
-Context Premise thefore/cause
+Context Premise therefore/cause
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: | Continuation 1/Continuation 2/Continuation 3
*MCF*
-Context Premise thefore/cause
+Context Premise therefore/cause
A. Continuation 1
B. Continuation 2
C. Continuation 3

src/lighteval/tasks/templates/hellaswag.py (2 additions, 2 deletions)

@@ -70,7 +70,7 @@ def get_hellaswag_prompt_function(
Create a templated prompt function for a Hellaswag task.
Format:
-Context Premise thefore/cause | (Continuation 1, Continuation 2, Continuation 3)
+Context Premise therefore/cause | (Continuation 1, Continuation 2, Continuation 3)
Args:
language (Language): The language of the Hellaswag task.
@@ -126,7 +126,7 @@ def hellaswag_prompt(
if ctx_b:
ctx_a = join_ctxs(ctx_a, ctx_b)

-# Removoal of the [header] can happen and we need the first letter to be capital afterwards
+# Removal of the [header] can happen and we need the first letter to be capital afterwards
full_context = HELLASWAG_QUERY.format(activity_label=activity_label, ctx=ctx_a)
choices = [
hellaswag_preprocess(
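
The corrected comment describes preprocessing that may strip a leading "[header]" tag, after which the first letter must be re-capitalized; a rough sketch of that idea (a hypothetical stand-in, not lighteval's actual hellaswag_preprocess):

    import re

    def strip_header_and_capitalize(text: str) -> str:
        # Hypothetical helper: drop a leading "[...]" tag, then capitalize.
        text = re.sub(r"^\[[^\]]*\]\s*", "", text)
        return text[:1].upper() + text[1:]

    print(strip_header_and_capitalize("[header] the man jumps"))  # "The man jumps"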

src/lighteval/tasks/templates/nli.py (6 additions, 6 deletions)

@@ -228,23 +228,23 @@ def prompt_fn(line: dict, task_name: str):
if input_data is None:
return None

-# Template based on dicussion here: https://github.com/EleutherAI/lm-evaluation-harness/issues/450
+# Template based on discussion here: https://github.com/EleutherAI/lm-evaluation-harness/issues/450
labels = [capitalize(get_relation_label(label, translation_literals)) for label in relations]

premise, hypothesis, gold_idx = input_data["premise"], input_data["hypothesis"], input_data["gold_idx"]
premise = fix_ending_punct(capitalize(input_data["premise"]), translation_literals)
hypothesis = input_data["hypothesis"]
if isinstance(formulation, HybridFormulation):
# If we have the neither option move it to the end to be consistent with standard NLI evaluation
-rearanged_labales = labels
+rearranged_labels = labels
if "neutral" in relations:
neutral_idx = relations.index("neutral")
-rearanged_labales = labels[:neutral_idx] + labels[neutral_idx + 1 :] + [labels[neutral_idx]]
+rearranged_labels = labels[:neutral_idx] + labels[neutral_idx + 1 :] + [labels[neutral_idx]]

-choices_str = f"{translation_literals.comma}{translation_literals.word_space}".join(rearanged_labales[:-1])
-hypothesis = f"{hypothesis.rstrip(PUNCT)}{translation_literals.sentence_space}{choices_str}{translation_literals.word_space}{translation_literals.or_word}{translation_literals.word_space}{rearanged_labales[-1]}{translation_literals.question_mark}"
+choices_str = f"{translation_literals.comma}{translation_literals.word_space}".join(rearranged_labels[:-1])
+hypothesis = f"{hypothesis.rstrip(PUNCT)}{translation_literals.sentence_space}{choices_str}{translation_literals.word_space}{translation_literals.or_word}{translation_literals.word_space}{rearranged_labels[-1]}{translation_literals.question_mark}"

-# (hynky1999): Ideally we would not compute logprobs of the Yes/No/Also in CF fomulation. However as of right now lighteval doesn't allow to
+# (hynky1999): Ideally we would not compute logprobs of the Yes/No/Also in CF formulation. However as of right now lighteval doesn't allow to
# use multi-context.
row = {
"instruction": input_data.get("instruction", ""),
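
The renaming above is purely cosmetic, but the logic is worth spelling out: the "neutral" label is moved to the end so the hybrid prompt always closes with the neither-like option. A toy sketch with hypothetical English literals in place of TranslationLiterals:

    relations = ["entailment", "neutral", "contradiction"]
    labels = ["True", "Neither", "False"]

    rearranged_labels = labels
    if "neutral" in relations:
        neutral_idx = relations.index("neutral")
        rearranged_labels = labels[:neutral_idx] + labels[neutral_idx + 1 :] + [labels[neutral_idx]]

    choices_str = ", ".join(rearranged_labels[:-1])
    print(f"It is raining. {choices_str} or {rearranged_labels[-1]}?")
    # It is raining. True, False or Neither?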

src/lighteval/tasks/templates/utils/translation_literals.py (46 additions, 2 deletions)

@@ -178,7 +178,29 @@ def __getattribute__(self, name: str) -> str:
Language.BRETON: TranslationLiterals(language=Language.BRETON),
Language.BULGARIAN: TranslationLiterals(language=Language.BULGARIAN),
Language.BURMESE: TranslationLiterals(language=Language.BURMESE),
-Language.CATALAN: TranslationLiterals(language=Language.CATALAN),
+Language.CATALAN: TranslationLiterals(
+    language=Language.CATALAN,
+    question_word="pregunta",
+    answer="resposta",
+    confirmation_word="cert",
+    yes="sí",
+    no="no",
+    also="també",
+    cause_word="perquè",
+    effect_word="per tant",
+    or_word="o",
+    true="veritable",
+    false="fals",
+    neither="cap",
+    full_stop=".",
+    comma=",",
+    question_mark="?",
+    exclamation_mark="!",
+    word_space=" ",
+    sentence_space=" ",
+    colon=":",
+    semicolon=";",
+),
Language.CEBUANO: TranslationLiterals(language=Language.CEBUANO),
Language.CHINESE: TranslationLiterals(
language=Language.CHINESE,
@@ -348,7 +370,29 @@ def __getattribute__(self, name: str) -> str:
sentence_space=" ",
colon=":",
),
-Language.GALICIAN: TranslationLiterals(language=Language.GALICIAN),
+Language.GALICIAN: TranslationLiterals(
+    language=Language.GALICIAN,
+    question_word="pregunta",
+    answer="resposta",
+    confirmation_word="certo",
+    yes="si",
+    no="non",
+    also="tamén",
+    cause_word="porque",
+    effect_word="polo tanto",
+    or_word="ou",
+    true="verdadeiro",
+    false="falso",
+    neither="ningún",
+    full_stop=".",
+    comma=",",
+    question_mark="?",
+    exclamation_mark="!",
+    word_space=" ",
+    sentence_space=" ",
+    colon=":",
+    semicolon=";",
+),
Language.GEORGIAN: TranslationLiterals(language=Language.GEORGIAN),
Language.GERMAN: TranslationLiterals(
language=Language.GERMAN,
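
To see what the new entries buy downstream, here is an illustrative composition of a question/answer anchor from the Catalan literals added above (the field values are copied from the diff; the composition itself is a sketch, not lighteval's template code):

    # Values from the Catalan TranslationLiterals entry above.
    question_word, answer, colon, word_space = "pregunta", "resposta", ":", " "

    prompt = f"{question_word.capitalize()}{colon}{word_space}On és Barcelona?\n{answer.capitalize()}{colon}"
    print(prompt)
    # Pregunta: On és Barcelona?
    # Resposta: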
