Add Legal Opinion Sentiment Classification scenario (#3286)
Co-authored-by: Ryo Kawahara <[email protected]>
Co-authored-by: Mikio Takeuchi <[email protected]>
3 people authored Jan 29, 2025
1 parent 92e3ee1 commit 59dcfb1
Showing 3 changed files with 119 additions and 3 deletions.
28 changes: 25 additions & 3 deletions src/helm/benchmark/run_specs/enterprise_run_specs.py
@@ -19,7 +19,7 @@ def _get_weighted_classification_metric_specs(labels: List[str]) -> List[MetricSpec]:
     return [
         MetricSpec(
             class_name="helm.benchmark.metrics.classification_metrics.ClassificationMetric",
-            args={"averages": ["weighted"], "labels": labels},
+            args={"averages": ["weighted"], "scores": ["f1", "precision", "recall"], "labels": labels},
         )
     ]
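For orientation, here is a minimal sketch of what the newly listed scores plausibly correspond to, assuming the metric follows scikit-learn's weighted-averaging semantics; ClassificationMetric's internals are not part of this diff, so treat the labels, predictions, and calls below as illustrative rather than as HELM's actual implementation.

# Illustration only: weighted f1/precision/recall over string labels, assuming
# sklearn-style semantics; not the actual ClassificationMetric implementation.
from sklearn.metrics import f1_score, precision_score, recall_score

y_true = ["positive", "neutral", "negative", "neutral"]  # made-up gold labels
y_pred = ["positive", "neutral", "neutral", "neutral"]   # made-up model outputs
labels = ["positive", "neutral", "negative"]

for score_name, score_fn in [("f1", f1_score), ("precision", precision_score), ("recall", recall_score)]:
    value = score_fn(y_true, y_pred, labels=labels, average="weighted", zero_division=0)
    print(f"weighted {score_name}: {value:.3f}")

The lowercase labels in the next hunk ("yes"/"no" rather than "Yes"/"No") are presumably part of the same cleanup, since label strings only count toward these scores when they line up exactly with the normalized model outputs.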

@@ -44,7 +44,7 @@ def get_news_headline_spec(category: str) -> RunSpec:
         name=f"gold_commodity_news:category={category}",
         scenario_spec=scenario_spec,
         adapter_spec=adapter_spec,
-        metric_specs=get_exact_match_metric_specs() + _get_weighted_classification_metric_specs(labels=["Yes", "No"]),
+        metric_specs=get_exact_match_metric_specs() + _get_weighted_classification_metric_specs(labels=["yes", "no"]),
         groups=["gold_commodity_news"],
     )

@@ -53,7 +53,7 @@ def get_news_headline_spec(category: str) -> RunSpec:
 
 
 @run_spec_function("legal_contract_summarization")
-def get_legal_contract_spec() -> RunSpec:
+def get_legal_contract_summarization_spec() -> RunSpec:
     scenario_spec = ScenarioSpec(
         class_name="helm.benchmark.scenarios.legal_contract_summarization_scenario.LegalContractSummarizationScenario",
         args={},
@@ -76,6 +76,28 @@ def get_legal_contract_spec() -> RunSpec:
     )
 
 
+@run_spec_function("legal_opinion_sentiment_classification")
+def get_legal_opinion_sentiment_classification_spec() -> RunSpec:
+    scenario_spec = ScenarioSpec(
+        class_name="helm.benchmark.scenarios.legal_opinion_sentiment_classification_scenario.LegalOpinionSentimentClassificationScenario",  # noqa: E501
+    )
+
+    instructions = "Classify the sentences into one of the 3 sentiment categories. Possible labels: positive, neutral, negative."  # noqa: E501
+    adapter_spec = get_generation_adapter_spec(
+        instructions=instructions,
+        output_noun="Label",
+    )
+
+    return RunSpec(
+        name="legal_opinion_sentiment_classification",
+        scenario_spec=scenario_spec,
+        adapter_spec=adapter_spec,
+        metric_specs=get_exact_match_metric_specs()
+        + _get_weighted_classification_metric_specs(labels=["positive", "neutral", "negative"]),
+        groups=["legal_opinion_sentiment_classification"],
+    )
+
+
 @run_spec_function("casehold")
 def get_casehold_spec() -> RunSpec:
     scenario_spec = ScenarioSpec(class_name="helm.benchmark.scenarios.casehold_scenario.CaseHOLDScenario", args={})
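Putting the new run spec together: given the instructions and output_noun="Label" above, and the example prompt in the scenario docstring added below, each request should look roughly like the following. The exact adapter formatting, including how in-context training examples are prepended, is an assumption here rather than something shown in this diff.

# Rough shape of a single prompt under this adapter configuration; the sentence is
# hypothetical, and the real adapter also prepends in-context training examples.
example_prompt = (
    "Classify the sentences into one of the 3 sentiment categories. "
    "Possible labels: positive, neutral, negative.\n"
    "\n"
    "The court affirmed the judgment of the district court.\n"
    "Label:"
)
print(example_prompt)

The model is expected to complete the prompt with one of positive, neutral, or negative, which is what the exact-match and weighted classification metrics then score.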
77 changes: 77 additions & 0 deletions src/helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py
@@ -0,0 +1,77 @@
import os
from typing import List

import pandas as pd

from helm.benchmark.scenarios.scenario import (
    Scenario,
    Instance,
    Reference,
    TRAIN_SPLIT,
    TEST_SPLIT,
    CORRECT_TAG,
    Input,
    Output,
)
from helm.common.general import ensure_file_downloaded, ensure_directory_exists


class LegalOpinionSentimentClassificationScenario(Scenario):
"""
A legal opinion sentiment classification task based on the paper
Effective Approach to Develop a Sentiment Annotator For Legal Domain in a Low Resource Setting
[(Ratnayaka et al., 2020)](https://arxiv.org/pdf/2011.00318.pdf).
Example prompt:
Classify the sentences into one of the 3 sentiment categories. Possible labels: positive, neutral, negative.
{Sentence}
Label: {positive/neutral/negative}
"""

# Names of the tasks we support

name = "legal_opinion"
description = "Predicting the sentiment of the legal text in the positive, negative, or neutral."
tags = ["classification", "sentiment analysis", "legal"]

SENTIMENT_CLASSES = ["positive", "negative", "neutral"]
SPLIT_TO_URL = {
TRAIN_SPLIT: "https://osf.io/download/hfn62/",
TEST_SPLIT: "https://osf.io/download/q4adh/",
}

    def create_instances(self, df: pd.DataFrame, split: str) -> List[Instance]:
        instances: List[Instance] = []
        assert split in [TRAIN_SPLIT, TEST_SPLIT]
        if split == TRAIN_SPLIT:
            phrase_column_name = "Phrase"
            label_column_name = "Label"
        else:
            phrase_column_name = "sentence"
            label_column_name = "label"
        for row in df.itertuples():
            phrase = getattr(row, phrase_column_name)
            label_index = int(getattr(row, label_column_name))
            label = LegalOpinionSentimentClassificationScenario.SENTIMENT_CLASSES[label_index]
            instance = Instance(
                input=Input(text=phrase), references=[Reference(Output(text=label), tags=[CORRECT_TAG])], split=split
            )
            instances.append(instance)
        return instances

    def get_instances(self, output_path: str) -> List[Instance]:
        self.data_dir = os.path.join(output_path, "data")
        data_dir = self.data_dir
        ensure_directory_exists(data_dir)
        instances: List[Instance] = []
        for split, url in LegalOpinionSentimentClassificationScenario.SPLIT_TO_URL.items():
            file_name = f"{split.lower()}.xlsx"
            file_path = os.path.join(data_dir, file_name)
            ensure_file_downloaded(
                source_url=url,
                target_path=file_path,
            )
            df = pd.read_excel(file_path)
            instances.extend(self.create_instances(df, split))
        return instances
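A quick way to sanity-check the row-to-instance mapping above: the test split uses lowercase sentence/label columns, and the integer label indexes into SENTIMENT_CLASSES = ["positive", "negative", "neutral"]. The DataFrame below is made up for illustration; the real data comes from the OSF spreadsheets downloaded in get_instances.

# Hypothetical smoke test with toy data; not part of the committed code or the real dataset.
import pandas as pd

from helm.benchmark.scenarios.legal_opinion_sentiment_classification_scenario import (
    LegalOpinionSentimentClassificationScenario,
)
from helm.benchmark.scenarios.scenario import CORRECT_TAG, TEST_SPLIT

toy_df = pd.DataFrame(
    {
        "sentence": ["The appeal is dismissed.", "The court praised counsel's diligence."],
        "label": [2, 0],  # 2 -> "neutral", 0 -> "positive" under the SENTIMENT_CLASSES ordering
    }
)

scenario = LegalOpinionSentimentClassificationScenario()
for instance in scenario.create_instances(toy_df, TEST_SPLIT):
    gold = [ref.output.text for ref in instance.references if CORRECT_TAG in ref.tags]
    print(instance.input.text, "->", gold)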
17 changes: 17 additions & 0 deletions src/helm/benchmark/static/schema_enterprise.yaml
@@ -121,6 +121,7 @@ run_groups:
     subgroups:
       - legal_contract_summarization
       - casehold
+      - legal_opinion_sentiment_classification
 
   - name: climate_scenarios
     display_name: Climate Scenarios
@@ -187,6 +188,22 @@ run_groups:
       when: before 2021
       language: English
 
+  - name: legal_opinion_sentiment_classification
+    display_name: Legal Opinion Sentiment Classification
+    description: A legal opinion sentiment classification task based on the paper Effective Approach to Develop a Sentiment Annotator For Legal Domain in a Low Resource Setting [(Ratnayaka et al., 2020)](https://arxiv.org/pdf/2011.00318.pdf).
+    metric_groups:
+      - accuracy
+      - general_information
+    environment:
+      main_name: quasi_exact_match
+      main_split: test
+    taxonomy:
+      task: sentiment analysis
+      what: United States legal opinion texts
+      who: United States courts
+      when: Before 2020
+      language: English
+
   - name: sumosum
     display_name: SUMO Web Claims Summarization
     description: A summarization benchmark based on the climate subset of the SUMO dataset ([Mishra et al., 2020](https://aclanthology.org/2020.wnut-1.12/)).
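As a final, illustrative consistency check (assuming the registered run spec function can be imported and called directly, which this diff does not demonstrate): the groups value returned by the run spec must match the run group name declared in schema_enterprise.yaml for results to be aggregated under the new Legal Opinion Sentiment Classification group.

# Illustrative only: confirm that the RunSpec's group matches the schema run group name.
from helm.benchmark.run_specs.enterprise_run_specs import get_legal_opinion_sentiment_classification_spec

run_spec = get_legal_opinion_sentiment_classification_spec()
print(run_spec.name)    # legal_opinion_sentiment_classification
print(run_spec.groups)  # ['legal_opinion_sentiment_classification'], matching the schema entry above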
