Skip to content

Commit

Permalink
Merge branch 'master' into filter_relations
Browse files: browse the repository at this point in the history
  • Loading branch information
alanakbik authored Feb 1, 2025
2 parents 6f8d168 + e00e0ff commit cbd8be3
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 23 deletions.
5 changes: 2 additions & 3 deletions flair/embeddings/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def embed(self, sentences: Union[list[Sentence], Sentence]):
sentences = [sentences]

raw_sentences = [s.to_original_text() for s in sentences]
tfidf_vectors = torch.from_numpy(self.vectorizer.transform(raw_sentences).A)
tfidf_vectors = torch.from_numpy(self.vectorizer.transform(raw_sentences).toarray())

for sentence_id, sentence in enumerate(sentences):
sentence.set_embedding(self.name, tfidf_vectors[sentence_id])
Expand Down Expand Up @@ -691,10 +691,9 @@ def _add_embeddings_internal(self, sentences: list[Sentence]):

lengths: list[int] = [len(sentence.tokens) for sentence in sentences]
padding_length: int = max(max(lengths), self.min_sequence_length)

pre_allocated_zero_tensor = torch.zeros(
self.embeddings.embedding_length * padding_length,
dtype=self.convs[0].weight.dtype,
dtype=cast(torch.nn.Conv1d, self.convs[0]).weight.dtype,
device=flair.device,
)

Expand Down
2 changes: 1 addition & 1 deletion flair/embeddings/token.py
Original file line number Diff line number Diff line change
Expand Up @@ -1466,7 +1466,7 @@ def _add_embeddings_internal(self, sentences: list[Sentence]) -> list[Sentence]:
word = token.text if self.field is None else token.get_label(self.field).value

if word.strip() == "":
ids = [self.spm.vocab_size(), self.embedder.spm.vocab_size()]
ids = [self.spm.vocab_size(), self.spm.vocab_size()]
else:
if self.do_preproc:
word = self._preprocess(word)
Expand Down
2 changes: 1 addition & 1 deletion flair/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,4 +382,4 @@ def load_torch_state(model_file: str) -> dict[str, typing.Any]:
# to load models on some Mac/Windows setups
# see https://github.com/zalandoresearch/flair/issues/351
f = load_big_file(model_file)
return torch.load(f, map_location="cpu")
return torch.load(f, map_location="cpu", weights_only=False)
8 changes: 4 additions & 4 deletions flair/models/pairwise_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def evaluate(
f"spearman: {metric.spearmanr():.4f}"
)

scores = {
eval_metrics = {
"loss": eval_loss.item(),
"mse": metric.mean_squared_error(),
"mae": metric.mean_absolute_error(),
Expand All @@ -354,12 +354,12 @@ def evaluate(
}

if main_evaluation_metric[0] in ("correlation", "other"):
main_score = scores[main_evaluation_metric[1]]
main_score = eval_metrics[main_evaluation_metric[1]]
else:
main_score = scores["spearman"]
main_score = eval_metrics["spearman"]

return Result(
main_score=main_score,
detailed_results=detailed_result,
scores=scores,
scores=eval_metrics,
)
4 changes: 2 additions & 2 deletions flair/models/sequence_tagger_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,8 +862,8 @@ def push_to_hub(
self.save(local_model_path)

# Determine if model card already exists
info = model_info(repo_id, use_auth_token=token)
write_readme = all(f.rfilename != "README.md" for f in info.siblings)
info = model_info(repo_id, token=token)
write_readme = info.siblings is None or all(f.rfilename != "README.md" for f in info.siblings)

# Generate and save model card
if write_readme:
Expand Down
27 changes: 17 additions & 10 deletions flair/models/text_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def evaluate(
out_path: Optional[Union[str, Path]] = None,
embedding_storage_mode: EmbeddingStorageMode = "none",
mini_batch_size: int = 32,
main_evaluation_metric: tuple[str, str] = ("micro avg", "f1-score"),
main_evaluation_metric: tuple[str, str] = ("correlation", "pearson"),
exclude_labels: Optional[list[str]] = None,
gold_label_dictionary: Optional[Dictionary] = None,
return_loss: bool = True,
Expand Down Expand Up @@ -195,16 +195,23 @@ def evaluate(
f"spearman: {metric.spearmanr():.4f}"
)

result: Result = Result(
main_score=metric.pearsonr(),
eval_metrics = {
"loss": eval_loss.item(),
"mse": metric.mean_squared_error(),
"mae": metric.mean_absolute_error(),
"pearson": metric.pearsonr(),
"spearman": metric.spearmanr(),
}

if main_evaluation_metric[0] in ("correlation", "other"):
main_score = eval_metrics[main_evaluation_metric[1]]
else:
main_score = eval_metrics["spearman"]

result = Result(
main_score=main_score,
detailed_results=detailed_result,
scores={
"loss": eval_loss.item(),
"mse": metric.mean_squared_error(),
"mae": metric.mean_absolute_error(),
"pearson": metric.pearsonr(),
"spearman": metric.spearmanr(),
},
scores=eval_metrics,
)

return result
Expand Down
4 changes: 3 additions & 1 deletion flair/nn/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,9 @@ def predict(

if has_unknown_label:
has_any_unknown_label = True
scores = torch.index_select(scores, 0, torch.tensor(filtered_indices, device=flair.device))
scores = torch.index_select(
scores, 0, torch.tensor(filtered_indices, device=flair.device, dtype=torch.int32)
)

gold_labels = self._prepare_label_tensor([data_points[index] for index in filtered_indices])
overall_loss += self._calculate_loss(scores, gold_labels)[0]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ scikit-learn>=1.0.2
segtok>=1.5.11
sqlitedict>=2.0.0
tabulate>=0.8.10
torch>=1.5.0,!=1.8
torch>=1.13.1
tqdm>=4.63.0
transformer-smaller-training-vocab>=0.2.3
transformers[sentencepiece]>=4.25.0,<5.0.0
Expand Down

0 comments on commit cbd8be3

Please sign in to comment.