Skip to content

Commit

Permalink
Remove unused files
Browse files: browse the repository at this point in the history
  • Loading branch information
jteijema committed Nov 5, 2021
1 parent 292f093 commit 7a1b351
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 418 deletions.
192 changes: 0 additions & 192 deletions asreviewcontrib/semantic_clustering/build.py

This file was deleted.

116 changes: 0 additions & 116 deletions asreviewcontrib/semantic_clustering/load_data.py

This file was deleted.

17 changes: 13 additions & 4 deletions asreviewcontrib/semantic_clustering/semantic_clustering.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,7 @@
from transformers import logging
logging.set_verbosity_error()

#import tqdm
# import tqdm


def SemanticClustering(asreview_data_object):
Expand All @@ -49,13 +49,22 @@ def SemanticClustering(asreview_data_object):

# tokenize abstracts and add to data
print("Tokenizing abstracts...")
data['tokenized'] = data['abstract'].apply(lambda x: tokenizer.encode(
data['tokenized'] = data['abstract'].apply(lambda x: tokenizer.encode_plus(
x,
padding='longest',
add_special_tokens=True,
add_special_tokens=False,
return_tensors="pt"))

print(data)
# generate embeddings
print("Generating embeddings...")
data['embeddings'] = data['tokenized'].apply(
lambda x: model(**x, output_hidden_states=False)[-1])

from dim_reduct import run_pca
n_components = .98
#pca = run_pca(data['embeddings'], n_components)

print(data['embeddings'][0].detach().numpy())


def load_data(asreview_data_object):
Expand Down
Loading

0 comments on commit 7a1b351

Please sign in to comment.