Skip to content

Commit

Permalink
SQuAD dataset bug fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
b06505015 committed Feb 3, 2021
1 parent 6efce84 commit 56e277d
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions genienlp/tasks/generic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,10 @@ def __init__(self, path, subsample=None, lower=False, cached_path=None, skip_cac
answer_end = answer_start + len(answer)
context_before_answer = context[:answer_start]
context_after_answer = context[answer_end:]
BEGIN = 'beginanswer '
END = ' endanswer'
BEGIN = 'beginanswer'
END = 'endanswer'

tagged_context = context_before_answer + BEGIN + answer + END + context_after_answer
tagged_context = context_before_answer + BEGIN + ' ' + answer + ' ' + END + context_after_answer
tagged_context = tagged_context.split()

tokenized_answer = answer.split()
Expand Down Expand Up @@ -457,7 +457,7 @@ def __init__(self, path, subsample=None, lower=False, cached_path=None, skip_cac
import pdb;
pdb.set_trace()
context_spans += [len(tagged_context)]
for context_idx, answer_word in zip(context_spans, ex.answer):
for context_idx, answer_word in zip(context_spans, tokenized_answer):
if context_idx == len(tagged_context):
continue
if tagged_context[context_idx] != answer_word:
Expand Down

0 comments on commit 56e277d

Please sign in to comment.