Skip to content

Commit

Permalink
[SPARKNLP-937] Fixing chunk construction when an entity is found (#14047)
Browse files Browse the repository at this point in the history
  • Loading branch information
danilojsl authored Dec 7, 2023
1 parent 37b8d24 commit a1c4c13
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class AhoCorasickAutomaton(

if (state == 0 && previousState > 0) {
val node = nodes(previousState).get
if (node.isLeaf) {
if (node.isLeaf && node.entity.nonEmpty) {
val chunkAnnotation = buildAnnotation(chunk, node.entity, node.id, sentence)
chunkAnnotations.append(chunkAnnotation)
chunk.clear()
Expand All @@ -135,8 +135,10 @@ class AhoCorasickAutomaton(

if (chunk.nonEmpty) {
val node = nodes(previousState).get
val chunkAnnotation = buildAnnotation(chunk, node.entity, node.id, sentence)
chunkAnnotations.append(chunkAnnotation)
if (node.entity.nonEmpty) {
val chunkAnnotation = buildAnnotation(chunk, node.entity, node.id, sentence)
chunkAnnotations.append(chunkAnnotation)
}
chunk.clear()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ class EntityRulerTest extends AnyFlatSpec with SparkSessionTest {
AssertAnnotations.assertFields(expectedEntitiesFromText6, actualEntities)
}

it should "work with LightPipeline" in {
it should "work with LightPipeline" taggedAs FastTest in {
val externalResource =
ExternalResource(s"$testPath/keywords_only.json", ReadAs.TEXT, Map("format" -> "json"))
val entityRulerPipeline = getEntityRulerKeywordsPipeline(externalResource, useStorage = false)
Expand Down

0 comments on commit a1c4c13

Please sign in to comment.