From 2db5c77eb51427c9a7568603cf312aade30e8536 Mon Sep 17 00:00:00 2001 From: Paul Bui-Quang Date: Tue, 4 Feb 2025 15:43:47 +0100 Subject: [PATCH] feat: add resource patient age filter tests --- .../engine/QueryBuilderBasicResource.scala | 60 +++++++------------ .../testCases/patientAge/expected.csv | 3 + .../testCases/patientAge/request.json | 36 +++++++++++ .../testCases/patientAge/resource_1.csv | 15 +++++ .../requester/query/QueryBuilderTest.scala | 4 ++ 5 files changed, 78 insertions(+), 40 deletions(-) create mode 100644 src/test/resources/testCases/patientAge/expected.csv create mode 100644 src/test/resources/testCases/patientAge/request.json create mode 100644 src/test/resources/testCases/patientAge/resource_1.csv diff --git a/src/main/scala/fr/aphp/id/eds/requester/query/engine/QueryBuilderBasicResource.scala b/src/main/scala/fr/aphp/id/eds/requester/query/engine/QueryBuilderBasicResource.scala index 036e29e..0bfc797 100644 --- a/src/main/scala/fr/aphp/id/eds/requester/query/engine/QueryBuilderBasicResource.scala +++ b/src/main/scala/fr/aphp/id/eds/requester/query/engine/QueryBuilderBasicResource.scala @@ -40,42 +40,21 @@ class QueryBuilderBasicResource(val querySolver: ResourceResolver) { (year_min, month_min, day_min, year_max, month_max, day_max) } - def addAgeColumn(dataFrame: DataFrame, patientAge: PatientAge): DataFrame = { - val (year_min, _, _, year_max, _, _) = getDecomposedAgeMinAndMax(patientAge) - if (year_max == 0 & year_min == 0) { - dataFrame.withColumn( - QueryColumn.AGE, - F.datediff(F.col(s"${QueryBuilderUtils.getDateColumn(criterionId)}"), - F.col(QueryBuilderUtils.getPatientBirthColumn(criterionId)))) - } else { - dataFrame.withColumn( - colName = QueryColumn.AGE, - F.datediff(F.col(s"${QueryBuilderUtils.getDateColumn(criterionId)}"), - F.col(QueryBuilderUtils.getPatientBirthColumn(criterionId))) / 365.25 - ) - } + def addAgeColumn(dataFrame: DataFrame): DataFrame = { + dataFrame.withColumn(QueryColumn.AGE, + F.datediff(F.col(s"${QueryBuilderUtils.getDateColumn(criterionId)}"), + F.col(QueryBuilderUtils.getPatientBirthColumn(criterionId)))) } def getAgeFilter(patientAge: PatientAge, dateIsNotNull: Boolean): Column = { val (year_min, month_min, day_min, year_max, month_max, day_max) = getDecomposedAgeMinAndMax(patientAge) - var sparkFilterList = new ListBuffer[Column]() - sparkFilterList = if (year_max == 0 & year_min == 0) { - if (patientAge.maxAge.isDefined) { - sparkFilterList += F.col(QueryColumn.AGE) <= month_max * 30 + day_max - } - if (patientAge.minAge.isDefined) { - sparkFilterList += F.col(QueryColumn.AGE) >= month_min * 30 + day_min - } - sparkFilterList - } else { - if (patientAge.maxAge.isDefined) { - sparkFilterList += F.col(QueryColumn.AGE) <= year_max - } - if (patientAge.minAge.isDefined) { - sparkFilterList += F.col(QueryColumn.AGE) >= year_min - } - sparkFilterList + val sparkFilterList = new ListBuffer[Column]() + if (patientAge.maxAge.isDefined) { + sparkFilterList += F.col(QueryColumn.AGE) <= month_max * 30 + day_max + year_max * 365.25 + } + if (patientAge.minAge.isDefined) { + sparkFilterList += F.col(QueryColumn.AGE) >= month_min * 30 + day_min + year_min * 365.25 } val unifiedSparkFilter = sparkFilterList.toList.reduce(_ && _) if (!dateIsNotNull) @@ -97,16 +76,14 @@ class QueryBuilderBasicResource(val querySolver: ResourceResolver) { criterionId, datePreference, basicResource.resourceType) - val criterionDataFrameWithAgeColumn: DataFrame = - addAgeColumn(criterionDataFrameWithDateColumn, patientAge) + val criterionDataFrameWithAgeColumn: DataFrame = addAgeColumn(criterionDataFrameWithDateColumn) val ageBasedSparkFilter: Column = getAgeFilter(patientAge, dateIsNotNull) if (logger.isDebugEnabled) logger.debug( s"Basic Resource : filterByPatientAge : filter=$ageBasedSparkFilter, df.head=${criterionDataFrameWithDateColumn.head(10).toList.slice(0, 10)}") - var filteredCriterionDataFrame = - criterionDataFrameWithAgeColumn.filter(ageBasedSparkFilter === true) + var filteredCriterionDataFrame = criterionDataFrameWithAgeColumn.filter(ageBasedSparkFilter === true) filteredCriterionDataFrame = dropTemporaryAgeColumns(filteredCriterionDataFrame) if (logger.isDebugEnabled) logger.debug( @@ -129,7 +106,9 @@ class QueryBuilderBasicResource(val querySolver: ResourceResolver) { var operator = code.operator if (operator != ">=" || n != 1) { operator = if (operator == "=") "==" else operator - val groupByColumns = ListBuffer[String](QueryBuilderUtils.getSubjectColumn(criterionId), QueryBuilderUtils.buildColName(criterionId, codeColumn)) + val groupByColumns = ListBuffer[String]( + QueryBuilderUtils.getSubjectColumn(criterionId), + QueryBuilderUtils.buildColName(criterionId, codeColumn)) val filterPatientDataFrame: DataFrame = criterionDataFrame .groupBy(groupByColumns.head, groupByColumns.tail.toList: _*) .count() @@ -144,7 +123,8 @@ class QueryBuilderBasicResource(val querySolver: ResourceResolver) { } if (filterDataframe.isDefined) { val filterPatientDataFrame = filterDataframe.get - return criterionDataFrame.join(filterPatientDataFrame, + return criterionDataFrame.join( + filterPatientDataFrame, criterionDataFrame(subjectColumn) <=> filterPatientDataFrame(subjectColumn), "left_semi") } @@ -306,9 +286,9 @@ class QueryBuilderBasicResource(val querySolver: ResourceResolver) { isInTemporalConstraint) criterionDataFrame = filterByUniqueCodes(criterionDataFrame, basicResource, criterionId) criterionDataFrame = qbUtils.cleanDataFrame(criterionDataFrame, - isInTemporalConstraint, - selectedColumns, - subjectColumn) + isInTemporalConstraint, + selectedColumns, + subjectColumn) if (logger.isDebugEnabled) { logger.debug( diff --git a/src/test/resources/testCases/patientAge/expected.csv b/src/test/resources/testCases/patientAge/expected.csv new file mode 100644 index 0000000..7a6cf29 --- /dev/null +++ b/src/test/resources/testCases/patientAge/expected.csv @@ -0,0 +1,3 @@ +subject_id +7 +1 diff --git a/src/test/resources/testCases/patientAge/request.json b/src/test/resources/testCases/patientAge/request.json new file mode 100644 index 0000000..338160b --- /dev/null +++ b/src/test/resources/testCases/patientAge/request.json @@ -0,0 +1,36 @@ +{ + "sourcePopulation": { + "caresiteCohortList": [ + 57664 + ] + }, + "_type": "request", + "request": { + "_type": "andGroup", + "_id": 0, + "isInclusive": true, + "criteria": [ + { + "_type": "basicResource", + "_id": 1, + "isInclusive": true, + "resourceType": "medicationRequestAphp", + "filterSolr": "(codeList:R26) AND active:true", + "filterFhir": "codeList=R26", + "occurrence": { + "n": 1, + "operator": ">=", + "sameEncounter": false, + "sameDay": false + }, + "patientAge": { + "minAge": "18-2-4", + "maxAge": "20-0-0", + "datePreference": ["encounter_start_date"] + } + } + ], + "temporalConstraints": [] + }, + "id": 3 +} \ No newline at end of file diff --git a/src/test/resources/testCases/patientAge/resource_1.csv b/src/test/resources/testCases/patientAge/resource_1.csv new file mode 100644 index 0000000..4f3403d --- /dev/null +++ b/src/test/resources/testCases/patientAge/resource_1.csv @@ -0,0 +1,15 @@ +_subject;patient_birthdate;encounter_start_date +1;1977-04-01T02:00:00Z;1995-06-10T02:00:00Z +1;1977-04-01T02:00:00Z;2021-12-03T14:09:00Z +1;1977-04-01T02:00:00Z;2021-12-03T14:09:00Z +4;1978-04-01T02:00:00Z;1996-06-01T02:00:00Z +4;1978-03-31T02:00:00Z;2021-12-03T14:09:00Z +6;1979-03-31T02:00:00Z;2021-12-03T14:09:00Z +7;1980-03-31T02:00:00Z;2000-02-03T14:09:00Z +7;1980-03-31T02:00:00Z;2021-12-03T14:09:00Z +7;1980-03-31T02:00:00Z;2021-12-03T14:09:00Z +7;1980-03-31T02:00:00Z;2021-12-03T14:09:00Z +7;1980-03-31T02:00:00Z;2021-12-03T14:09:00Z +8;1981-03-31T02:00:00Z;2021-12-03T14:09:00Z +9;1982-03-31T02:00:00Z;2021-12-03T14:09:00Z + diff --git a/src/test/scala/fr/aphp/id/eds/requester/query/QueryBuilderTest.scala b/src/test/scala/fr/aphp/id/eds/requester/query/QueryBuilderTest.scala index 810dad6..ec8b91d 100644 --- a/src/test/scala/fr/aphp/id/eds/requester/query/QueryBuilderTest.scala +++ b/src/test/scala/fr/aphp/id/eds/requester/query/QueryBuilderTest.scala @@ -215,4 +215,8 @@ class QueryBuilderTest extends AnyFunSuiteLike with DatasetComparer { testCaseEvaluate("nAmongMUniqueFields") } + test("patientAge") { + testCaseEvaluate("patientAge") + } + }