From 79e94062600370694aefe86a5fbb9783329f701d Mon Sep 17 00:00:00 2001 From: Yanyu Zheng Date: Thu, 15 Apr 2021 12:53:10 -0400 Subject: [PATCH] fix: Open data handler fix (#442) * fix: open data issue Co-authored-by: zheyanyu --- .../lib/schema/create-study.json | 6 +- .../lib/schema/update-study.json | 6 +- .../lib/study/__tests__/study-service.test.js | 136 +++++++++++++----- .../backend/config/infra/cloudformation.yml | 25 ++-- .../backend/config/infra/functions.yml | 1 + .../__tests__/handler-impl.test.js | 64 +++++++-- .../open-data-scrape-handler/handler-impl.js | 65 +++++---- 7 files changed, 213 insertions(+), 90 deletions(-) diff --git a/addons/addon-base-raas/packages/base-raas-services/lib/schema/create-study.json b/addons/addon-base-raas/packages/base-raas-services/lib/schema/create-study.json index 9565a69749..ae37ec3e11 100644 --- a/addons/addon-base-raas/packages/base-raas-services/lib/schema/create-study.json +++ b/addons/addon-base-raas/packages/base-raas-services/lib/schema/create-study.json @@ -21,8 +21,7 @@ }, "description": { "type": "string", - "maxLength": 2048, - "pattern": "^([^<>{}]*)$" + "description": "Leaving length and pattern blank to accommodate open data" }, "projectId": { "type": "string", @@ -36,7 +35,8 @@ "sha": { "type": "string", "maxLength": 64, - "pattern": "^([A-Fa-f0-9])$" + "pattern": "^([A-Fa-f0-9]{40})$", + "description": "A unique identifier for Open Data in MD5 hash, hexadecimal" }, "resources": { "type": "array", diff --git a/addons/addon-base-raas/packages/base-raas-services/lib/schema/update-study.json b/addons/addon-base-raas/packages/base-raas-services/lib/schema/update-study.json index 4d4872212b..3d8dd915a0 100644 --- a/addons/addon-base-raas/packages/base-raas-services/lib/schema/update-study.json +++ b/addons/addon-base-raas/packages/base-raas-services/lib/schema/update-study.json @@ -21,13 +21,13 @@ }, "description": { "type": "string", - "maxLength": 2048, - "pattern": "^([^<>{}]*)$" + "description": "Leaving length and pattern blank to accommodate open data" }, "sha": { "type": "string", "maxLength": 64, - "pattern": "^([A-Fa-f0-9])$" + "pattern": "^([A-Fa-f0-9]{40})$", + "description": "A unique identifier for Open Data in MD5 hash, hexadecimal" }, "appRoleArn": { "type": "string", diff --git a/addons/addon-base-raas/packages/base-raas-services/lib/study/__tests__/study-service.test.js b/addons/addon-base-raas/packages/base-raas-services/lib/study/__tests__/study-service.test.js index 3c64e82bc3..a9a8e67c72 100644 --- a/addons/addon-base-raas/packages/base-raas-services/lib/study/__tests__/study-service.test.js +++ b/addons/addon-base-raas/packages/base-raas-services/lib/study/__tests__/study-service.test.js @@ -17,6 +17,7 @@ const _ = require('lodash'); const ServicesContainer = require('@aws-ee/base-services-container/lib/services-container'); const JsonSchemaValidationService = require('@aws-ee/base-services/lib/json-schema-validation-service'); const AwsService = require('@aws-ee/base-services/lib/aws/aws-service'); +const { getSystemRequestContext } = require('@aws-ee/base-services/lib/helpers/system-context'); jest.mock('@aws-ee/base-services/lib/db-service'); jest.mock('@aws-ee/base-services/lib/logger/logger-service'); @@ -389,6 +390,51 @@ describe('studyService', () => { }); describe('create', () => { + it('should succeed for valid open data record', async () => { + // BUILD + const systemContext = getSystemRequestContext(); + const studyData = { + description: 'Example study 1', + id: 'study-2', + name: 'Study 1', + resources: [ + { + arn: 'arn:aws:s3:::study1', + }, + ], + sha: '19d5b9c735712185ca1c691143e458a7aa2b7f69', + category: 'Open Data', + }; + dbService.table.update.mockResolvedValueOnce(studyData); + + // OPERATE + await expect(service.create(systemContext, studyData)).resolves.toMatchObject(studyData); + }); + it('should fail for invalid open data record', async () => { + // BUILD + const systemContext = getSystemRequestContext(); + const studyData = { + description: 'Example study 1', + id: 'study-2', + name: 'Study 1', + resources: [ + { + arn: 'arn:aws:s3:::study1', + }, + ], + sha: '19d5b9z735712185ca1c691143t458a7aa2b7f69', + category: 'Open Data', + }; + + // OPERATE + try { + await service.create(systemContext, studyData); + expect.hasAssertions(); + } catch (err) { + // CHECK + expect(err.message).toEqual('Input has validation errors'); + } + }); it('should fail due to missing id', async () => { // BUILD const ipt = { @@ -792,26 +838,6 @@ describe('studyService', () => { expect.objectContaining({ boom: true, code: 'badRequest', safe: true, message: 'Input has validation errors' }), ); }); - it('should fail since the given study desc is invalid', async () => { - // BUILD - const uid = 'u-currentUserId'; - const requestContext = { - principalIdentifier: { uid }, - principal: { userRole: 'researcher', status: 'active' }, - }; - const dataIpt = { - id: 'id', - name: 'name', - category: 'Organization', - description: '', - resources: [{ arn: 'arn:aws:s3:::someRandomStudyArn' }], - }; - // OPERATE - await expect(service.create(requestContext, dataIpt)).rejects.toThrow( - // CHECK - expect.objectContaining({ boom: true, code: 'badRequest', safe: true, message: 'Input has validation errors' }), - ); - }); it('should fail since the given study sha is invalid', async () => { // BUILD const uid = 'u-currentUserId'; @@ -836,6 +862,57 @@ describe('studyService', () => { }); describe('update', () => { + it('should succeed for valid open data record', async () => { + // BUILD + const systemContext = getSystemRequestContext(); + const studyData = { + description: 'Example study 1', + id: 'study-2', + name: 'Study 1', + resources: [ + { + arn: 'arn:aws:s3:::study1', + }, + ], + sha: '19d5b9c735712185ca1c691143e458a7aa2b7f69', + rev: 0, + }; + dbService.table.update.mockResolvedValueOnce(studyData); + service.getStudyPermissions = jest.fn().mockResolvedValueOnce({ + ...studyData, + category: 'Open Data', + status: 'reachable', + permissions: { adminUsers: [], readonlyUsers: [], readwriteUsers: [], writeonlyUsers: [] }, + }); + + // OPERATE + await expect(service.update(systemContext, studyData)).resolves.toMatchObject(studyData); + }); + it('should fail for invalid open data record', async () => { + // BUILD + const systemContext = getSystemRequestContext(); + const studyData = { + description: 'Example study 1', + id: 'study-2', + name: 'Study 1', + resources: [ + { + arn: 'arn:aws:s3:::study1', + }, + ], + sha: '19d5b9z735712185ca1c691143t458a7aa2b7f69', + rev: 0, + }; + + // OPERATE + try { + await service.update(systemContext, studyData); + expect.hasAssertions(); + } catch (err) { + // CHECK + expect(err.message).toEqual('Input has validation errors'); + } + }); it('should fail since the given study id is invalid', async () => { // BUILD const uid = 'u-currentUserId'; @@ -894,25 +971,6 @@ describe('studyService', () => { expect.objectContaining({ boom: true, code: 'badRequest', safe: true, message: 'Input has validation errors' }), ); }); - it('should fail since the given study desc is invalid', async () => { - // BUILD - const uid = 'u-currentUserId'; - const requestContext = { - principalIdentifier: { uid }, - principal: { userRole: 'researcher', status: 'active' }, - }; - const dataIpt = { - id: 'id', - name: 'name', - description: '', - resources: [{ arn: 'arn:aws:s3:::someRandomStudyArn' }], - }; - // OPERATE - await expect(service.update(requestContext, dataIpt)).rejects.toThrow( - // CHECK - expect.objectContaining({ boom: true, code: 'badRequest', safe: true, message: 'Input has validation errors' }), - ); - }); it('should fail due to missing rev', async () => { // BUILD diff --git a/main/solution/backend/config/infra/cloudformation.yml b/main/solution/backend/config/infra/cloudformation.yml index 089bd86884..2fe5998e14 100644 --- a/main/solution/backend/config/infra/cloudformation.yml +++ b/main/solution/backend/config/infra/cloudformation.yml @@ -656,16 +656,21 @@ Resources: - PolicyName: db-access PolicyDocument: Statement: - Effect: Allow - Action: - - dynamodb:DeleteItem - - dynamodb:GetItem - - dynamodb:PutItem - - dynamodb:Query - - dynamodb:Scan - - dynamodb:UpdateItem - Resource: - - !GetAtt [StudiesDb, Arn] + - Effect: Allow + Action: + - dynamodb:DeleteItem + - dynamodb:GetItem + - dynamodb:PutItem + - dynamodb:Query + - dynamodb:Scan + - dynamodb:UpdateItem + Resource: + - !GetAtt [StudiesDb, Arn] + - Effect: Allow + Action: + - dynamodb:GetItem + Resource: + - !GetAtt [StudyPermissionsDb, Arn] PolicyDataSourceReachabilityHandler: Type: AWS::IAM::ManagedPolicy diff --git a/main/solution/backend/config/infra/functions.yml b/main/solution/backend/config/infra/functions.yml index 0691c9e68b..eee44ab61e 100644 --- a/main/solution/backend/config/infra/functions.yml +++ b/main/solution/backend/config/infra/functions.yml @@ -12,6 +12,7 @@ openDataScrapeHandler: handler: src/lambdas/open-data-scrape-handler/handler.handler role: RoleOpenDataScrapeHandler tags: ${self:custom.tags} + timeout: 30 # in seconds, default is 6. this function usually finishes in 10secs, setting it to 30 just to be safe description: Handles scraping the metadata from the AWS open data registry. environment: APP_DB_STUDIES_CATEGORY_INDEX: ${self:custom.settings.dbStudiesCategoryIndex} diff --git a/main/solution/backend/src/lambdas/open-data-scrape-handler/__tests__/handler-impl.test.js b/main/solution/backend/src/lambdas/open-data-scrape-handler/__tests__/handler-impl.test.js index a0ba7109d0..53162529a2 100644 --- a/main/solution/backend/src/lambdas/open-data-scrape-handler/__tests__/handler-impl.test.js +++ b/main/solution/backend/src/lambdas/open-data-scrape-handler/__tests__/handler-impl.test.js @@ -1,4 +1,13 @@ -const { fetchOpenData } = require('../handler-impl'); +jest.mock('@aws-ee/base-raas-services/lib/study/study-service'); +const StudyService = require('@aws-ee/base-raas-services/lib/study/study-service'); + +jest.mock('@aws-ee/base-services/lib/logger/logger-service'); +const Log = require('@aws-ee/base-services/lib/logger/logger-service'); + +const { getSystemRequestContext } = require('@aws-ee/base-services/lib/helpers/system-context'); + +const _ = require('lodash'); +const { fetchOpenData, saveOpenData } = require('../handler-impl'); const consoleLogger = { info(...args) { @@ -7,23 +16,44 @@ const consoleLogger = { }, }; -describe('fetchOpenData', () => { - const validStudy = { - name: 'Study 1', +describe('fetchAndSaveOpenData', () => { + const log = new Log(); + const studyService = new StudyService(); + const systemContext = getSystemRequestContext(); + const studyData = { description: 'Example study 1', - tags: ['aws-pds', 'genetic', 'genomic', 'life sciences'], + id: 'study-2', + name: 'Study 1', resources: [ { - description: 'Description for Study 1', arn: 'arn:aws:s3:::study1', - region: 'us-east-1', - type: 'S3 Bucket', }, ], - id: 'study-2', sha: 'abc2', + category: 'Open Data', }; + it('should update study with the correct input', async () => { + studyService.find.mockResolvedValueOnce({ rev: 0 }); + await saveOpenData(log, [studyData], studyService); + expect(studyService.find).toHaveBeenCalledWith(systemContext, 'study-2'); + expect(studyService.update).toHaveBeenCalledWith(systemContext, { rev: 0, ..._.omit(studyData, 'category') }); + }); + it('should create study with the correct input', async () => { + await saveOpenData(log, [studyData], studyService); + expect(studyService.find).toHaveBeenCalledWith(systemContext, 'study-2'); + expect(studyService.create).toHaveBeenCalledWith(systemContext, studyData); + }); + it('should still resolve if study update fails', async () => { + studyService.find.mockResolvedValueOnce({ rev: 0 }); + studyService.update.mockImplementationOnce(async () => { + throw new Error('Study update error'); + }); + await expect(saveOpenData(log, [studyData], studyService)).resolves.toMatchObject([studyData]); + }); +}); + +describe('fetchOpenData', () => { const validStudyOpenData = { description: 'Example study 1', id: 'study-2', @@ -40,6 +70,22 @@ describe('fetchOpenData', () => { tags: ['aws-pds', 'genetic', 'genomic', 'life sciences'], }; + const validStudy = { + name: 'Study 1', + description: 'Example study 1', + tags: ['aws-pds', 'genetic', 'genomic', 'life sciences'], + resources: [ + { + description: 'Description for Study 1', + arn: 'arn:aws:s3:::study1', + region: 'us-east-1', + type: 'S3 Bucket', + }, + ], + id: 'study-2', + sha: 'abc2', + }; + const invalidStudy = { name: 'Study 2', description: 'Example study 2', diff --git a/main/solution/backend/src/lambdas/open-data-scrape-handler/handler-impl.js b/main/solution/backend/src/lambdas/open-data-scrape-handler/handler-impl.js index 2f94c71d55..73a1b7b59e 100644 --- a/main/solution/backend/src/lambdas/open-data-scrape-handler/handler-impl.js +++ b/main/solution/backend/src/lambdas/open-data-scrape-handler/handler-impl.js @@ -25,6 +25,10 @@ const consoleLogger = { // eslint-disable-next-line no-console console.log(...args); }, + error(...args) { + // eslint-disable-next-line no-console + console.error(...args); + }, }; const _ = require('lodash'); @@ -181,32 +185,7 @@ const newHandler = async ({ studyService, log = consoleLogger } = {}) => { }; } - return async () => { - const fileUrls = await fetchDatasetFiles(); - const opendata = await fetchOpenData({ fileUrls, requiredTags: scrape.filterTags, log, fetchFile }); - - const simplified = opendata.map(basicProjection); - - log.info('Updating studies'); - // create or update existing record - const userContext = getSystemRequestContext(); - await Promise.all( - simplified.map(async study => { - // studyService.find returns the entire db row for that study id - const existingStudy = await studyService.find(userContext, study.id); - if (!existingStudy) { - await studyService.create(userContext, study); - } else { - // remove additional properties before update call to match jsonSchemaValidation - const studyToUpdate = _.omit(existingStudy, ['updatedAt', 'updatedBy', 'createdAt', 'createdBy', 'category']); - - await studyService.update(userContext, studyToUpdate); - } - }), - ); - - return simplified; - }; + return async () => fetchAndSaveOpenData(fetchDatasetFiles, scrape, log, fetchFile, basicProjection, studyService); }; const fetchOpenData = async ({ fileUrls, requiredTags, log, fetchFile }) => { @@ -227,7 +206,41 @@ const fetchOpenData = async ({ fileUrls, requiredTags, log, fetchFile }) => { return filtered; }; +async function saveOpenData(log, simplified, studyService) { + log.info('Updating studies'); + // create or update existing record + const userContext = getSystemRequestContext(); + await Promise.all( + simplified.map(async study => { + try { + const existingStudy = await studyService.find(userContext, study.id); + if (!existingStudy) { + await studyService.create(userContext, study); + } else { + await studyService.update(userContext, { rev: existingStudy.rev, ..._.omit(study, 'category') }); + } + // Catch the err here so other open data update could continue + } catch (err) { + log.error(`Error updating study for id ${study.id} and name ${study.name}. See error and study data below: `); + log.error(err); + log.error(study); + } + }), + ); + return simplified; +} + +const fetchAndSaveOpenData = async (fetchDatasetFiles, scrape, log, fetchFile, basicProjection, studyService) => { + const fileUrls = await fetchDatasetFiles(); + const openData = await fetchOpenData({ fileUrls, requiredTags: scrape.filterTags, log, fetchFile }); + + const simplifiedStudyData = openData.map(basicProjection); + + return saveOpenData(log, simplifiedStudyData, studyService); +}; + module.exports = { fetchOpenData, newHandler, + saveOpenData, };