Display a message for unsupported transcript file formats (#312)

* Display a message for unsupported transcript file formats * Remove .doc file checks and update documentation
samvera-labs · Dec 14, 2023 · 7ca2109 · 7ca2109
1 parent 80b47b2
commit 7ca2109
Show file tree

Hide file tree

Showing 5 changed files with 90 additions and 27 deletions.
diff --git a/src/components/Transcript/Transcript.js b/src/components/Transcript/Transcript.js
@@ -13,6 +13,7 @@ import './Transcript.scss';
 
 const NO_TRANSCRIPTS_MSG = 'No valid Transcript(s) found, please check again.';
 const INVALID_URL_MSG = 'Invalid URL for transcript, please check again.';
+const NO_SUPPORT = 'Transcript format is not supported, please check again.';
 
 /**
  * 
@@ -255,6 +256,8 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
             newError = INVALID_URL_MSG;
           } else if (tType === TRANSCRIPT_TYPES.noTranscript) {
             newError = NO_TRANSCRIPTS_MSG;
+          } else if (tType === TRANSCRIPT_TYPES.noSupport) {
+            newError = NO_SUPPORT;
           }
           setError(newError);
           setTranscript(tData);
@@ -392,7 +395,7 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
       setTimedText([]);
       let timedText = [];
       switch (transcriptInfo.tType) {
-        case TRANSCRIPT_TYPES.doc:
+        case TRANSCRIPT_TYPES.docx:
           // when given a Word document (.docx) as a transcript
           timedText.push(
             <div
@@ -449,6 +452,7 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
             />
           );
           break;
+        case TRANSCRIPT_TYPES.noSupport:
         case TRANSCRIPT_TYPES.invalid:
         case TRANSCRIPT_TYPES.noTranscript:
         default:

diff --git a/src/components/Transcript/Transcript.md b/src/components/Transcript/Transcript.md
@@ -14,7 +14,7 @@ Transcript component displays any available transcript data in a given IIIF mani
      - IIIF Manifest
        - As a list of `supplementing` annotations
        - As an external resource linked through `annotations` property with `supplementing` motivation
-     - Word document
+     - Word document (.docx)
      - Plain text file
      - WebVTT
 

diff --git a/src/components/Transcript/Transcript.test.js b/src/components/Transcript/Transcript.test.js
@@ -201,7 +201,7 @@ describe('Transcript component', () => {
             '<p><strong>Speaker 1:</strong> <em>Lorem ipsum</em> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Etiam non quam lacus suspendisse faucibus interdum posuere. </p>',
           ],
           tUrl: 'http://example.com/transcript.doc',
-          tType: transcriptParser.TRANSCRIPT_TYPES.doc,
+          tType: transcriptParser.TRANSCRIPT_TYPES.docx,
           tFileExt: 'doc',
         };
         const parseTranscriptMock = jest
@@ -403,15 +403,14 @@ describe('Transcript component', () => {
           },
         ],
       };
-      const sanitizeTranscriptsMock = jest
-        .spyOn(transcriptParser, 'sanitizeTranscripts')
-        .mockReturnValue([{
-          title: 'Image transcript - no transcript',
-          id: 'Image transcript - no transcript-0-0',
-          isMachineGen: false,
-          url: 'https://example.com/transcript_image.png'
-        }]);
 
+      const parseTranscriptMock = jest
+        .spyOn(transcriptParser, 'parseTranscriptData')
+        .mockReturnValue({
+          tData: [],
+          tUrl: 'https://example.com/transcript_image.png',
+          tType: transcriptParser.TRANSCRIPT_TYPES.noSupport,
+        });
       render(
         <React.Fragment>
           <video id="player-id" />
@@ -421,11 +420,54 @@ describe('Transcript component', () => {
       await act(() => Promise.resolve());
 
       await waitFor(() => {
-        expect(sanitizeTranscriptsMock).toHaveBeenCalledTimes(1);
-        expect(screen.queryByTestId('transcript_content_0')).toBeInTheDocument();
+        expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
+        expect(screen.queryByTestId('transcript_content_-2')).toBeInTheDocument();
         expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
         expect(screen.getByTestId('no-transcript')).toHaveTextContent(
-          'No valid Transcript(s) found, please check again'
+          'Transcript format is not supported, please check again.'
+        );
+      });
+    });
+
+    test('invalid transcript file format: text/html', async () => {
+      const props = {
+        playerID: 'player-id',
+        transcripts: [
+          {
+            canvasId: 0,
+            items: [
+              {
+                title: 'Doc Transcript',
+                url: 'https://example.com/section/2/supplemental_files/12',
+              },
+            ],
+          },
+        ],
+      };
+
+      const parseTranscriptMock = jest
+        .spyOn(transcriptParser, 'parseTranscriptData')
+        .mockReturnValue({
+          tData: [],
+          tUrl: 'https://example.com/section/2/supplemental_files/12',
+          tType: transcriptParser.TRANSCRIPT_TYPES.noSupport,
+        });
+
+
+      render(
+        <React.Fragment>
+          <video id="player-id" />
+          <Transcript {...props} />
+        </React.Fragment>
+      );
+      await act(() => Promise.resolve());
+
+      await waitFor(() => {
+        expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
+        expect(screen.queryByTestId('transcript_content_-2')).toBeInTheDocument();
+        expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
+        expect(screen.getByTestId('no-transcript')).toHaveTextContent(
+          'Transcript format is not supported, please check again.'
         );
       });
     });
@@ -464,7 +506,7 @@ describe('Transcript component', () => {
       await act(() => Promise.resolve());
 
       await waitFor(() => {
-        expect(parseTranscriptMock).toHaveBeenCalledTimes(0);
+        expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
         expect(screen.queryByTestId('transcript_content_0')).toBeInTheDocument();
         expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
         expect(screen.getByTestId('no-transcript')).toHaveTextContent(

diff --git a/src/services/transcript-parser.js b/src/services/transcript-parser.js
@@ -16,12 +16,11 @@ const TRANSCRIPT_MIME_TYPES = [
   { type: 'application/json', ext: 'json' },
   { type: 'text/vtt', ext: 'vtt' },
   { type: 'text/plain', ext: 'txt' },
-  { type: 'application/msword', ext: 'doc' },
   { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', ext: 'docx' }
 ];
 
 // Enum describing the transcript types, including the invalid and no-transcript states
-export const TRANSCRIPT_TYPES = { invalid: -1, noTranscript: 0, timedText: 1, plainText: 2, doc: 3 };
+export const TRANSCRIPT_TYPES = { noSupport: -2, invalid: -1, noTranscript: 0, timedText: 1, plainText: 2, docx: 3 };
 
 /**
  * Parse the transcript information in the Manifest presented as supplementing annotations
@@ -229,7 +228,10 @@ export async function parseTranscriptData(url, canvasIndex) {
   if (type.length > 0) {
     fileType = type[0].ext;
   } else {
-    fileType = url.split('.').reverse()[0];
+    let urlExt = url.split('.').reverse()[0];
+    // Only use the URL's extension if it is one of the component's supported file types
+    let filteredExt = TRANSCRIPT_MIME_TYPES.filter(tt => tt.ext === urlExt);
+    fileType = filteredExt.length > 0 ? urlExt : '';
   }
 
   // Return empty array to display an error message
@@ -263,13 +265,12 @@ export async function parseTranscriptData(url, canvasIndex) {
         let parsedText = textData.replace(/\n/g, "<br />");
         return { tData: [parsedText], tUrl: url, tType: TRANSCRIPT_TYPES.plainText, tFileExt: fileType };
       }
-    // for .doc and .docx files
-    case 'doc':
+    // for .docx files
     case 'docx':
       tData = await parseWordFile(fileData);
-      return { tData: [tData], tUrl: url, tType: TRANSCRIPT_TYPES.doc, tFileExt: fileType };
+      return { tData: [tData], tUrl: url, tType: TRANSCRIPT_TYPES.docx, tFileExt: fileType };
     default:
-      return { tData: [], tUrl: url };
+      return { tData: [], tUrl: url, tType: TRANSCRIPT_TYPES.noSupport };
   }
 }
 

diff --git a/src/services/transcript-parser.test.js b/src/services/transcript-parser.test.js
@@ -318,11 +318,11 @@ describe('transcript-parser', () => {
       const fetchDoc = jest.spyOn(global, 'fetch').mockResolvedValueOnce({
         status: 200,
         headers: {
-          get: jest.fn(() => 'application/msword'),
+          get: jest.fn(() => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'),
         },
         blob: jest.fn(() => {
           size: 11064;
-          type: 'application/msword';
+          type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
         }),
       });
 
@@ -333,14 +333,14 @@ describe('transcript-parser', () => {
         });
 
       const response = await transcriptParser.parseTranscriptData(
-        'https://example.com/transcript.doc',
+        'https://example.com/transcript.docx',
         0
       );
 
       expect(fetchDoc).toHaveBeenCalledTimes(1);
       expect(convertSpy).toHaveBeenCalledTimes(1);
       expect(response.tData).toHaveLength(1);
-      expect(response.tFileExt).toEqual('doc');
+      expect(response.tFileExt).toEqual('docx');
     });
 
     test('with a WebVTT file URL', async () => {
@@ -387,7 +387,7 @@ describe('transcript-parser', () => {
       expect(response.tFileExt).toEqual('vtt');
     });
 
-    test('with invalid transcript file type: .png', async () => {
+    test('with unsupported transcript file type in URL: .png', async () => {
       const fetchImage = jest.spyOn(global, 'fetch').mockResolvedValueOnce({
         status: 200,
         headers: { get: jest.fn(() => 'image/png') },
@@ -399,6 +399,22 @@ describe('transcript-parser', () => {
       expect(fetchImage).toHaveBeenCalledTimes(1);
       expect(response.tData).toEqual([]);
       expect(response.tFileExt).toEqual(undefined);
+      expect(response.tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.noSupport);
+    });
+
+    test('with unsupported transcript file content-type: text/html', async () => {
+      const fetchDoc = jest.spyOn(global, 'fetch').mockResolvedValueOnce({
+        status: 200,
+        headers: { get: jest.fn(() => 'text/html') }
+      });
+      const response = await transcriptParser.parseTranscriptData(
+        'https://example.com/section/2/supplemental_files/12',
+        0
+      );
+      expect(fetchDoc).toHaveBeenCalledTimes(1);
+      expect(response.tData).toEqual([]);
+      expect(response.tFileExt).toEqual(undefined);
+      expect(response.tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.noSupport);
     });
 
     test('with an invalid URL', async () => {