Skip to content

Commit

Permalink
Display a message for unsupported transcript file formats (#312)
Browse files Browse the repository at this point in the history
* Display a message for unsupported transcript file formats

* Remove .doc file checks, updated documentation
  • Loading branch information
Dananji authored Dec 14, 2023
1 parent 80b47b2 commit 7ca2109
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 27 deletions.
6 changes: 5 additions & 1 deletion src/components/Transcript/Transcript.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import './Transcript.scss';

const NO_TRANSCRIPTS_MSG = 'No valid Transcript(s) found, please check again.';
const INVALID_URL_MSG = 'Invalid URL for transcript, please check again.';
const NO_SUPPORT = 'Transcript format is not supported, please check again.';

/**
*
Expand Down Expand Up @@ -255,6 +256,8 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
newError = INVALID_URL_MSG;
} else if (tType === TRANSCRIPT_TYPES.noTranscript) {
newError = NO_TRANSCRIPTS_MSG;
} else if (tType === TRANSCRIPT_TYPES.noSupport) {
newError = NO_SUPPORT;
}
setError(newError);
setTranscript(tData);
Expand Down Expand Up @@ -392,7 +395,7 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
setTimedText([]);
let timedText = [];
switch (transcriptInfo.tType) {
case TRANSCRIPT_TYPES.doc:
case TRANSCRIPT_TYPES.docx:
// when given a word document as a transcript
timedText.push(
<div
Expand Down Expand Up @@ -449,6 +452,7 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
/>
);
break;
case TRANSCRIPT_TYPES.noSupport:
case TRANSCRIPT_TYPES.invalid:
case TRANSCRIPT_TYPES.noTranscript:
default:
Expand Down
2 changes: 1 addition & 1 deletion src/components/Transcript/Transcript.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Transcript component displays any available transcript data in a given IIIF mani
- IIIF Manifest
- As a list of `supplementing` annotations
- As an external resource linked through `annotations` property with `supplementing` motivation
- Word document
- Word document (.docx)
- Plain text file
- WebVTT

Expand Down
68 changes: 55 additions & 13 deletions src/components/Transcript/Transcript.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ describe('Transcript component', () => {
'<p><strong>Speaker 1:</strong> <em>Lorem ipsum</em> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Etiam non quam lacus suspendisse faucibus interdum posuere. </p>',
],
tUrl: 'http://example.com/transcript.doc',
tType: transcriptParser.TRANSCRIPT_TYPES.doc,
tType: transcriptParser.TRANSCRIPT_TYPES.docx,
tFileExt: 'doc',
};
const parseTranscriptMock = jest
Expand Down Expand Up @@ -403,15 +403,14 @@ describe('Transcript component', () => {
},
],
};
const sanitizeTranscriptsMock = jest
.spyOn(transcriptParser, 'sanitizeTranscripts')
.mockReturnValue([{
title: 'Image transcript - no transcript',
id: 'Image transcript - no transcript-0-0',
isMachineGen: false,
url: 'https://example.com/transcript_image.png'
}]);

const parseTranscriptMock = jest
.spyOn(transcriptParser, 'parseTranscriptData')
.mockReturnValue({
tData: [],
tUrl: 'https://example.com/transcript_image.png',
tType: transcriptParser.TRANSCRIPT_TYPES.noSupport,
});
render(
<React.Fragment>
<video id="player-id" />
Expand All @@ -421,11 +420,54 @@ describe('Transcript component', () => {
await act(() => Promise.resolve());

await waitFor(() => {
expect(sanitizeTranscriptsMock).toHaveBeenCalledTimes(1);
expect(screen.queryByTestId('transcript_content_0')).toBeInTheDocument();
expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
expect(screen.queryByTestId('transcript_content_-2')).toBeInTheDocument();
expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
expect(screen.getByTestId('no-transcript')).toHaveTextContent(
'No valid Transcript(s) found, please check again'
'Transcript format is not supported, please check again.'
);
});
});

test('invalid transcript file format: text/html', async () => {
const props = {
playerID: 'player-id',
transcripts: [
{
canvasId: 0,
items: [
{
title: 'Doc Transcript',
url: 'https://example.com/section/2/supplemental_files/12',
},
],
},
],
};

const parseTranscriptMock = jest
.spyOn(transcriptParser, 'parseTranscriptData')
.mockReturnValue({
tData: [],
tUrl: 'https://example.com/section/2/supplemental_files/12',
tType: transcriptParser.TRANSCRIPT_TYPES.noSupport,
});


render(
<React.Fragment>
<video id="player-id" />
<Transcript {...props} />
</React.Fragment>
);
await act(() => Promise.resolve());

await waitFor(() => {
expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
expect(screen.queryByTestId('transcript_content_-2')).toBeInTheDocument();
expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
expect(screen.getByTestId('no-transcript')).toHaveTextContent(
'Transcript format is not supported, please check again.'
);
});
});
Expand Down Expand Up @@ -464,7 +506,7 @@ describe('Transcript component', () => {
await act(() => Promise.resolve());

await waitFor(() => {
expect(parseTranscriptMock).toHaveBeenCalledTimes(0);
expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
expect(screen.queryByTestId('transcript_content_0')).toBeInTheDocument();
expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
expect(screen.getByTestId('no-transcript')).toHaveTextContent(
Expand Down
15 changes: 8 additions & 7 deletions src/services/transcript-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ const TRANSCRIPT_MIME_TYPES = [
{ type: 'application/json', ext: 'json' },
{ type: 'text/vtt', ext: 'vtt' },
{ type: 'text/plain', ext: 'txt' },
{ type: 'application/msword', ext: 'doc' },
{ type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', ext: 'docx' }
];

// Enum describing transcript types, including the unsupported-format, invalid, and no-transcript states
export const TRANSCRIPT_TYPES = { invalid: -1, noTranscript: 0, timedText: 1, plainText: 2, doc: 3 };
export const TRANSCRIPT_TYPES = { noSupport: -2, invalid: -1, noTranscript: 0, timedText: 1, plainText: 2, docx: 3 };

/**
* Parse the transcript information in the Manifest presented as supplementing annotations
Expand Down Expand Up @@ -229,7 +228,10 @@ export async function parseTranscriptData(url, canvasIndex) {
if (type.length > 0) {
fileType = type[0].ext;
} else {
fileType = url.split('.').reverse()[0];
let urlExt = url.split('.').reverse()[0];
// Only use this if it exists in the supported list of file types for the component
let filteredExt = TRANSCRIPT_MIME_TYPES.filter(tt => tt.ext === urlExt);
fileType = filteredExt.length > 0 ? urlExt : '';
}

// Return empty array to display an error message
Expand Down Expand Up @@ -263,13 +265,12 @@ export async function parseTranscriptData(url, canvasIndex) {
let parsedText = textData.replace(/\n/g, "<br />");
return { tData: [parsedText], tUrl: url, tType: TRANSCRIPT_TYPES.plainText, tFileExt: fileType };
}
// for .doc and .docx files
case 'doc':
// for .docx files
case 'docx':
tData = await parseWordFile(fileData);
return { tData: [tData], tUrl: url, tType: TRANSCRIPT_TYPES.doc, tFileExt: fileType };
return { tData: [tData], tUrl: url, tType: TRANSCRIPT_TYPES.docx, tFileExt: fileType };
default:
return { tData: [], tUrl: url };
return { tData: [], tUrl: url, tType: TRANSCRIPT_TYPES.noSupport };
}
}

Expand Down
26 changes: 21 additions & 5 deletions src/services/transcript-parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -318,11 +318,11 @@ describe('transcript-parser', () => {
const fetchDoc = jest.spyOn(global, 'fetch').mockResolvedValueOnce({
status: 200,
headers: {
get: jest.fn(() => 'application/msword'),
get: jest.fn(() => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'),
},
blob: jest.fn(() => {
size: 11064;
type: 'application/msword';
type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
}),
});

Expand All @@ -333,14 +333,14 @@ describe('transcript-parser', () => {
});

const response = await transcriptParser.parseTranscriptData(
'https://example.com/transcript.doc',
'https://example.com/transcript.docx',
0
);

expect(fetchDoc).toHaveBeenCalledTimes(1);
expect(convertSpy).toHaveBeenCalledTimes(1);
expect(response.tData).toHaveLength(1);
expect(response.tFileExt).toEqual('doc');
expect(response.tFileExt).toEqual('docx');
});

test('with a WebVTT file URL', async () => {
Expand Down Expand Up @@ -387,7 +387,7 @@ describe('transcript-parser', () => {
expect(response.tFileExt).toEqual('vtt');
});

test('with invalid transcript file type: .png', async () => {
test('with unsupported transcript file type in URL: .png', async () => {
const fetchImage = jest.spyOn(global, 'fetch').mockResolvedValueOnce({
status: 200,
headers: { get: jest.fn(() => 'image/png') },
Expand All @@ -399,6 +399,22 @@ describe('transcript-parser', () => {
expect(fetchImage).toHaveBeenCalledTimes(1);
expect(response.tData).toEqual([]);
expect(response.tFileExt).toEqual(undefined);
expect(response.tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.noSupport);
});

test('with unsupported transcript file content-type: text/html', async () => {
  // Respond with an HTML content-type for a URL that has no supported file extension
  const transcriptUrl = 'https://example.com/section/2/supplemental_files/12';
  const htmlResponse = {
    status: 200,
    headers: { get: jest.fn(() => 'text/html') }
  };
  const fetchSpy = jest.spyOn(global, 'fetch').mockResolvedValueOnce(htmlResponse);

  const { tData, tFileExt, tType } = await transcriptParser.parseTranscriptData(
    transcriptUrl,
    0
  );

  expect(fetchSpy).toHaveBeenCalledTimes(1);
  // No parsed data and no file extension are returned for an unsupported format
  expect(tData).toEqual([]);
  expect(tFileExt).toEqual(undefined);
  expect(tType).toEqual(transcriptParser.TRANSCRIPT_TYPES.noSupport);
});

test('with an invalid URL', async () => {
Expand Down

0 comments on commit 7ca2109

Please sign in to comment.