From e0d227cf9179bb2b26ee9299a86109d6f48f1cd5 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Thu, 19 Feb 2026 14:39:02 -0700 Subject: [PATCH 1/8] Add Interlinearization model, PT9 refactor, and viewing-mode support - Introduce `Interlinearization` data model. - Move internal-only PT9 types to dedicated file and change case of props. - Removed `ScrTextName` prop as it's been deprecated in PT9 since 2020. - Enhance interlinearizer WebView to support switching between viewing modes: InterlinearData and Interlinearization. - Update Jest configuration to include path aliases for types and parsers. - Modify README to clarify the structure of the `src/types/` and `src/parsers/` directories. - Rename `interlinearXmlParser` and related tests to `paratext9parser`. - Add new words to cspell configuration for improved spell checking. --- README.md | 3 +- cspell.json | 13 +- jest.config.ts | 7 +- .../interlinearizer.web-view.test.tsx | 122 +++-- .../paratext-9/paratext9Converter.test.ts | 431 +++++++++++++++ .../paratext9Parser.test.ts} | 228 ++++---- src/interlinearizer.web-view.tsx | 72 ++- src/parsers/paratext-9/paratext-9-types.ts | 64 +++ src/parsers/paratext-9/paratext9Converter.ts | 286 ++++++++++ .../paratext9Parser.ts} | 50 +- src/parsers/{ => paratext-9}/pt9-xml.md | 5 +- src/types/interlinearizer-enums.ts | 58 ++ src/types/interlinearizer.d.ts | 517 ++++++++++++++++-- tsconfig.json | 5 +- 14 files changed, 1617 insertions(+), 244 deletions(-) create mode 100644 src/__tests__/parsers/paratext-9/paratext9Converter.test.ts rename src/__tests__/parsers/{interlinearXmlParser.test.ts => paratext-9/paratext9Parser.test.ts} (78%) create mode 100644 src/parsers/paratext-9/paratext-9-types.ts create mode 100644 src/parsers/paratext-9/paratext9Converter.ts rename src/parsers/{interlinearXmlParser.ts => paratext-9/paratext9Parser.ts} (86%) rename src/parsers/{ => paratext-9}/pt9-xml.md (95%) create mode 100644 src/types/interlinearizer-enums.ts diff --git a/README.md 
b/README.md index ecc24ea..64ef32c 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,8 @@ The general file structure for an extension is as follows: - `src/` contains the source code for the extension - `src/main.ts` is the main entry file for the extension (registers commands and wires interlinear XML) - `src/types/interlinearizer.d.ts` is this extension's types file that defines how other extensions can use this extension through the `papi`. It is copied into the build folder - - `src/parsers/interlinearXmlParser.ts` parses interlinear XML into structured data (uses fast-xml-parser). The PT9 XML schema and parsed output are documented in `src/parsers/pt9-xml.md` + - `src/types/` also holds shared enums and type modules (e.g. `interlinearizer-enums.ts`). Use the path alias `types/interlinearizer-enums` in imports instead of relative paths (see `tsconfig.json` paths). + - `src/parsers/` contains all parsers and converters used when importing external data models sorted by source (e.g. Paratext 9 XML Files). Use the path alias `parsers/...` in imports instead of relative paths (see `tsconfig.json` paths). 
- `*.web-view.tsx` files will be treated as React WebViews - `*.web-view.scss` files provide styles for WebViews - `*.web-view.html` files are a conventional way to provide HTML WebViews (no special functionality) diff --git a/cspell.json b/cspell.json index 8760a2f..4046bd8 100644 --- a/cspell.json +++ b/cspell.json @@ -16,7 +16,10 @@ "appdata", "asyncs", "autodocs", + "BCVWP", + "behaviour", "dockbox", + "Eflomal", "electronmon", "endregion", "finalizer", @@ -24,10 +27,14 @@ "guids", "hopkinson", "iframes", + "interlineardata", "interlinearization", + "interlinearizations", "interlinearizer", + "jsmith", "localstorage", "maximizable", + "Morphosyntactic", "networkable", "Newtonsoft", "nodebuffer", @@ -40,6 +47,7 @@ "pdps", "plusplus", "proxied", + "punc", "reinitializing", "reserialized", "sillsdev", @@ -47,15 +55,18 @@ "stringifiable", "Stylesheet", "typedefs", + "unanalyzed", "unregistering", "unregisters", + "unreviewed", "unsub", "unsubs", "unsubscriber", "unsubscribers", "usfm", "verseref", - "versification" + "versification", + "wordform" ], "ignoreWords": [], "import": [] diff --git a/jest.config.ts b/jest.config.ts index 2e43f5f..ef6987d 100644 --- a/jest.config.ts +++ b/jest.config.ts @@ -27,7 +27,7 @@ const config: Config = { 'src/parsers/**/*.ts', 'src/main.ts', 'src/**/*.web-view.tsx', - '!src/parsers/**/*.d.ts', + '!src/parsers/**/*-types.ts', '!src/**/__tests__/**', '!src/**/*.test.{ts,tsx}', '!src/**/*.spec.{ts,tsx}', @@ -70,11 +70,12 @@ const config: Config = { */ moduleNameMapper: { /** - * Resolve src-rooted path aliases so tests can use e.g. "@main" or "parsers/..." instead of - * relative paths. Must match tsconfig.json "paths" and webpack resolve.alias. + * Resolve src-rooted path aliases so tests can use e.g. "@main", "parsers/...", or "types/..." + * instead of relative paths. Must match tsconfig.json "paths" and webpack resolve.alias. 
*/ '^@main$': '/src/main', '^parsers/(.*)$': '/src/parsers/$1', + '^types/(.*)$': '/src/types/$1', '\\.(sa|sc|c)ss$': '/__mocks__/styleMock.ts', '\\.(jpg|jpeg|png|gif|eot|otf|webp|svg|ttf|woff|woff2|mp4|webm|wav|mp3|m4a|aac|oga)$': '/__mocks__/fileMock.ts', diff --git a/src/__tests__/interlinearizer.web-view.test.tsx b/src/__tests__/interlinearizer.web-view.test.tsx index f1a0d32..67cc843 100644 --- a/src/__tests__/interlinearizer.web-view.test.tsx +++ b/src/__tests__/interlinearizer.web-view.test.tsx @@ -4,18 +4,38 @@ import type { WebViewProps } from '@papi/core'; import type { SerializedVerseRef } from '@sillsdev/scripture'; -import { render, screen } from '@testing-library/react'; -import { InterlinearXmlParser } from 'parsers/interlinearXmlParser'; - -/** Mock parser to allow overriding constructor behavior per test. */ -jest.mock('parsers/interlinearXmlParser', () => { - const actual = jest.requireActual( - 'parsers/interlinearXmlParser', - ); - return { - InterlinearXmlParser: jest.fn().mockImplementation(() => new actual.InterlinearXmlParser()), - }; -}); +import { fireEvent, render, screen } from '@testing-library/react'; +import type { InterlinearData } from 'paratext-9-types'; + +/** Stub InterlinearData returned by the mocked parser. Matches shape the WebView displays. */ +const stubInterlinearData: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: {}, +}; + +/** Stub Interlinearization returned by the mocked converter. Matches shape the WebView displays. */ +const stubInterlinearization = { + id: 'mock-interlinear-id', + sourceWritingSystem: '', + analysisLanguages: ['en'], + books: [{ id: 'mock-book-id', bookRef: 'MAT', textVersion: '', segments: [] }], +}; + +const mockParse = jest.fn().mockReturnValue(stubInterlinearData); +const mockConvert = jest.fn().mockReturnValue(stubInterlinearization); + +/** Mock parser: no real XML parsing; returns stub data. Parser/converter are tested elsewhere. 
*/ +jest.mock('parsers/paratext-9/paratext9Parser', () => ({ + Paratext9Parser: jest.fn().mockImplementation(() => ({ + parse: mockParse, + })), +})); + +/** Mock converter: no real conversion; returns stub Interlinearization. */ +jest.mock('parsers/paratext-9/paratext9Converter', () => ({ + convertParatext9ToInterlinearization: mockConvert, +})); /** * Load the WebView module; it assigns the component to globalThis.webViewComponent. This pattern is @@ -66,37 +86,41 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/test-data\/Interlinear_en_MAT\.xml/i)).toBeInTheDocument(); }); - it('parses the bundled test XML and displays parsed JSON', () => { + it('renders the JSON view mode switch (InterlinearData / Interlinearization)', () => { render(); - expect(screen.getByText(/parsed interlinear data \(json\)/i)).toBeInTheDocument(); - expect(screen.getByText(/"GlossLanguage"/)).toBeInTheDocument(); - expect(screen.getByText(/"BookId"/)).toBeInTheDocument(); + const group = screen.getByRole('group', { name: /json view mode/i }); + expect(group).toBeInTheDocument(); + expect(screen.getByRole('button', { name: /^interlineardata$/i })).toBeInTheDocument(); + expect(screen.getByRole('button', { name: /^interlinearization$/i })).toBeInTheDocument(); + expect(screen.getByText(/view json as:/i)).toBeInTheDocument(); }); - it('displays parsed structure with expected verse data', () => { + it('displays InterlinearData JSON by default when parser returns data', () => { + render(); + + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByText(/glossLanguage/i)).toBeInTheDocument(); + expect(screen.getByText(/bookId/i)).toBeInTheDocument(); + }); + + it('displays parsed structure including glossLanguage and bookId values', () => { render(); expect(screen.getByText(/"en"/)).toBeInTheDocument(); expect(screen.getByText(/"MAT"/)).toBeInTheDocument(); }); - it('does not show parse error when XML is valid', () => { + 
it('does not show parse error when parser succeeds', () => { render(); expect(screen.queryByText(/^parse error$/i)).not.toBeInTheDocument(); }); it('displays parse error when parser throws an Error (uses err.message)', () => { - const actual = jest.requireActual( - '../parsers/interlinearXmlParser', - ); - const realInstance = new actual.InterlinearXmlParser(); - const throwingParse = (): never => { + mockParse.mockImplementationOnce(() => { throw new Error('Invalid XML structure'); - }; - Object.defineProperty(realInstance, 'parse', { value: throwingParse, writable: true }); - jest.mocked(InterlinearXmlParser).mockImplementationOnce(() => realInstance); + }); render(); @@ -104,18 +128,48 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/invalid xml structure/i)).toBeInTheDocument(); }); + it('switching to Interlinearization shows converted model JSON', () => { + render(); + + fireEvent.click(screen.getByRole('button', { name: /^interlinearization$/i })); + + expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument(); + expect(screen.getByText(/sourceWritingSystem/i)).toBeInTheDocument(); + expect(screen.getByText(/segments/i)).toBeInTheDocument(); + }); + + it('switching back to InterlinearData shows PT9 structure JSON', () => { + render(); + + fireEvent.click(screen.getByRole('button', { name: /^interlinearization$/i })); + expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); + + fireEvent.click(screen.getByRole('button', { name: /^interlineardata$/i })); + + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByText(/glossLanguage/i)).toBeInTheDocument(); + expect(screen.getByText(/bookId/i)).toBeInTheDocument(); + }); + + it('renders empty JSON pre when jsonToShow is undefined (converter returns undefined)', () => { + mockConvert.mockReturnValueOnce(undefined); + + const { container 
} = render(); + fireEvent.click(screen.getByRole('button', { name: /^interlinearization$/i })); + + const jsonPre = container.querySelector('pre'); + expect(jsonPre).toBeInTheDocument(); + expect(jsonPre).toBeEmptyDOMElement(); + expect(jsonPre).not.toHaveTextContent('undefined'); + }); + it('displays parse error when parser throws non-Error (uses String(err))', () => { - const actual = jest.requireActual( - '../parsers/interlinearXmlParser', - ); - const realInstance = new actual.InterlinearXmlParser(); - const throwingParse = (): never => { + mockParse.mockImplementationOnce(() => { // Intentionally throw a non-Error to test the String(err) branch in the catch block. // eslint-disable-next-line no-throw-literal -- testing non-Error handling throw 'plain string error'; - }; - Object.defineProperty(realInstance, 'parse', { value: throwingParse, writable: true }); - jest.mocked(InterlinearXmlParser).mockImplementationOnce(() => realInstance); + }); render(); diff --git a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts new file mode 100644 index 0000000..7a86cdf --- /dev/null +++ b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts @@ -0,0 +1,431 @@ +/** @file Unit tests for {@link convertParatext9ToInterlinearization}. 
*/ +/// + +import type { InterlinearData } from 'paratext-9-types'; +import { convertParatext9ToInterlinearization } from 'parsers/paratext-9/paratext9Converter'; + +describe('convertParatext9ToInterlinearization', () => { + describe('top-level structure', () => { + it('produces Interlinearization with id, sourceWritingSystem, analysisLanguages, books', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: {}, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result).toHaveProperty('id'); + expect(result).toHaveProperty('sourceWritingSystem', ''); + expect(result).toHaveProperty('analysisLanguages'); + expect(Array.isArray(result.analysisLanguages)).toBe(true); + expect(result).toHaveProperty('books'); + expect(Array.isArray(result.books)).toBe(true); + }); + + it('uses bookId for interlinearization id (lowercase, spaces to dashes)', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'RUT', + verses: {}, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.id).toBe('rut-interlinear'); + }); + + it('produces id mat-interlinear when bookId is MAT', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: {}, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.id).toBe('mat-interlinear'); + }); + + it('sets analysisLanguages from glossLanguage', () => { + const data: InterlinearData = { + glossLanguage: 'fr', + bookId: 'GEN', + verses: {}, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.analysisLanguages).toEqual(['fr']); + }); + + it('produces exactly one AnalyzedBook with id, bookRef, textVersion, segments', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: {}, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books).toHaveLength(1); + const book = result.books[0]; + 
expect(book).toHaveProperty('id', 'mat'); + expect(book).toHaveProperty('bookRef', 'MAT'); + expect(book).toHaveProperty('textVersion'); + expect(book).toHaveProperty('segments'); + expect(Array.isArray(book.segments)).toBe(true); + }); + }); + + describe('empty verses', () => { + it('returns empty segments array and empty textVersion when verses is empty', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: {}, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments).toEqual([]); + expect(result.books[0].textVersion).toBe(''); + }); + }); + + describe('verse to segment conversion', () => { + it('converts one verse with one cluster to one segment with one word occurrence', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:word', senseId: 'sense1' }], + lexemesId: 'Word:word', + id: 'Word:word/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments).toHaveLength(1); + const seg = result.books[0].segments[0]; + expect(seg.id).toBe('mat-1:1'); + expect(seg.segmentRef).toBe('MAT 1:1'); + expect(seg.baselineText).toBe(''); + expect(seg.occurrences).toHaveLength(1); + + const occ = seg.occurrences[0]; + expect(occ.id).toBe('mat-1:1-occ-0-Word:word/0-4'); + expect(occ.segmentId).toBe('mat-1:1'); + expect(occ.index).toBe(0); + expect(occ.anchor).toBe('0-4'); + expect(occ.surfaceText).toBe(''); + expect(occ.writingSystem).toBe(''); + expect(occ.type).toBe('word'); + expect(occ.assignments).toHaveLength(1); + + const assign = occ.assignments[0]; + expect(assign.occurrenceId).toBe(occ.id); + expect(assign.analysisId).toBe('analysis-en-Word:word-sense1'); + expect(assign.status).toBe('suggested'); + 
expect(assign.id).toBe(`assign-${occ.id}-analysis-en-Word:word-sense1`); + }); + + it('uses verse hash for textVersion and sets assignment status to approved when verse has hash', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: 'ABC123', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:word', senseId: 's1' }], + lexemesId: 'Word:word', + id: 'Word:word/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].textVersion).toBe('ABC123'); + expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('approved'); + }); + }); + + describe('assignment status from verse hash', () => { + it('sets assignment status to suggested when verse has no hash', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:w', senseId: '' }], + lexemesId: 'Word:w', + id: 'Word:w/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('suggested'); + }); + + it('sets assignment status to approved when verse has hash', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: 'H1', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:w', senseId: '' }], + lexemesId: 'Word:w', + id: 'Word:w/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('approved'); + }); + }); + + describe('cluster with multiple lexemes', () => { + 
it('creates one word occurrence with multiple assignments (one per lexeme)', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 5, length: 5 }, + lexemes: [ + { lexemeId: 'Stem:hello', senseId: 'g1' }, + { lexemeId: 'Suffix:ing', senseId: 'g2' }, + ], + lexemesId: 'Stem:hello/Suffix:ing', + id: 'Stem:hello/Suffix:ing/5-5', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + const occ = result.books[0].segments[0].occurrences[0]; + expect(occ.assignments).toHaveLength(2); + expect(occ.assignments.map((a) => a.analysisId)).toEqual([ + 'analysis-en-Stem:hello-g1', + 'analysis-en-Suffix:ing-g2', + ]); + expect(occ.anchor).toBe('5-5'); + }); + }); + + describe('punctuation occurrences', () => { + it('converts punctuations to punctuation occurrences after word occurrences (surfaceText from afterText when present)', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 1 }, + lexemes: [{ lexemeId: 'x', senseId: '' }], + lexemesId: 'x', + id: 'x/0-1', + excluded: false, + }, + ], + punctuations: [ + { + textRange: { index: 34, length: 2 }, + beforeText: '? ', + afterText: '? ', + }, + ], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + const seg = result.books[0].segments[0]; + expect(seg.occurrences).toHaveLength(2); + + const puncOcc = seg.occurrences[1]; + expect(puncOcc.type).toBe('punctuation'); + expect(puncOcc.anchor).toBe('34-2'); + expect(puncOcc.surfaceText).toBe('? 
'); // afterText preferred in implementation + expect(puncOcc.assignments).toEqual([]); + expect(puncOcc.index).toBe(1); + expect(puncOcc.id).toBe('mat-1:1-punc-1-34-2'); + }); + + it('uses beforeText for surfaceText when afterText is empty', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [], + punctuations: [{ textRange: { index: 0, length: 1 }, beforeText: ',', afterText: '' }], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments[0].occurrences[0].surfaceText).toBe(','); + }); + + it('uses empty surfaceText when both beforeText and afterText are empty', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [], + punctuations: [{ textRange: { index: 0, length: 1 }, beforeText: '', afterText: '' }], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments[0].occurrences[0].surfaceText).toBe(''); + }); + }); + + describe('verse with no clusters', () => { + it('produces segment with empty occurrences when verse has no clusters and no punctuations', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments).toHaveLength(1); + expect(result.books[0].segments[0].occurrences).toEqual([]); + expect(result.books[0].segments[0].id).toBe('mat-1:1'); + expect(result.books[0].segments[0].segmentRef).toBe('MAT 1:1'); + }); + }); + + describe('lexeme without senseId', () => { + it('generates analysis id without sense suffix when senseId is empty', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: 
{ index: 0, length: 1 }, + lexemes: [{ lexemeId: 'Word:a', senseId: '' }], + lexemesId: 'Word:a', + id: 'Word:a/0-1', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments[0].occurrences[0].assignments[0].analysisId).toBe( + 'analysis-en-Word:a', + ); + }); + }); + + describe('segment and occurrence IDs', () => { + it('generates segment id from verseRef (lowercase, spaces to dashes)', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'W:w', senseId: '' }], + lexemesId: 'W:w', + id: 'W:w/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + expect(result.books[0].segments[0].id).toBe('mat-1:1'); + }); + + it('generates occurrence id from segmentId, cluster id, and index', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:word', senseId: 's1' }], + lexemesId: 'Word:word', + id: 'Word:word/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = convertParatext9ToInterlinearization(data); + + const segId = result.books[0].segments[0].id; + expect(result.books[0].segments[0].occurrences[0].id).toBe(`${segId}-occ-0-Word:word/0-4`); + }); + }); +}); diff --git a/src/__tests__/parsers/interlinearXmlParser.test.ts b/src/__tests__/parsers/paratext-9/paratext9Parser.test.ts similarity index 78% rename from src/__tests__/parsers/interlinearXmlParser.test.ts rename to src/__tests__/parsers/paratext-9/paratext9Parser.test.ts index 875789f..eed880b 100644 --- a/src/__tests__/parsers/interlinearXmlParser.test.ts +++ 
b/src/__tests__/parsers/paratext-9/paratext9Parser.test.ts @@ -1,16 +1,16 @@ -/** @file Unit tests for {@link InterlinearXmlParser}. */ +/** @file Unit tests for {@link Paratext9Parser}. */ /// import * as fs from 'fs'; import * as path from 'path'; -import { InterlinearXmlParser } from 'parsers/interlinearXmlParser'; +import { Paratext9Parser } from 'parsers/paratext-9/paratext9Parser'; -describe('InterlinearXmlParser', () => { - let parser: InterlinearXmlParser; +describe('Paratext9Parser', () => { + let parser: Paratext9Parser; beforeEach(() => { - parser = new InterlinearXmlParser(); + parser = new Paratext9Parser(); }); describe('parse() - valid XML', () => { @@ -33,30 +33,29 @@ describe('InterlinearXmlParser', () => { const result = parser.parse(xml); expect(result).toEqual({ - ScrTextName: '', - GlossLanguage: 'en', - BookId: 'MAT', - Verses: { + glossLanguage: 'en', + bookId: 'MAT', + verses: { 'MAT 1:1': { - Hash: '', - Clusters: [ + hash: '', + clusters: [ { - TextRange: { Index: 0, Length: 4 }, - Lexemes: [{ LexemeId: 'Word:word', SenseId: 'sense1' }], - LexemesId: 'Word:word', - Id: 'Word:word/0-4', - Excluded: false, + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:word', senseId: 'sense1' }], + lexemesId: 'Word:word', + id: 'Word:word/0-4', + excluded: false, }, ], - Punctuations: [], + punctuations: [], }, }, }); }); - it('parses optional ScrTextName and verse Hash', () => { + it('parses verse Hash', () => { const xml = ` - + RUT 3:1 @@ -72,13 +71,12 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.ScrTextName).toBe('MyProject'); - expect(result.Verses['RUT 3:1'].Hash).toBe('ABC123'); + expect(result.verses['RUT 3:1'].hash).toBe('ABC123'); }); it('parses purely numeric verse Hash', () => { const xml = ` - + RUT 3:1 @@ -94,8 +92,7 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.ScrTextName).toBe('MyProject'); - expect(result.Verses['RUT 
3:1'].Hash).toBe('123456'); + expect(result.verses['RUT 3:1'].hash).toBe('123456'); }); it('parses cluster with multiple lexemes and builds LexemesId and Id correctly', () => { @@ -117,13 +114,13 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - const cluster = result.Verses['MAT 1:1'].Clusters[0]; - expect(cluster.Lexemes).toEqual([ - { LexemeId: 'Stem:hello', SenseId: 'g1' }, - { LexemeId: 'Suffix:ing', SenseId: 'g2' }, + const cluster = result.verses['MAT 1:1'].clusters[0]; + expect(cluster.lexemes).toEqual([ + { lexemeId: 'Stem:hello', senseId: 'g1' }, + { lexemeId: 'Suffix:ing', senseId: 'g2' }, ]); - expect(cluster.LexemesId).toBe('Stem:hello/Suffix:ing'); - expect(cluster.Id).toBe('Stem:hello/Suffix:ing/5-5'); + expect(cluster.lexemesId).toBe('Stem:hello/Suffix:ing'); + expect(cluster.id).toBe('Stem:hello/Suffix:ing/5-5'); }); it('parses lexeme Id containing slash: LexemesId and Id preserve the slash (slash-safe)', () => { @@ -144,10 +141,10 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - const cluster = result.Verses['MAT 1:1'].Clusters[0]; - expect(cluster.Lexemes).toEqual([{ LexemeId: 'Word:hello/world', SenseId: 'g1' }]); - expect(cluster.LexemesId).toBe('Word:hello/world'); - expect(cluster.Id).toBe('Word:hello/world/0-12'); + const cluster = result.verses['MAT 1:1'].clusters[0]; + expect(cluster.lexemes).toEqual([{ lexemeId: 'Word:hello/world', senseId: 'g1' }]); + expect(cluster.lexemesId).toBe('Word:hello/world'); + expect(cluster.id).toBe('Word:hello/world/0-12'); }); it('preserves slash when joining Lexeme Ids (multiple lexemes, one Id contains slash)', () => { @@ -169,13 +166,13 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - const cluster = result.Verses['MAT 1:1'].Clusters[0]; - expect(cluster.Lexemes).toEqual([ - { LexemeId: 'Stem:foo/bar', SenseId: 'g1' }, - { LexemeId: 'Suffix:ing', SenseId: 'g2' }, + const cluster = result.verses['MAT 
1:1'].clusters[0]; + expect(cluster.lexemes).toEqual([ + { lexemeId: 'Stem:foo/bar', senseId: 'g1' }, + { lexemeId: 'Suffix:ing', senseId: 'g2' }, ]); - expect(cluster.LexemesId).toBe('Stem:foo/bar/Suffix:ing'); - expect(cluster.Id).toBe('Stem:foo/bar/Suffix:ing/5-11'); + expect(cluster.lexemesId).toBe('Stem:foo/bar/Suffix:ing'); + expect(cluster.id).toBe('Stem:foo/bar/Suffix:ing/5-11'); }); it('parses cluster with no lexemes: Id is Index-Length only (no leading slash)', () => { @@ -195,10 +192,10 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - const cluster = result.Verses['MAT 1:1'].Clusters[0]; - expect(cluster.Lexemes).toEqual([]); - expect(cluster.LexemesId).toBe(''); - expect(cluster.Id).toBe('10-3'); + const cluster = result.verses['MAT 1:1'].clusters[0]; + expect(cluster.lexemes).toEqual([]); + expect(cluster.lexemesId).toBe(''); + expect(cluster.id).toBe('10-3'); }); it('parses Lexeme without GlossId as empty SenseId', () => { @@ -219,9 +216,9 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Clusters[0].Lexemes[0]).toEqual({ - LexemeId: 'Word:a', - SenseId: '', + expect(result.verses['MAT 1:1'].clusters[0].lexemes[0]).toEqual({ + lexemeId: 'Word:a', + senseId: '', }); }); @@ -244,7 +241,7 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Clusters[0].Excluded).toBe(true); + expect(result.verses['MAT 1:1'].clusters[0].excluded).toBe(true); }); it('parses Cluster with Excluded=false', () => { @@ -266,7 +263,7 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Clusters[0].Excluded).toBe(false); + expect(result.verses['MAT 1:1'].clusters[0].excluded).toBe(false); }); it('parses Cluster without Excluded as Excluded=false', () => { @@ -287,7 +284,7 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - 
expect(result.Verses['MAT 1:1'].Clusters[0].Excluded).toBe(false); + expect(result.verses['MAT 1:1'].clusters[0].excluded).toBe(false); }); it('parses Punctuation with Range, BeforeText, AfterText', () => { @@ -314,11 +311,11 @@ describe('InterlinearXmlParser', () => { const result = parser.parse(xml); // Parser uses trimValues: false, so tag text is not trimmed. - expect(result.Verses['MAT 1:1'].Punctuations).toEqual([ + expect(result.verses['MAT 1:1'].punctuations).toEqual([ { - TextRange: { Index: 34, Length: 2 }, - BeforeText: '? ', - AfterText: '? ', + textRange: { index: 34, length: 2 }, + beforeText: '? ', + afterText: '? ', }, ]); }); @@ -350,11 +347,11 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Punctuations).toHaveLength(1); - expect(result.Verses['MAT 1:1'].Punctuations[0]).toEqual({ - TextRange: { Index: 1, Length: 2 }, - BeforeText: 'c', - AfterText: 'd', + expect(result.verses['MAT 1:1'].punctuations).toHaveLength(1); + expect(result.verses['MAT 1:1'].punctuations[0]).toEqual({ + textRange: { index: 1, length: 2 }, + beforeText: 'c', + afterText: 'd', }); }); @@ -396,11 +393,11 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Punctuations).toHaveLength(1); - expect(result.Verses['MAT 1:1'].Punctuations[0]).toEqual({ - TextRange: { Index: 5, Length: 1 }, - BeforeText: 'valid', - AfterText: '', + expect(result.verses['MAT 1:1'].punctuations).toHaveLength(1); + expect(result.verses['MAT 1:1'].punctuations[0]).toEqual({ + textRange: { index: 5, length: 1 }, + beforeText: 'valid', + afterText: '', }); }); @@ -425,11 +422,11 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Punctuations).toHaveLength(1); - expect(result.Verses['MAT 1:1'].Punctuations[0]).toEqual({ - TextRange: { Index: 10, Length: 1 }, - BeforeText: '', - AfterText: '', + 
expect(result.verses['MAT 1:1'].punctuations).toHaveLength(1); + expect(result.verses['MAT 1:1'].punctuations[0]).toEqual({ + textRange: { index: 10, length: 1 }, + beforeText: '', + afterText: '', }); }); @@ -460,9 +457,9 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(Object.keys(result.Verses)).toEqual(['MAT 1:1', 'MAT 1:2']); - expect(result.Verses['MAT 1:1'].Clusters[0].Lexemes[0].LexemeId).toBe('a'); - expect(result.Verses['MAT 1:2'].Clusters[0].Lexemes[0].LexemeId).toBe('b'); + expect(Object.keys(result.verses)).toEqual(['MAT 1:1', 'MAT 1:2']); + expect(result.verses['MAT 1:1'].clusters[0].lexemes[0].lexemeId).toBe('a'); + expect(result.verses['MAT 1:2'].clusters[0].lexemes[0].lexemeId).toBe('b'); }); it('parses item with missing VerseData as empty Hash, Clusters, Punctuations', () => { @@ -477,10 +474,10 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1']).toEqual({ - Hash: '', - Clusters: [], - Punctuations: [], + expect(result.verses['MAT 1:1']).toEqual({ + hash: '', + clusters: [], + punctuations: [], }); }); @@ -497,10 +494,10 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:11']).toEqual({ - Hash: '', - Clusters: [], - Punctuations: [], + expect(result.verses['MAT 1:11']).toEqual({ + hash: '', + clusters: [], + punctuations: [], }); }); @@ -523,12 +520,12 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses['MAT 1:1'].Clusters).toEqual([]); - expect(result.Verses['MAT 1:1'].Punctuations).toHaveLength(1); - expect(result.Verses['MAT 1:1'].Punctuations[0]).toEqual({ - TextRange: { Index: 0, Length: 1 }, - BeforeText: ',', - AfterText: ',', + expect(result.verses['MAT 1:1'].clusters).toEqual([]); + expect(result.verses['MAT 1:1'].punctuations).toHaveLength(1); + expect(result.verses['MAT 1:1'].punctuations[0]).toEqual({ + textRange: { index: 
0, length: 1 }, + beforeText: ',', + afterText: ',', }); }); @@ -542,9 +539,9 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(result.Verses).toEqual({}); - expect(result.GlossLanguage).toBe('en'); - expect(result.BookId).toBe('MAT'); + expect(result.verses).toEqual({}); + expect(result.glossLanguage).toBe('en'); + expect(result.bookId).toBe('MAT'); }); it('skips items with missing string (verse key)', () => { @@ -573,40 +570,47 @@ describe('InterlinearXmlParser', () => { `; const result = parser.parse(xml); - expect(Object.keys(result.Verses)).toEqual(['MAT 1:1']); - expect(result.Verses['MAT 1:1'].Clusters[0].Lexemes[0].LexemeId).toBe('y'); + expect(Object.keys(result.verses)).toEqual(['MAT 1:1']); + expect(result.verses['MAT 1:1'].clusters[0].lexemes[0].lexemeId).toBe('y'); }); it('parses real test-data file without throwing', () => { - const xmlPath = path.join(__dirname, '..', '..', '..', 'test-data', 'Interlinear_en_MAT.xml'); + const xmlPath = path.join( + __dirname, + '..', + '..', + '..', + '..', + 'test-data', + 'Interlinear_en_MAT.xml', + ); const xml = fs.readFileSync(xmlPath, 'utf-8'); const result = parser.parse(xml); - expect(result.GlossLanguage).toBe('en'); - expect(result.BookId).toBe('MAT'); - expect(result.ScrTextName).toBe(''); - expect(Object.keys(result.Verses).length).toBeGreaterThan(0); + expect(result.glossLanguage).toBe('en'); + expect(result.bookId).toBe('MAT'); + expect(Object.keys(result.verses).length).toBeGreaterThan(0); - const mat11 = result.Verses['MAT 1:1']; + const mat11 = result.verses['MAT 1:1']; expect(mat11).toBeDefined(); - expect(mat11.Hash).toBe('C8D38188'); - expect(mat11.Clusters.length).toBeGreaterThan(0); - const firstCluster = mat11.Clusters[0]; - expect(firstCluster.TextRange).toEqual({ Index: 5, Length: 5 }); - expect(firstCluster.Lexemes[0]).toEqual({ - LexemeId: 'Word:hello', - SenseId: 'WvbPwa9D', + expect(mat11.hash).toBe('C8D38188'); + 
expect(mat11.clusters.length).toBeGreaterThan(0); + const firstCluster = mat11.clusters[0]; + expect(firstCluster.textRange).toEqual({ index: 5, length: 5 }); + expect(firstCluster.lexemes[0]).toEqual({ + lexemeId: 'Word:hello', + senseId: 'WvbPwa9D', }); - expect(firstCluster.Id).toMatch(/^Word:hello\/5-5$/); + expect(firstCluster.id).toMatch(/^Word:hello\/5-5$/); - const versesWithPunctuation = Object.values(result.Verses).filter( - (v) => v.Punctuations.length > 0, + const versesWithPunctuation = Object.values(result.verses).filter( + (v) => v.punctuations.length > 0, ); expect(versesWithPunctuation.length).toBeGreaterThan(0); const [firstWithPunctuation] = versesWithPunctuation; - expect(firstWithPunctuation.Punctuations[0]).toHaveProperty('TextRange'); - expect(firstWithPunctuation.Punctuations[0]).toHaveProperty('BeforeText'); - expect(firstWithPunctuation.Punctuations[0]).toHaveProperty('AfterText'); + expect(firstWithPunctuation.punctuations[0]).toHaveProperty('textRange'); + expect(firstWithPunctuation.punctuations[0]).toHaveProperty('beforeText'); + expect(firstWithPunctuation.punctuations[0]).toHaveProperty('afterText'); }); }); @@ -802,8 +806,8 @@ describe('InterlinearXmlParser', () => { describe('constructor and instance', () => { it('can be instantiated multiple times', () => { - const p1 = new InterlinearXmlParser(); - const p2 = new InterlinearXmlParser(); + const p1 = new Paratext9Parser(); + const p2 = new Paratext9Parser(); const xml = ` diff --git a/src/interlinearizer.web-view.tsx b/src/interlinearizer.web-view.tsx index f47bd69..4d1a14d 100644 --- a/src/interlinearizer.web-view.tsx +++ b/src/interlinearizer.web-view.tsx @@ -1,6 +1,7 @@ -import { useMemo } from 'react'; -import type { InterlinearData } from 'interlinearizer'; -import { InterlinearXmlParser } from './parsers/interlinearXmlParser'; +import { useMemo, useState } from 'react'; +import type { InterlinearData } from 'paratext-9-types'; +import { Paratext9Parser } from 
'./parsers/paratext-9/paratext9Parser'; +import { convertParatext9ToInterlinearization } from './parsers/paratext-9/paratext9Converter'; /** Test interlinear XML bundled at build time (from test-data/Interlinear_en_MAT.xml). */ import testXml from '../test-data/Interlinear_en_MAT.xml?raw'; @@ -8,15 +9,22 @@ import testXml from '../test-data/Interlinear_en_MAT.xml?raw'; /** Result of parsing the bundled test XML: either data or an error message. */ type ParseResult = { data: InterlinearData; error: undefined } | { data: undefined; error: string }; +/** View mode for the JSON display: raw PT9 structure or converted interlinearizer model. */ +type JsonViewMode = 'interlinear-data' | 'interlinearization'; + /** * Main interlinearizer WebView. Parses the bundled test XML into the interlinear model and displays * the result as raw JSON. No PAPI commands or file loading—everything is self-contained. * - * Parser is created inside useMemo so parsing runs once per mount. + * A switch lets the user choose between viewing {@link InterlinearData} (Paratext 9 format) or + * {@link Interlinearization} (converted interlinearizer model). Parser is created inside useMemo so + * parsing runs once per mount. */ globalThis.webViewComponent = function InterlinearizerWebView() { + const [jsonViewMode, setJsonViewMode] = useState('interlinear-data'); + const { data: parsed, error: parseError } = useMemo((): ParseResult => { - const parser = new InterlinearXmlParser(); + const parser = new Paratext9Parser(); try { const data = parser.parse(testXml); return { data, error: undefined }; @@ -25,6 +33,14 @@ globalThis.webViewComponent = function InterlinearizerWebView() { } }, []); + const interlinearization = useMemo( + () => (parsed ? convertParatext9ToInterlinearization(parsed) : undefined), + [parsed], + ); + + /** In Interlinearization mode use converted data (may be undefined); otherwise use parsed. */ + const jsonToShow = jsonViewMode === 'interlinearization' ? 
interlinearization : parsed; + return (

Interlinearizer

@@ -43,9 +59,51 @@ globalThis.webViewComponent = function InterlinearizerWebView() { {parsed && ( <> -

Parsed interlinear data (JSON):

+
+ View JSON as: +
+ + +
+

+ {jsonViewMode === 'interlinear-data' + ? 'Paratext 9 book/verse/cluster structure.' + : 'Converted interlinearizer book/segment/occurrence model.'} +

+
+

+ {jsonViewMode === 'interlinear-data' + ? 'InterlinearData (JSON):' + : 'Interlinearization (JSON):'} +

-            {JSON.stringify(parsed, undefined, 2)}
+            {jsonToShow ? JSON.stringify(jsonToShow, undefined, 2) : ''}
           
)} diff --git a/src/parsers/paratext-9/paratext-9-types.ts b/src/parsers/paratext-9/paratext-9-types.ts new file mode 100644 index 0000000..263bbd7 --- /dev/null +++ b/src/parsers/paratext-9/paratext-9-types.ts @@ -0,0 +1,64 @@ +declare module 'paratext-9-types' { + /** Character range in source text (Index, Length). */ + export interface StringRange { + /** Start index of the range in the source text (0-based). */ + index: number; + /** Number of characters in the range. */ + length: number; + } + + /** Data on the interlinearization of a single lexeme. */ + export interface LexemeData { + /** ID of the lexeme (e.g. from Lexicon; XML attribute Id). */ + lexemeId: string; + /** ID of the sense/gloss used for this lexeme (XML attribute GlossId). */ + senseId: string; + } + + /** Data on the interlinearization of a cluster. */ + export interface ClusterData { + /** Character range this cluster occupies in the verse text. */ + textRange: StringRange; + /** Lexemes in this cluster, in order. */ + lexemes: LexemeData[]; + /** Slash-joined LexemeIds for this cluster (e.g. "Word:a/Word:b"). */ + lexemesId: string; + /** Unique cluster id: LexemesId plus TextRange (e.g. "Word:a/Word:b/21-3"). */ + id: string; + /** Excluded flag. See [pt9-xml.md](../parsers/pt9-xml.md) for details. */ + excluded: boolean; + } + + /** Data on punctuation change. */ + export interface PunctuationData { + /** Character range this punctuation occupies in the verse text. */ + textRange: StringRange; + /** Punctuation text before the change (or empty). */ + beforeText: string; + /** Punctuation text after the change (or empty). */ + afterText: string; + } + + /** Interlinear data for a single verse. */ + export interface VerseData { + /** Hash of verse text when approved; empty string if not approved. */ + hash: string; + /** Lexeme clusters in this verse. */ + clusters: ClusterData[]; + /** Punctuation changes in this verse. 
*/ + punctuations: PunctuationData[]; + } + + /** Root interlinear data: book + verses. */ + export interface InterlinearData { + /** Language code or name for the glosses. */ + glossLanguage: string; + /** Book id (e.g. "RUT", "MAT"). */ + bookId: string; + /** + * Verse data keyed by verse reference (e.g. "RUT 3:1"). Exactly one entry per reference; the + * parser rejects XML that contains duplicate verse references. + */ + verses: Record; + } +} diff --git a/src/parsers/paratext-9/paratext9Converter.ts b/src/parsers/paratext-9/paratext9Converter.ts new file mode 100644 index 0000000..ea8d010 --- /dev/null +++ b/src/parsers/paratext-9/paratext9Converter.ts @@ -0,0 +1,286 @@ +/** + * @file Converts Paratext 9 interlinear data structures to the interlinearizer model. + * + * This module converts from {@link InterlinearData} (paratext-9-types) to {@link Interlinearization} + * (interlinearizer types), mapping Paratext 9's verse/cluster/lexeme structure to the + * interlinearizer's book/segment/occurrence/analysis structure. + */ + +import type { InterlinearData, VerseData, StringRange } from 'paratext-9-types'; +import type { + Interlinearization, + AnalyzedBook, + Segment, + Analysis, + Occurrence, + AnalysisAssignment, +} from 'interlinearizer'; +import { + OccurrenceType, + AnalysisType, + AssignmentStatus, + Confidence, +} from 'types/interlinearizer-enums'; + +/** + * Generates a deterministic ID for an interlinearization from Paratext 9 data. + * + * @param bookId - Book ID from InterlinearData. + * @returns A unique ID for the interlinearization. + */ +function generateInterlinearizationId(bookId: string): string { + return `${bookId}-interlinear`.toLowerCase().replace(/\s+/g, '-'); +} + +/** + * Generates a deterministic ID for an analyzed book. + * + * @param bookId - Book ID. + * @returns A unique ID for the book. 
+ */ +function generateBookId(bookId: string): string { + return bookId.toLowerCase().replace(/\s+/g, '-'); +} + +/** + * Generates a deterministic ID for a segment (verse). + * + * @param bookId - Book ID. + * @param verseRef - Verse reference (e.g., "MAT 1:1"). + * @returns A unique ID for the segment. + */ +function generateSegmentId(verseRef: string): string { + return verseRef.toLowerCase().replace(/\s+/g, '-'); +} + +/** + * Generates a deterministic ID for an occurrence from a cluster. + * + * @param segmentId - Parent segment ID. + * @param clusterId - Cluster ID from ClusterData. + * @param index - Zero-based index within the segment. + * @returns A unique ID for the occurrence. + */ +function generateOccurrenceIdFromCluster( + segmentId: string, + clusterId: string, + index: number, +): string { + return `${segmentId}-occ-${index}-${clusterId}`; +} + +/** + * Generates a deterministic ID for an occurrence from punctuation. + * + * @param segmentId - Parent segment ID. + * @param textRange - Text range of the punctuation. + * @param index - Zero-based index within the segment. + * @returns A unique ID for the occurrence. + */ +function generateOccurrenceIdFromPunctuation( + segmentId: string, + textRange: StringRange, + index: number, +): string { + return `${segmentId}-punc-${index}-${textRange.index}-${textRange.length}`; +} + +/** + * Generates a deterministic ID for an analysis from lexeme data. + * + * @param lexemeId - Lexeme ID. + * @param senseId - Sense/gloss ID. + * @param glossLanguage - Gloss language code. + * @returns A unique ID for the analysis. + */ +function generateAnalysisId(lexemeId: string, senseId: string, glossLanguage: string): string { + const sensePart = senseId ? `-${senseId}` : ''; + return `analysis-${glossLanguage}-${lexemeId}${sensePart}`; +} + +/** + * Generates a deterministic ID for an analysis assignment. + * + * @param occurrenceId - Occurrence ID. + * @param analysisId - Analysis ID. 
+ * @returns A unique ID for the assignment. + */ +function generateAssignmentId(occurrenceId: string, analysisId: string): string { + return `assign-${occurrenceId}-${analysisId}`; +} + +/** + * Converts a text range to an anchor string. + * + * @param textRange - Character range in source text. + * @returns Anchor string in format "index-length". + */ +function textRangeToAnchor(textRange: StringRange): string { + return `${textRange.index}-${textRange.length}`; +} + +/** + * Converts a Paratext 9 verse to an interlinearizer segment. + * + * @param verseRef - Verse reference (e.g., "MAT 1:1"). + * @param verseData - Verse data from Paratext 9. + * @param bookId - Book ID for generating segment ID. + * @param glossLanguage - Gloss language code. + * @returns A Segment with occurrences converted from clusters and punctuations. + */ +function convertVerseToSegment( + verseRef: string, + verseData: VerseData, + glossLanguage: string, +): Segment { + const segmentId = generateSegmentId(verseRef); + + const wordOccurrences = verseData.clusters.map((cluster, clusterIndex): Occurrence => { + const assignments = cluster.lexemes.map((lexeme): AnalysisAssignment => { + const analysisId = generateAnalysisId(lexeme.lexemeId, lexeme.senseId, glossLanguage); + const assignmentId = generateAssignmentId( + generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex), + analysisId, + ); + + return { + id: assignmentId, + occurrenceId: generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex), + analysisId, + status: verseData.hash ? 
AssignmentStatus.Approved : AssignmentStatus.Suggested, + }; + }); + + const occurrenceId = generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex); + + return { + id: occurrenceId, + segmentId, + index: clusterIndex, + anchor: textRangeToAnchor(cluster.textRange), + surfaceText: '', // Paratext 9 doesn't specify surface text per cluster + writingSystem: '', // Paratext 9 doesn't specify writing system per cluster + type: OccurrenceType.Word, + assignments, + }; + }); + + const punctuationOccurrences: Occurrence[] = verseData.punctuations.map( + (punctuation, puncIndex): Occurrence => { + const occurrenceIndex = wordOccurrences.length + puncIndex; + + return { + id: generateOccurrenceIdFromPunctuation(segmentId, punctuation.textRange, occurrenceIndex), + segmentId, + index: occurrenceIndex, + anchor: textRangeToAnchor(punctuation.textRange), + surfaceText: punctuation.afterText || punctuation.beforeText || '', + writingSystem: '', + type: OccurrenceType.Punctuation, + assignments: [], + }; + }, + ); + + const occurrences = [...wordOccurrences, ...punctuationOccurrences]; + + return { + id: segmentId, + segmentRef: verseRef, + baselineText: '', // Paratext 9 doesn't specify baseline text + occurrences, + }; +} + +/** + * Creates Analysis objects for all unique lexemes across all verses. + * + * @param interlinearData - Paratext 9 interlinear data. + * @returns Map of analysis ID to Analysis object. 
+ */ +function createAnalyses(interlinearData: InterlinearData): Map { + const analyses = new Map(); + const { glossLanguage } = interlinearData; + + // Collect all unique lexeme-sense pairs + Object.values(interlinearData.verses).forEach((verseData) => { + verseData.clusters.forEach((cluster) => { + cluster.lexemes.forEach((lexeme) => { + const analysisId = generateAnalysisId(lexeme.lexemeId, lexeme.senseId, glossLanguage); + + if (!analyses.has(analysisId)) { + const analysis: Analysis = { + id: analysisId, + analysisLanguage: glossLanguage, + analysisType: AnalysisType.Gloss, // Paratext 9 provides word-level glosses + confidence: Confidence.Medium, // Default confidence level + sourceSystem: 'paratext-9', + sourceUser: 'paratext-9-parser', + glossText: lexeme.senseId || undefined, // Use senseId as gloss text placeholder + // Note: Paratext 9 doesn't provide POS, features, or morpheme bundles in the XML + }; + + analyses.set(analysisId, analysis); + } + }); + }); + }); + + return analyses; +} + +/** + * Converts Paratext 9 InterlinearData to interlinearizer Interlinearization. + * + * This function performs the following mappings: + * + * - InterlinearData → Interlinearization (one per book) + * - VerseData → Segment (one per verse) + * - ClusterData → Occurrence (word type) with AnalysisAssignments + * - PunctuationData → Occurrence (punctuation type) + * - LexemeData → Analysis + AnalysisAssignment + * + * Note: Analysis objects are created but not directly attached to the Interlinearization. They are + * referenced via AnalysisAssignment.analysisId. In a full implementation, you might want to store + * them in a separate collection or attach them to a parent structure. + * + * @param interlinearData - Paratext 9 interlinear data to convert. + * @param baselineTexts - Optional map of verse references to baseline text (for extracting + * surfaceText). If not provided, surfaceText will be empty strings. + * @returns Converted Interlinearization object. 
+ */ +export function convertParatext9ToInterlinearization( + interlinearData: InterlinearData, +): Interlinearization { + const { glossLanguage, bookId, verses } = interlinearData; + + const interlinearizationId = generateInterlinearizationId(bookId); + const analyzedBookId = generateBookId(bookId); + + // Note: analyses are created but not returned - they're referenced via analysisId in assignments + createAnalyses(interlinearData); + + const segments = Object.entries(verses).map(([verseRef, verseData]) => { + return convertVerseToSegment(verseRef, verseData, glossLanguage); + }); + + const verseDataArray = Object.values(verses); + const verseWithHash = verseDataArray.find((verseData) => verseData.hash); + const textVersion = verseWithHash?.hash || ''; + + const analyzedBook: AnalyzedBook = { + id: analyzedBookId, + bookRef: bookId, + textVersion, + segments, + }; + + const interlinearization: Interlinearization = { + id: interlinearizationId, + sourceWritingSystem: '', // Paratext 9 doesn't specify source writing system in InterlinearData + analysisLanguages: [glossLanguage], + books: [analyzedBook], + }; + + return interlinearization; +} diff --git a/src/parsers/interlinearXmlParser.ts b/src/parsers/paratext-9/paratext9Parser.ts similarity index 86% rename from src/parsers/interlinearXmlParser.ts rename to src/parsers/paratext-9/paratext9Parser.ts index 7102b92..ce9372e 100644 --- a/src/parsers/interlinearXmlParser.ts +++ b/src/parsers/paratext-9/paratext9Parser.ts @@ -6,7 +6,7 @@ import type { StringRange, InterlinearData, VerseData, -} from 'interlinearizer'; +} from 'paratext-9-types'; /** Range: Index and Length attributes. */ interface ParsedRange { @@ -62,10 +62,8 @@ interface ParsedVersesItem { VerseData?: ParsedVerseData; } -/** Root element: ScrTextName, GlossLanguage, BookId, Verses (with item[]). */ +/** Root element: GlossLanguage, BookId, Verses (with item[]). 
*/ interface ParsedInterlinearDataRoot { - /** Source text name (FXP attribute ScrTextName). */ - ['@_ScrTextName']?: string; /** Gloss language (FXP attribute GlossLanguage). */ ['@_GlossLanguage']?: string; /** Book id (FXP attribute BookId). */ @@ -95,7 +93,7 @@ function extractLexemesFromCluster(clusterElement: ParsedCluster): LexemeData[] if (!lexemeId) { throw new Error('Invalid XML: Lexeme missing required Id attribute'); } - return { LexemeId: lexemeId, SenseId: el['@_GlossId'] ?? '' }; + return { lexemeId, senseId: el['@_GlossId'] ?? '' }; }); } @@ -122,9 +120,9 @@ function extractPunctuationsFromVerse(verseDataElement: ParsedVerseData): Punctu if (!Number.isFinite(index) || !Number.isFinite(length)) return []; return [ { - TextRange: { Index: index, Length: length }, - BeforeText: el.BeforeText ?? '', - AfterText: el.AfterText ?? '', + textRange: { index, length }, + beforeText: el.BeforeText ?? '', + afterText: el.AfterText ?? '', }, ]; }); @@ -153,21 +151,21 @@ function extractClustersFromVerse(verseDataElement: ParsedVerseData): ClusterDat throw new Error('Invalid XML: Range missing required Index or Length attributes'); } - const textRange: StringRange = { Index: index, Length: length }; + const textRange: StringRange = { index, length }; const lexemes = extractLexemesFromCluster(el); // Join with "/"; lexeme IDs may contain "/", so do not split LexemesId elsewhere. - const lexemesId = lexemes.map((l) => l.LexemeId).join('/'); + const lexemesId = lexemes.map((l) => l.lexemeId).join('/'); /** Cluster Id: LexemesId/Index-Length when lexemes present; Index-Length when none. */ const id = lexemesId ? 
`${lexemesId}/${index}-${length}` : `${index}-${length}`; const excluded = el.Excluded === 'true'; return { - TextRange: textRange, - Lexemes: lexemes, - LexemesId: lexemesId, - Id: id, - Excluded: excluded, + textRange, + lexemes, + lexemesId, + id, + excluded, }; }); } @@ -179,7 +177,7 @@ function extractClustersFromVerse(verseDataElement: ParsedVerseData): ClusterDat * Output matches the types in `interlinearizer`; no extra conversion is done. Expects the * interlinear XML schema described in [pt9-xml.md](pt9-xml.md). */ -export class InterlinearXmlParser { +export class Paratext9Parser { private readonly parser: XMLParser; /** @@ -213,8 +211,8 @@ export class InterlinearXmlParser { * @param xml - Raw XML string (e.g. file contents). Must be valid interlinear XML with * InterlinearData root, GlossLanguage and BookId attributes, and Verses containing item * entries. - * @returns Parsed interlinear data: ScrTextName, GlossLanguage, BookId, and Verses (record of - * verse key to {@link VerseData} with Hash, Clusters, Punctuations). + * @returns Parsed interlinear data: GlossLanguage, BookId, and Verses (record of verse key to + * {@link VerseData} with Hash, Clusters, Punctuations). * @throws {Error} If the root element, required attributes (GlossLanguage, BookId), required * structure (Verses, Cluster Range, Lexeme Id), or duplicate verse reference is present. */ @@ -225,7 +223,6 @@ export class InterlinearXmlParser { throw new Error('Invalid XML: Missing InterlinearData root element'); } - const scrTextName = root['@_ScrTextName'] ?? ''; const glossLanguage = root['@_GlossLanguage'] ?? ''; const bookId = root['@_BookId'] ?? 
''; if (!glossLanguage || !bookId) { @@ -251,23 +248,22 @@ export class InterlinearXmlParser { const verseDataElement = item.VerseData; if (!verseDataElement) { - acc[verseKey] = { Hash: '', Clusters: [], Punctuations: [] }; + acc[verseKey] = { hash: '', clusters: [], punctuations: [] }; return acc; } acc[verseKey] = { - Hash: verseDataElement['@_Hash'] ?? '', - Clusters: extractClustersFromVerse(verseDataElement), - Punctuations: extractPunctuationsFromVerse(verseDataElement), + hash: verseDataElement['@_Hash'] ?? '', + clusters: extractClustersFromVerse(verseDataElement), + punctuations: extractPunctuationsFromVerse(verseDataElement), }; return acc; }, {}); return { - ScrTextName: scrTextName, - GlossLanguage: glossLanguage, - BookId: bookId, - Verses: verses, + glossLanguage, + bookId, + verses, }; } } diff --git a/src/parsers/pt9-xml.md b/src/parsers/paratext-9/pt9-xml.md similarity index 95% rename from src/parsers/pt9-xml.md rename to src/parsers/paratext-9/pt9-xml.md index 56d0baa..7cb7caf 100644 --- a/src/parsers/pt9-xml.md +++ b/src/parsers/paratext-9/pt9-xml.md @@ -8,7 +8,6 @@ The extension reads PT9 interlinear data from XML files (e.g. `Interlinear_ - + RUT 1:1 diff --git a/src/types/interlinearizer-enums.ts b/src/types/interlinearizer-enums.ts new file mode 100644 index 0000000..5ea8052 --- /dev/null +++ b/src/types/interlinearizer-enums.ts @@ -0,0 +1,58 @@ +/** + * @file Runtime enum values for the interlinearizer model. + * + * Type declarations (and these enums as types) live in interlinearizer.d.ts for the declared module + * 'interlinearizer'. This file provides the actual enum values so code that imports from this + * path (e.g. parsers/converter) has runtime access. Keeps a single source of truth for enum + * values and avoids duplicating them in test mocks. + */ + +/** Whether an occurrence position holds a word or punctuation. */ +export enum OccurrenceType { + /** A word occurrence. */ + Word = 'word', + /** A punctuation occurrence. 
*/ + Punctuation = 'punctuation', +} + +/** The kind of linguistic analysis represented. */ +export enum AnalysisType { + /** Surface wordform only — no gloss or morpheme breakdown. */ + Wordform = 'wordform', + /** Morpheme-level analysis with MorphemeBundles. */ + Morph = 'morph', + /** Word-level gloss (no morpheme decomposition). */ + Gloss = 'gloss', + /** Punctuation placeholder. */ + Punctuation = 'punctuation', +} + +/** + * How the analysis was produced. + * + * - `high` + * - `medium` + * - `low` + * - `guess` + */ +export enum Confidence { + Guess = 'guess', + Low = 'low', + Medium = 'medium', + High = 'high', +} + +/** + * Lifecycle status of an assignment or alignment link. + * + * - `approved` — human-confirmed. + * - `suggested` — machine-generated or unreviewed. + * - `candidate` — proposed but not yet reviewed. + * - `rejected` — explicitly rejected by a human. + */ +export enum AssignmentStatus { + Approved = 'approved', + Suggested = 'suggested', + Candidate = 'candidate', + Rejected = 'rejected', +} diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 75736ff..2bcd6f7 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -2,74 +2,483 @@ * @file Extension type declaration file. Platform.Bible shares this with other extensions. Types * exposed here (and in papi-shared-types) are available to other extensions. */ + /** - * Interlinear types (InterlinearData, VerseData, ClusterData, etc.) are the public API for - * interlinear data. The XML parser in src/parsers/interlinearXmlParser.ts consumes raw - * fast-xml-parser output internally and returns objects conforming to these types. + * Interlinearizer Interlinear Model + * + * A representation for interlinear data that should cover import from LCM (FieldWorks), Paratext 9, + * and BT Extension and support the new interlinearizer */ declare module 'interlinearizer' { - /** Character range in source text (Index, Length). 
*/ - export interface StringRange { - /** Start index of the range in the source text (0-based). */ - Index: number; - /** Number of characters in the range. */ - Length: number; + // --------------------------------------------------------------------------- + // Enums + // --------------------------------------------------------------------------- + + /** Whether an occurrence position holds a word or punctuation. */ + export enum OccurrenceType { + /** A word occurrence. */ + Word = 'word', + /** A punctuation occurrence. */ + Punctuation = 'punctuation', + } + + /** The kind of linguistic analysis represented. */ + export enum AnalysisType { + /** Surface wordform only — no gloss or morpheme breakdown. */ + Wordform = 'wordform', + /** Morpheme-level analysis with MorphemeBundles. */ + Morph = 'morph', + /** Word-level gloss (no morpheme decomposition). */ + Gloss = 'gloss', + /** Punctuation placeholder. */ + Punctuation = 'punctuation', + } + + /** + * How the analysis was produced. + * + * - `high` + * - `medium` + * - `low` + * - `guess` + */ + export enum Confidence { + Guess = 'guess', + Low = 'low', + Medium = 'medium', + High = 'high', } - /** Data on the interlinearization of a single lexeme. */ - export interface LexemeData { - /** ID of the lexeme (e.g. from Lexicon; XML attribute Id). */ - LexemeId: string; - /** ID of the sense/gloss used for this lexeme (XML attribute GlossId). */ - SenseId: string; + /** + * Lifecycle status of an assignment or alignment link. + * + * - `approved` — human-confirmed. + * - `suggested` — machine-generated or unreviewed. + * - `candidate` — proposed but not yet reviewed. + * - `rejected` — explicitly rejected by a human. + */ + export enum AssignmentStatus { + Approved = 'approved', + Suggested = 'suggested', + Candidate = 'candidate', + Rejected = 'rejected', } - /** Data on the interlinearization of a cluster. */ - export interface ClusterData { - /** Character range this cluster occupies in the verse text. 
*/ - TextRange: StringRange; - /** Lexemes in this cluster, in order. */ - Lexemes: LexemeData[]; - /** Slash-joined LexemeIds for this cluster (e.g. "Word:a/Word:b"). */ - LexemesId: string; - /** Unique cluster id: LexemesId plus TextRange (e.g. "Word:a/Word:b/21-3"). */ - Id: string; - /** Excluded flag. See [pt9-xml.md](../parsers/pt9-xml.md) for details. */ - Excluded: boolean; + // --------------------------------------------------------------------------- + // §1.1 Interlinearization + // --------------------------------------------------------------------------- + + /** + * Top-level container for all interlinear data. + * + * Source-system mapping: + * + * - LCM: one `IScripture` instance (singleton per project). + * - Paratext: merged from per-book, per-language `InterlinearData` files. + * - BT Extension: one `Translation` (project scope). + */ + export interface Interlinearization { + id: string; + + /** Writing system of the source text being analyzed. */ + sourceWritingSystem: string; + + /** + * Writing systems in which analyses are provided (e.g. `["en", "fr"]`). A single interlinear + * can hold analyses in multiple languages. + */ + analysisLanguages: string[]; + + /** Books of scripture (or other texts) that have been analyzed. */ + books: AnalyzedBook[]; } - /** Data on punctuation change. */ - export interface PunctuationData { - /** Character range this punctuation occupies in the verse text. */ - TextRange: StringRange; - /** Punctuation text before the change (or empty). */ - BeforeText: string; - /** Punctuation text after the change (or empty). */ - AfterText: string; + // --------------------------------------------------------------------------- + // §1.2 AnalyzedBook + // --------------------------------------------------------------------------- + + /** + * One book of scripture (or other text unit) analyzed within an Interlinear. + * + * Source-system mapping: + * + * - LCM: `IScrBook`. `bookRef` = `BookId` (3-letter SIL code). 
+ * - Paratext: book-level `InterlinearData` (merged across languages). + * - BT Extension: one book within a `Translation`. + */ + export interface AnalyzedBook { + id: string; + + /** Book identifier (e.g. `"GEN"`, `"MAT"`). */ + bookRef: string; + + /** + * Hash or version stamp of the source text at analysis time. Used to detect when the underlying + * text has changed and analyses may be stale. + */ + textVersion: string; + + /** Ordered segments that compose this book. */ + segments: Segment[]; + } + + // --------------------------------------------------------------------------- + // §1.3 Segment + // --------------------------------------------------------------------------- + + /** + * A sentence, clause, or verse — the unit within which occurrences are ordered. + * + * Source-system mapping: + * + * - LCM: `ISegment` owned by `IScrTxtPara` within `IScrSection`. + * - Paratext: a verse (`VerseRef`) within `VerseData`. + * - BT Extension: a `Verse` (BCV identifier). + */ + export interface Segment { + id: string; + + /** Canonical reference (e.g. verse reference, paragraph index + offset range). */ + segmentRef: string; + + /** Raw text of the segment, for display and validation. */ + baselineText?: string; + + /** Idiomatic translation of the segment. */ + freeTranslation?: MultiString; + + /** Word-for-word translation. */ + literalTranslation?: MultiString; + + /** Ordered word / punctuation tokens in this segment. */ + occurrences: Occurrence[]; } - /** Interlinear data for a single verse. */ - export interface VerseData { - /** Hash of verse text when approved; empty string if not approved. */ - Hash: string; - /** Lexeme clusters in this verse. */ - Clusters: ClusterData[]; - /** Punctuation changes in this verse. */ - Punctuations: PunctuationData[]; + /** A string value keyed by writing-system tag. 
 */
+  export type MultiString = Record<string, string>;
+
+  // ---------------------------------------------------------------------------
+  // §1.4 Occurrence
+  // ---------------------------------------------------------------------------
+
+  /**
+   * A single word or punctuation token at a specific position in the text. Inherits its text
+   * version from the parent AnalyzedBook.
+   *
+   * Source-system mapping:
+   *
+   * - LCM: entry in `ISegment.AnalysesRS` at a given index.
+   * - Paratext: `ClusterData` within `VerseData`.
+   * - BT Extension: `Token` (API) / `Instance` (DB).
+   */
+  export interface Occurrence {
+    id: string;
+
+    /** Parent segment. */
+    segmentId: string;
+
+    /** Zero-based position within the segment (preserves word order). */
+    index: number;
+
+    /**
+     * Positional anchor in the source text. Supports BCVWP, BCVWP+partNum, StringRange, or
+     * character offset depending on source system.
+     */
+    anchor: string;
+
+    /** The text as it appears in the source. */
+    surfaceText: string;
+
+    /** Writing system of `surfaceText`. */
+    writingSystem: string;
+
+    type: OccurrenceType;
+
+    /** All analysis assignments for this occurrence (zero or more). */
+    assignments: AnalysisAssignment[];
   }

-  /** Root interlinear data: book + verses. */
-  export interface InterlinearData {
-    /** Source text / project name (e.g. from InterlinearData ScrTextName attribute). */
-    ScrTextName: string;
-    /** Language code or name for the glosses. */
-    GlossLanguage: string;
-    /** Book id (e.g. "RUT", "MAT"). */
-    BookId: string;
+  // ---------------------------------------------------------------------------
+  // §1.5 Analysis
+  // ---------------------------------------------------------------------------
+
+  /**
+   * A reusable analysis describing a linguistic interpretation of a word. The same analysis can be
+   * assigned to many occurrences.
+   *
+   * Confidence and provenance belong to the analysis itself because they describe how the
+   * interpretation was produced.
+ * + * Source-system mapping: + * + * - LCM: `IWfiAnalysis` (morph), `IWfiGloss` (gloss), or bare `IWfiWordform` (wordform). + * - Paratext: `LexemeCluster` + `WordAnalysis`. + * - BT Extension: synthesized from `Token.gloss` / `lemmaText` / `senseIds`. Requires deduplication + * — BT Extension stores gloss/sense per-token, not as shared analysis objects. + */ + export interface Analysis { + id: string; + + /** Writing system of the analysis (e.g. the gloss language). */ + analysisLanguage: string; + + analysisType: AnalysisType; + + confidence: Confidence; + + /** System that produced the analysis (e.g. "lcm", "paratext"). */ + sourceSystem: string; + + /** + * User or automation identifier within the source system (e.g. "jsmith", "parser-v3", + * "auto-glosser"). Use a stable automation ID when no human directly applied the analysis. + */ + sourceUser: string; + + /** Word-level gloss text. */ + glossText?: string; + + /** Part of speech. */ + pos?: string; + + /** Morphosyntactic feature structure. */ + features?: Record; + + /** Ordered morpheme breakdown, when analysis is at the morpheme level (`analysisType = morph`). */ + morphemeBundles?: MorphemeBundle[]; + } + + // --------------------------------------------------------------------------- + // §1.6 AnalysisAssignment + // --------------------------------------------------------------------------- + + /** + * The join between an occurrence and an analysis. Multiple assignments per occurrence enable + * competing analyses. + * + * Source-system mapping: + * + * - LCM: `ISegment.AnalysesRS[i]` referencing `IWfiGloss` or `IWfiAnalysis`. + * - Paratext: `ClusterData` with selected `LexemeData`. + * - BT Extension: `Token` linked to senses (`senseIds`). Status inferred from + * `Instance.termStatusNum` (BiblicalTermStatus enum). + */ + export interface AnalysisAssignment { + id: string; + + /** The occurrence being analyzed. */ + occurrenceId: string; + + /** The analysis applied. 
*/ + analysisId: string; + + /** Whether a human has confirmed this analysis for this occurrence. */ + status: AssignmentStatus; + + /** Timestamp of when the assignment was made. */ + createdAt?: string; + } + + // --------------------------------------------------------------------------- + // §1.7 MorphemeBundle + // --------------------------------------------------------------------------- + + /** + * An ordered morpheme within a morpheme-level analysis, linking to the lexicon. + * + * The four optional lexicon references mirror LCM's `IWfiMorphBundle` three-way link plus the + * owning entry: + * + * `allomorphRef` → `IMoForm` (which surface form / allomorph) `lexemeRef` → `ILexEntry` (owning + * dictionary entry) `senseRef` → `ILexSense` (which meaning) `grammarRef` → `IMoMorphSynAnalysis` + * (grammatical behaviour) + * + * In LCM an `ILexEntry` owns one _LexemeForm_ (the elsewhere / citation allomorph) and + * zero-or-more _AlternateForms_ — both are `IMoForm`. `allomorphRef` identifies the specific + * `IMoForm` matched in this context; `lexemeRef` identifies the entry that owns it. + * + * `form` vs `allomorphRef` — `form` is the surface text of the morpheme as it appeared in this + * specific analysis context. `allomorphRef` is a reference (ID) to the canonical allomorph object + * in the lexicon. These can legitimately differ: in LCM `IWfiMorphBundle.Form` may reflect + * phonological conditioning that differs from the canonical `IMoForm.Form`. When `allomorphRef` + * is absent, `form` is the only record of the morpheme shape. + * + * Source-system mapping: + * + * - LCM: `IWfiMorphBundle` (1:1). `allomorphRef` = GUID of `IWfiMorphBundle.MorphRA` (`IMoForm`). + * `lexemeRef` = GUID of the `ILexEntry` that owns that `IMoForm` (via `LexemeFormOA` or + * `AlternateFormsOS`). + * - Paratext: each `Lexeme` within a `WordAnalysis`. 
Paratext's built-in XML lexicon has no + * allomorph concept distinct from the entry — `Lexeme.AlternateForms` exists in the interface + * but returns empty. `allomorphRef` is therefore omitted for the built-in lexicon. When an + * integrated provider (e.g. FLEx via `IntegratedLexicalProvider`) is active, `AllomorphEntry` + * surfaces actual allomorphs and `allomorphRef` can be populated. `lexemeRef` = `Lexeme.Id` + * (LexemeKey-derived). + * - BT Extension: not natively modeled as morpheme bundles. A whole-word bundle can be synthesized: + * `form` = `Token.text`, `allomorphRef` = `headwordId` (the BT Extension "morph" concept + * corresponds to the FieldWorks Allomorph; the HeadWord's lemma is the elsewhere / LexemeForm + * allomorph), `lexemeRef` = `headwordId`, `senseRef` = `senseIds[0]`. Macula TSV `morph` field + * can supply the specific allomorphic form when it differs from the lemma. + */ + export interface MorphemeBundle { + id: string; + + /** Zero-based position within the analysis (preserves morpheme order). */ + index: number; + + /** The morpheme form as it appears in this analysis (surface text). */ + form: string; + + /** Writing system of `form`. */ + writingSystem: string; + + /** + * Reference to a specific Allomorph (`IMoForm`) in the lexical model. + * + * An `ILexEntry` in LCM owns one _LexemeForm_ (the elsewhere / citation allomorph) and + * zero-or-more _AlternateForms_. This field identifies which allomorph was matched in this + * morpheme position. + * + * In the BT Extension the "morph" concept aligns with this field: the HeadWord's lemma acts as + * the LexemeForm (elsewhere allomorph). + */ + allomorphRef?: string; + + /** Reference to Lexeme (`ILexEntry`) in the lexical model. */ + lexemeRef?: string; + + /** Reference to Sense (`ILexSense`) in the lexical model. */ + senseRef?: string; + + /** Reference to Grammar / MSA (`IMoMorphSynAnalysis`) in the lexical model. 
*/ + grammarRef?: string; + } + + // --------------------------------------------------------------------------- + // §1.8 InterlinearAlignment + // --------------------------------------------------------------------------- + + /** + * A project pairing a source-language interlinearization and a target-language interlinear with + * morph-level alignment links between them. + * + * Both interlinearizations carry their own analyzed books, segments, occurrences, and analyses. + * AlignmentLinks bridge the two, connecting individual morphemes (MorphemeBundles) or whole + * unanalyzed words (Occurrences) across the language boundary. + * + * Source-system mapping: + * + * - LCM: LCM has no native alignment or bilingual pairing model. An InterlinearAlignment is + * constructed by pairing a Scripture- based interlinearization (vernacular) with a source-text + * interlinearization produced externally (e.g. Greek/Hebrew resource text). + * - Paratext: not directly represented. Can be constructed from parallel projects that share the + * same versification. + * - BT Extension: one `Translation` scoped to source + target sides (`Translation.sideNum`: 1 = + * source, 2 = target). Each side becomes an `Interlinearization`. `Alignment` records become + * `AlignmentLink`s. + */ + export interface InterlinearAlignment { + id: string; + + /** The source-language interlinearization (e.g. Greek / Hebrew). */ + source: Interlinearization; + + /** The target-language interlinearization (e.g. vernacular translation). */ + target: Interlinearization; + + /** + * Morph-level alignment links connecting endpoints in the source interlinear to endpoints in + * the target interlinear. 
+ */ + links: AlignmentLink[]; + } + + // --------------------------------------------------------------------------- + // §1.9 AlignmentLink + // --------------------------------------------------------------------------- + + /** + * A directional alignment link from one or more source-text morphemes / words to one or more + * target-text morphemes / words. + * + * Each endpoint resolves to either: + * + * - A specific MorphemeBundle within a fully analyzed occurrence, connecting at the allomorph level + * (via `allomorphRef`). + * - A whole unanalyzed occurrence, when no morpheme-level analysis exists. + * + * Typical workflow: the user selects a morph from the source-text interlinear and connects it to + * an allomorph of a fully analyzed occurrence in the target-text interlinear — or to an + * unanalyzed occurrence if the target word has not yet been broken into morphemes. + * + * Source-system mapping: + * + * - LCM: no native alignment model; links are produced by external tools. + * - Paratext: not stored in interlinear data; derivable from parallel interlinear selections when + * two projects share versification. + * - BT Extension: `Alignment` entity. Each `Alignment` record with `sourceInstances` / + * `targetInstances` is decomposed into `AlignmentEndpoint`s — one per instance. BT Extension's + * "morph" concept (the token's morphological form) maps to a MorphemeBundle-level endpoint when + * a morpheme analysis is present; otherwise the endpoint targets the whole occurrence. `status` + * from `statusNum` via BT Extension's `AlignmentStatus` enum (CREATED=0, REJECTED=1, + * APPROVED=2, NEEDS_REVIEW=3) — lossy mapping where both CREATED and NEEDS_REVIEW collapse to + * `candidate`. `origin` from `originNum` — an undocumented integer with no enum; descriptive + * strings must be defined externally. Eflomal-generated alignments leave `originNum` and + * `statusNum` unset, so both default to 0 (`CREATED`). 
+ */ + export interface AlignmentLink { + id: string; + + /** Source-side endpoints (one or more morphemes / words from the source interlinear). */ + sourceEndpoints: AlignmentEndpoint[]; + + /** Target-side endpoints (one or more morphemes / words from the target interlinear). */ + targetEndpoints: AlignmentEndpoint[]; + + status: AssignmentStatus; + + /** How the alignment was created (manual, automatic tool, etc.). */ + origin?: string; + + /** + * Confidence in this alignment link, independent of the confidence on the analyses at each + * endpoint. + */ + confidence?: Confidence; + + /** Multilingual notes keyed by writing system (e.g. UI locale). */ + notes?: MultiString; + } + + // --------------------------------------------------------------------------- + // §1.10 AlignmentEndpoint + // --------------------------------------------------------------------------- + + /** + * One side of an alignment link, identifying a precise point of connection within an interlinear + * text. + * + * When the referenced occurrence has a morpheme-level analysis, `bundleId` identifies the + * specific MorphemeBundle — and by extension its `allomorphRef` (IMoForm), `lexemeRef` + * (ILexEntry), `senseRef` (ILexSense), and `grammarRef` (IMoMorphSynAnalysis). + * + * When the occurrence is unanalyzed, `bundleId` is absent and the link targets the whole word. + * + * Resolution chain (fully analyzed): AlignmentEndpoint → Occurrence → AnalysisAssignment → + * Analysis → MorphemeBundle → allomorphRef (IMoForm) → lexemeRef (ILexEntry) → senseRef + * (ILexSense) → grammarRef (IMoMorphSynAnalysis) + * + * Resolution chain (unanalyzed): AlignmentEndpoint → Occurrence → surfaceText only + */ + export interface AlignmentEndpoint { + /** The word or punctuation occurrence in the text. */ + occurrenceId: string; + /** - * Verse data keyed by verse reference (e.g. "RUT 3:1"). Exactly one entry per reference; the - * parser rejects XML that contains duplicate verse references. 
+ * Identifies a specific MorphemeBundle within one of the occurrence's analyses. When set, the + * alignment connects at the allomorph / morpheme level. When absent, the alignment connects to + * the whole (unanalyzed) occurrence. */ - Verses: Record; + bundleId?: string; } } diff --git a/tsconfig.json b/tsconfig.json index 6a9ab66..53a54d9 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -57,10 +57,11 @@ "sourceMap": true, // We need a baseurl for webpack's tsconfig path aliases plugin "baseUrl": "./", - /** Paths for src-rooted imports (e.g. in src/__tests__ use "parsers/..." or "main" instead of "../../parsers/..."). */ + /** Paths for src-rooted imports (e.g. in src/__tests__ use "parsers/..." or "types/..." instead of relative paths). */ "paths": { "@main": ["src/main"], - "parsers/*": ["src/parsers/*"] + "parsers/*": ["src/parsers/*"], + "types/*": ["src/types/*"] }, "noUnusedLocals": true, "noUnusedParameters": true, From 00e321b28329d96d351e2ef2ce0b6853f2808c85 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Thu, 19 Feb 2026 15:11:01 -0700 Subject: [PATCH 2/8] Enhance interlinearizer WebView with analyses view and update tests - Add support for a new JSON view mode displaying analyses derived from parsed data. - Implement functions to describe and label the new view mode. - Update the WebView component to include a button for switching to the analyses view. - Modify tests to cover the new analyses functionality and ensure proper rendering. - Refactor the `createAnalyses` function to generate analysis maps from interlinear data. 
--- .../interlinearizer.web-view.test.tsx | 35 ++- .../paratext-9/paratext9Converter.test.ts | 218 +++++++++++++++++- src/interlinearizer.web-view.tsx | 68 ++++-- src/parsers/paratext-9/paratext-9-types.ts | 2 +- src/parsers/paratext-9/paratext9Converter.ts | 8 +- 5 files changed, 302 insertions(+), 29 deletions(-) diff --git a/src/__tests__/interlinearizer.web-view.test.tsx b/src/__tests__/interlinearizer.web-view.test.tsx index 67cc843..7b25c59 100644 --- a/src/__tests__/interlinearizer.web-view.test.tsx +++ b/src/__tests__/interlinearizer.web-view.test.tsx @@ -25,6 +25,23 @@ const stubInterlinearization = { const mockParse = jest.fn().mockReturnValue(stubInterlinearData); const mockConvert = jest.fn().mockReturnValue(stubInterlinearization); +/** Stub analyses map for Analyses view (ID → Analysis). */ +const stubAnalysesMap = new Map([ + [ + 'analysis-en-lex1-s1', + { + id: 'analysis-en-lex1-s1', + analysisLanguage: 'en', + analysisType: 'gloss', + confidence: 'medium', + sourceSystem: 'paratext-9', + sourceUser: 'paratext-9-parser', + glossText: 'sense1', + }, + ], +]); +const mockCreateAnalyses = jest.fn().mockReturnValue(stubAnalysesMap); + /** Mock parser: no real XML parsing; returns stub data. Parser/converter are tested elsewhere. */ jest.mock('parsers/paratext-9/paratext9Parser', () => ({ Paratext9Parser: jest.fn().mockImplementation(() => ({ @@ -32,9 +49,10 @@ jest.mock('parsers/paratext-9/paratext9Parser', () => ({ })), })); -/** Mock converter: no real conversion; returns stub Interlinearization. */ +/** Mock converter: no real conversion; returns stub Interlinearization and stub analyses map. 
*/ jest.mock('parsers/paratext-9/paratext9Converter', () => ({ convertParatext9ToInterlinearization: mockConvert, + createAnalyses: mockCreateAnalyses, })); /** @@ -86,13 +104,14 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/test-data\/Interlinear_en_MAT\.xml/i)).toBeInTheDocument(); }); - it('renders the JSON view mode switch (InterlinearData / Interlinearization)', () => { + it('renders the JSON view mode switch (InterlinearData / Interlinearization / Analyses)', () => { render(); const group = screen.getByRole('group', { name: /json view mode/i }); expect(group).toBeInTheDocument(); expect(screen.getByRole('button', { name: /^interlineardata$/i })).toBeInTheDocument(); expect(screen.getByRole('button', { name: /^interlinearization$/i })).toBeInTheDocument(); + expect(screen.getByRole('button', { name: /^analyses$/i })).toBeInTheDocument(); expect(screen.getByText(/view json as:/i)).toBeInTheDocument(); }); @@ -152,6 +171,18 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/bookId/i)).toBeInTheDocument(); }); + it('switching to Analyses shows analysis map JSON from test data', () => { + render(); + + fireEvent.click(screen.getByRole('button', { name: /^analyses$/i })); + + expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); + expect(mockCreateAnalyses).toHaveBeenCalledWith(stubInterlinearData); + expect(screen.getByText(/analysis-en-lex1-s1/)).toBeInTheDocument(); + expect(screen.getByText(/glossText/i)).toBeInTheDocument(); + expect(screen.getByText(/paratext-9/i)).toBeInTheDocument(); + }); + it('renders empty JSON pre when jsonToShow is undefined (converter returns undefined)', () => { mockConvert.mockReturnValueOnce(undefined); diff --git a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts index 7a86cdf..f8a2600 100644 --- a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts +++ 
b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts @@ -1,8 +1,11 @@ -/** @file Unit tests for {@link convertParatext9ToInterlinearization}. */ +/** @file Unit tests for {@link convertParatext9ToInterlinearization} and {@link createAnalyses}. */ /// import type { InterlinearData } from 'paratext-9-types'; -import { convertParatext9ToInterlinearization } from 'parsers/paratext-9/paratext9Converter'; +import { + convertParatext9ToInterlinearization, + createAnalyses, +} from 'parsers/paratext-9/paratext9Converter'; describe('convertParatext9ToInterlinearization', () => { describe('top-level structure', () => { @@ -428,4 +431,215 @@ describe('convertParatext9ToInterlinearization', () => { expect(result.books[0].segments[0].occurrences[0].id).toBe(`${segId}-occ-0-Word:word/0-4`); }); }); + + describe('createAnalyses', () => { + it('returns empty Map when verses is empty', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: {}, + }; + const result = createAnalyses(data); + + expect(result).toBeInstanceOf(Map); + expect(result.size).toBe(0); + }); + + it('returns one Analysis for one verse with one cluster and one lexeme', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [{ lexemeId: 'Word:hello', senseId: 'g1' }], + lexemesId: 'Word:hello', + id: 'Word:hello/0-4', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = createAnalyses(data); + + expect(result.size).toBe(1); + const analysis = result.get('analysis-en-Word:hello-g1'); + expect(analysis).toBeDefined(); + expect(analysis?.id).toBe('analysis-en-Word:hello-g1'); + expect(analysis?.analysisLanguage).toBe('en'); + expect(analysis?.analysisType).toBe('gloss'); + expect(analysis?.confidence).toBe('medium'); + expect(analysis?.sourceSystem).toBe('paratext-9'); + 
expect(analysis?.sourceUser).toBe('paratext-9-parser'); + expect(analysis?.glossText).toBe('g1'); + }); + + it('deduplicates: same lexeme in multiple clusters yields one analysis', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 3 }, + lexemes: [{ lexemeId: 'Word:the', senseId: 'def' }], + lexemesId: 'Word:the', + id: 'c1', + excluded: false, + }, + { + textRange: { index: 4, length: 3 }, + lexemes: [{ lexemeId: 'Word:the', senseId: 'def' }], + lexemesId: 'Word:the', + id: 'c2', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = createAnalyses(data); + + expect(result.size).toBe(1); + expect(result.has('analysis-en-Word:the-def')).toBe(true); + }); + + it('returns multiple analyses for different lexemes (lexemeId or senseId)', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 4 }, + lexemes: [ + { lexemeId: 'Stem:run', senseId: 'g1' }, + { lexemeId: 'Suffix:ing', senseId: 'g2' }, + ], + lexemesId: 'Stem:run', + id: 'cluster1', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = createAnalyses(data); + + expect(result.size).toBe(2); + expect(result.has('analysis-en-Stem:run-g1')).toBe(true); + expect(result.has('analysis-en-Suffix:ing-g2')).toBe(true); + expect(result.get('analysis-en-Stem:run-g1')?.glossText).toBe('g1'); + expect(result.get('analysis-en-Suffix:ing-g2')?.glossText).toBe('g2'); + }); + + it('sets glossText to undefined when senseId is empty', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 1 }, + lexemes: [{ lexemeId: 'Word:a', senseId: '' }], + lexemesId: 'Word:a', + id: 'Word:a/0-1', + excluded: false, + }, + ], + punctuations: 
[], + }, + }, + }; + const result = createAnalyses(data); + + expect(result.size).toBe(1); + const analysis = result.get('analysis-en-Word:a'); + expect(analysis).toBeDefined(); + expect(analysis?.glossText).toBeUndefined(); + expect(analysis?.id).toBe('analysis-en-Word:a'); + }); + + it('uses glossLanguage from interlinearData for analysisLanguage and id prefix', () => { + const data: InterlinearData = { + glossLanguage: 'fr', + bookId: 'GEN', + verses: { + 'GEN 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 2 }, + lexemes: [{ lexemeId: 'Word:au', senseId: 'sens1' }], + lexemesId: 'Word:au', + id: 'c1', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = createAnalyses(data); + + expect(result.size).toBe(1); + const analysis = result.get('analysis-fr-Word:au-sens1'); + expect(analysis).toBeDefined(); + expect(analysis?.analysisLanguage).toBe('fr'); + expect(analysis?.id).toBe('analysis-fr-Word:au-sens1'); + }); + + it('includes analyses from all verses', () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 3 }, + lexemes: [{ lexemeId: 'Word:one', senseId: 's1' }], + lexemesId: 'Word:one', + id: 'c1', + excluded: false, + }, + ], + punctuations: [], + }, + 'MAT 1:2': { + hash: '', + clusters: [ + { + textRange: { index: 0, length: 3 }, + lexemes: [{ lexemeId: 'Word:two', senseId: 's2' }], + lexemesId: 'Word:two', + id: 'c2', + excluded: false, + }, + ], + punctuations: [], + }, + }, + }; + const result = createAnalyses(data); + + expect(result.size).toBe(2); + expect(result.has('analysis-en-Word:one-s1')).toBe(true); + expect(result.has('analysis-en-Word:two-s2')).toBe(true); + }); + }); }); diff --git a/src/interlinearizer.web-view.tsx b/src/interlinearizer.web-view.tsx index 4d1a14d..b3808ba 100644 --- a/src/interlinearizer.web-view.tsx +++ b/src/interlinearizer.web-view.tsx @@ -1,7 +1,10 @@ 
import { useMemo, useState } from 'react'; import type { InterlinearData } from 'paratext-9-types'; -import { Paratext9Parser } from './parsers/paratext-9/paratext9Parser'; -import { convertParatext9ToInterlinearization } from './parsers/paratext-9/paratext9Converter'; +import { Paratext9Parser } from 'parsers/paratext-9/paratext9Parser'; +import { + convertParatext9ToInterlinearization, + createAnalyses, +} from 'parsers/paratext-9/paratext9Converter'; /** Test interlinear XML bundled at build time (from test-data/Interlinear_en_MAT.xml). */ import testXml from '../test-data/Interlinear_en_MAT.xml?raw'; @@ -9,16 +12,30 @@ import testXml from '../test-data/Interlinear_en_MAT.xml?raw'; /** Result of parsing the bundled test XML: either data or an error message. */ type ParseResult = { data: InterlinearData; error: undefined } | { data: undefined; error: string }; -/** View mode for the JSON display: raw PT9 structure or converted interlinearizer model. */ -type JsonViewMode = 'interlinear-data' | 'interlinearization'; +/** View mode for the JSON display: raw PT9, converted model, or analyses map. */ +type JsonViewMode = 'interlinear-data' | 'interlinearization' | 'analyses'; + +function getViewModeDescription(mode: JsonViewMode): string { + if (mode === 'interlinear-data') return 'Paratext 9 book/verse/cluster structure.'; + if (mode === 'interlinearization') + return 'Converted interlinearizer book/segment/occurrence model.'; + return 'Analysis objects (ID → gloss, confidence, source) from test data.'; +} + +function getViewModeLabel(mode: JsonViewMode): string { + if (mode === 'interlinear-data') return 'InterlinearData (JSON):'; + if (mode === 'interlinearization') return 'Interlinearization (JSON):'; + return 'Analyses (JSON):'; +} /** * Main interlinearizer WebView. Parses the bundled test XML into the interlinear model and displays * the result as raw JSON. No PAPI commands or file loading—everything is self-contained. 
* - * A switch lets the user choose between viewing {@link InterlinearData} (Paratext 9 format) or - * {@link Interlinearization} (converted interlinearizer model). Parser is created inside useMemo so - * parsing runs once per mount. + * A switch lets the user choose between: {@link InterlinearData} (Paratext 9 format), + * {@link Interlinearization} (converted interlinearizer model), or Analyses (ID → Analysis map + * derived from test data: gloss, confidence, source). Parser is created inside useMemo so parsing + * runs once per mount. */ globalThis.webViewComponent = function InterlinearizerWebView() { const [jsonViewMode, setJsonViewMode] = useState('interlinear-data'); @@ -38,8 +55,19 @@ globalThis.webViewComponent = function InterlinearizerWebView() { [parsed], ); - /** In Interlinearization mode use converted data (may be undefined); otherwise use parsed. */ - const jsonToShow = jsonViewMode === 'interlinearization' ? interlinearization : parsed; + /** Analyses map derived from parsed data (ID → Analysis); only defined when parsed exists. */ + const analysesMap = useMemo(() => (parsed ? createAnalyses(parsed) : undefined), [parsed]); + + /** Data to show as JSON: depends on selected view mode. */ + const jsonToShow = ((): + | typeof parsed + | ReturnType + | Record + | undefined => { + if (jsonViewMode === 'interlinearization') return interlinearization; + if (jsonViewMode === 'analyses' && analysesMap) return Object.fromEntries(analysesMap); + return parsed; + })(); return (
@@ -90,18 +118,24 @@ globalThis.webViewComponent = function InterlinearizerWebView() { > Interlinearization +

- {jsonViewMode === 'interlinear-data' - ? 'Paratext 9 book/verse/cluster structure.' - : 'Converted interlinearizer book/segment/occurrence model.'} + {getViewModeDescription(jsonViewMode)}

-

- {jsonViewMode === 'interlinear-data' - ? 'InterlinearData (JSON):' - : 'Interlinearization (JSON):'} -

+

{getViewModeLabel(jsonViewMode)}

             {jsonToShow ? JSON.stringify(jsonToShow, undefined, 2) : ''}
           
diff --git a/src/parsers/paratext-9/paratext-9-types.ts b/src/parsers/paratext-9/paratext-9-types.ts index 263bbd7..9aa1c3c 100644 --- a/src/parsers/paratext-9/paratext-9-types.ts +++ b/src/parsers/paratext-9/paratext-9-types.ts @@ -25,7 +25,7 @@ declare module 'paratext-9-types' { lexemesId: string; /** Unique cluster id: LexemesId plus TextRange (e.g. "Word:a/Word:b/21-3"). */ id: string; - /** Excluded flag. See [pt9-xml.md](../parsers/pt9-xml.md) for details. */ + /** Excluded flag. See [pt9-xml.md](pt9-xml.md) for details. */ excluded: boolean; } diff --git a/src/parsers/paratext-9/paratext9Converter.ts b/src/parsers/paratext-9/paratext9Converter.ts index ea8d010..19fd2dc 100644 --- a/src/parsers/paratext-9/paratext9Converter.ts +++ b/src/parsers/paratext-9/paratext9Converter.ts @@ -45,7 +45,6 @@ function generateBookId(bookId: string): string { /** * Generates a deterministic ID for a segment (verse). * - * @param bookId - Book ID. * @param verseRef - Verse reference (e.g., "MAT 1:1"). * @returns A unique ID for the segment. */ @@ -124,7 +123,6 @@ function textRangeToAnchor(textRange: StringRange): string { * * @param verseRef - Verse reference (e.g., "MAT 1:1"). * @param verseData - Verse data from Paratext 9. - * @param bookId - Book ID for generating segment ID. * @param glossLanguage - Gloss language code. * @returns A Segment with occurrences converted from clusters and punctuations. */ @@ -198,11 +196,10 @@ function convertVerseToSegment( * @param interlinearData - Paratext 9 interlinear data. * @returns Map of analysis ID to Analysis object. 
 */
-function createAnalyses(interlinearData: InterlinearData): Map<string, Analysis> {
+export function createAnalyses(interlinearData: InterlinearData): Map<string, Analysis> {
   const analyses = new Map<string, Analysis>();
   const { glossLanguage } = interlinearData;

-  // Collect all unique lexeme-sense pairs
   Object.values(interlinearData.verses).forEach((verseData) => {
     verseData.clusters.forEach((cluster) => {
       cluster.lexemes.forEach((lexeme) => {
@@ -257,9 +254,6 @@ export function convertParatext9ToInterlinearization(
   const interlinearizationId = generateInterlinearizationId(bookId);
   const analyzedBookId = generateBookId(bookId);

-  // Note: analyses are created but not returned - they're referenced via analysisId in assignments
-  createAnalyses(interlinearData);
-
   const segments = Object.entries(verses).map(([verseRef, verseData]) => {
     return convertVerseToSegment(verseRef, verseData, glossLanguage);
   });

From 005eb2ee5ed828e1ea1b5409d617df542bf04652 Mon Sep 17 00:00:00 2001
From: alex-rawlings-yyc
Date: Thu, 19 Feb 2026 15:23:54 -0700
Subject: [PATCH 3/8] Refactor interlinearizer WebView for accessibility and
 update tests

- Change role from 'group' to 'radiogroup' for JSON view mode buttons to improve accessibility.
- Update button roles to 'radio' and aria attributes to 'aria-checked' for better semantic meaning.
- Modify tests to reflect the updated roles and ensure proper functionality of the JSON view mode switch.
--- .../interlinearizer.web-view.test.tsx | 20 +++++++++---------- .../paratext-9/paratext9Converter.test.ts | 1 + src/interlinearizer.web-view.tsx | 11 ++++++---- src/parsers/paratext-9/paratext9Converter.ts | 16 ++++++--------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/__tests__/interlinearizer.web-view.test.tsx b/src/__tests__/interlinearizer.web-view.test.tsx index 7b25c59..eecb01c 100644 --- a/src/__tests__/interlinearizer.web-view.test.tsx +++ b/src/__tests__/interlinearizer.web-view.test.tsx @@ -107,11 +107,11 @@ describe('InterlinearizerWebView', () => { it('renders the JSON view mode switch (InterlinearData / Interlinearization / Analyses)', () => { render(); - const group = screen.getByRole('group', { name: /json view mode/i }); - expect(group).toBeInTheDocument(); - expect(screen.getByRole('button', { name: /^interlineardata$/i })).toBeInTheDocument(); - expect(screen.getByRole('button', { name: /^interlinearization$/i })).toBeInTheDocument(); - expect(screen.getByRole('button', { name: /^analyses$/i })).toBeInTheDocument(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + expect(radiogroup).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlineardata$/i })).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlinearization$/i })).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^analyses$/i })).toBeInTheDocument(); expect(screen.getByText(/view json as:/i)).toBeInTheDocument(); }); @@ -150,7 +150,7 @@ describe('InterlinearizerWebView', () => { it('switching to Interlinearization shows converted model JSON', () => { render(); - fireEvent.click(screen.getByRole('button', { name: /^interlinearization$/i })); + fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument(); @@ 
-161,10 +161,10 @@ describe('InterlinearizerWebView', () => { it('switching back to InterlinearData shows PT9 structure JSON', () => { render(); - fireEvent.click(screen.getByRole('button', { name: /^interlinearization$/i })); + fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); - fireEvent.click(screen.getByRole('button', { name: /^interlineardata$/i })); + fireEvent.click(screen.getByRole('radio', { name: /^interlineardata$/i })); expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); expect(screen.getByText(/glossLanguage/i)).toBeInTheDocument(); @@ -174,7 +174,7 @@ describe('InterlinearizerWebView', () => { it('switching to Analyses shows analysis map JSON from test data', () => { render(); - fireEvent.click(screen.getByRole('button', { name: /^analyses$/i })); + fireEvent.click(screen.getByRole('radio', { name: /^analyses$/i })); expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); expect(mockCreateAnalyses).toHaveBeenCalledWith(stubInterlinearData); @@ -187,7 +187,7 @@ describe('InterlinearizerWebView', () => { mockConvert.mockReturnValueOnce(undefined); const { container } = render(); - fireEvent.click(screen.getByRole('button', { name: /^interlinearization$/i })); + fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); const jsonPre = container.querySelector('pre'); expect(jsonPre).toBeInTheDocument(); diff --git a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts index f8a2600..842ff50 100644 --- a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts +++ b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts @@ -212,6 +212,7 @@ describe('convertParatext9ToInterlinearization', () => { }; const result = convertParatext9ToInterlinearization(data); + expect(result.books[0].textVersion).toBe('H1'); 
expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('approved'); }); }); diff --git a/src/interlinearizer.web-view.tsx b/src/interlinearizer.web-view.tsx index b3808ba..a14a998 100644 --- a/src/interlinearizer.web-view.tsx +++ b/src/interlinearizer.web-view.tsx @@ -91,42 +91,45 @@ globalThis.webViewComponent = function InterlinearizerWebView() { View JSON as:
diff --git a/src/parsers/paratext-9/paratext9Converter.ts b/src/parsers/paratext-9/paratext9Converter.ts index 19fd2dc..27ae517 100644 --- a/src/parsers/paratext-9/paratext9Converter.ts +++ b/src/parsers/paratext-9/paratext9Converter.ts @@ -134,23 +134,19 @@ function convertVerseToSegment( const segmentId = generateSegmentId(verseRef); const wordOccurrences = verseData.clusters.map((cluster, clusterIndex): Occurrence => { + const occurrenceId = generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex); const assignments = cluster.lexemes.map((lexeme): AnalysisAssignment => { const analysisId = generateAnalysisId(lexeme.lexemeId, lexeme.senseId, glossLanguage); - const assignmentId = generateAssignmentId( - generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex), - analysisId, - ); + const assignmentId = generateAssignmentId(occurrenceId, analysisId); return { id: assignmentId, - occurrenceId: generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex), + occurrenceId, analysisId, status: verseData.hash ? AssignmentStatus.Approved : AssignmentStatus.Suggested, }; }); - const occurrenceId = generateOccurrenceIdFromCluster(segmentId, cluster.id, clusterIndex); - return { id: occurrenceId, segmentId, @@ -258,9 +254,9 @@ export function convertParatext9ToInterlinearization( return convertVerseToSegment(verseRef, verseData, glossLanguage); }); - const verseDataArray = Object.values(verses); - const verseWithHash = verseDataArray.find((verseData) => verseData.hash); - const textVersion = verseWithHash?.hash || ''; + const sortedVerseRefs = Object.keys(verses).sort(); + const firstVerseRefWithHash = sortedVerseRefs.find((ref) => verses[ref].hash); + const textVersion = firstVerseRefWithHash !== undefined ? 
verses[firstVerseRefWithHash].hash : ''; const analyzedBook: AnalyzedBook = { id: analyzedBookId, From 44199a046803676d50d5ed8d7ac01fe6e21a3351 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Thu, 19 Feb 2026 16:16:45 -0700 Subject: [PATCH 4/8] Add SHA-256 hash-based text version generation and converter support - Introduce SHA-256 hashing for consistent book-level text version generation across Node and WebView environments. - Add Web Crypto-based sha256HexWebCrypto for WebView-safe hashing; support injectable hashSha256Hex in converter options for Node (e.g. paranext-core generateHashFromBuffer). - Compute book text version from sorted, concatenated verse hashes via computeBookTextVersion. - Update paratext9Converter and tests to align with hash-generation behavior and remove obsolete code. - Refactor interlinearizer WebView to use useEffect for async conversion and improve JSON view mode buttons. - Update documentation for data structures and types. --- .../interlinearizer.web-view.test.tsx | 100 +++++++---- .../paratext-9/paratext9Converter.test.ts | 169 ++++++++++++++---- src/interlinearizer.web-view.tsx | 95 +++++----- src/parsers/paratext-9/paratext9Converter.ts | 64 ++++++- src/parsers/paratext-9/pt9-xml.md | 10 +- 5 files changed, 303 insertions(+), 135 deletions(-) diff --git a/src/__tests__/interlinearizer.web-view.test.tsx b/src/__tests__/interlinearizer.web-view.test.tsx index eecb01c..b5e0766 100644 --- a/src/__tests__/interlinearizer.web-view.test.tsx +++ b/src/__tests__/interlinearizer.web-view.test.tsx @@ -4,7 +4,7 @@ import type { WebViewProps } from '@papi/core'; import type { SerializedVerseRef } from '@sillsdev/scripture'; -import { fireEvent, render, screen } from '@testing-library/react'; +import { act, fireEvent, render, screen, waitFor } from '@testing-library/react'; import type { InterlinearData } from 'paratext-9-types'; /** Stub InterlinearData returned by the mocked parser. Matches shape the WebView displays. 
*/ @@ -23,7 +23,7 @@ const stubInterlinearization = { }; const mockParse = jest.fn().mockReturnValue(stubInterlinearData); -const mockConvert = jest.fn().mockReturnValue(stubInterlinearization); +const mockConvert = jest.fn().mockResolvedValue(stubInterlinearization); /** Stub analyses map for Analyses view (ID → Analysis). */ const stubAnalysesMap = new Map([ @@ -88,15 +88,31 @@ const testWebViewProps: WebViewProps = { updateWebViewDefinition: () => true, }; +/** + * Renders the WebView and waits for the mount effect's async conversion to settle inside act(). The + * component calls convertParatext9ToInterlinearization(parsed) in useEffect; when the promise + * resolves it calls setInterlinearization. Without waiting, that update runs after the test and + * triggers "An update to ... was not wrapped in act(...)". This helper flushes the async work so + * all state updates are wrapped. + */ +async function renderWebView(): Promise> { + return act(async () => { + const result = render(); + await Promise.resolve(); + await Promise.resolve(); + return result; + }); +} + describe('InterlinearizerWebView', () => { - it('renders the heading "Interlinearizer"', () => { - render(); + it('renders the heading "Interlinearizer"', async () => { + await renderWebView(); expect(screen.getByRole('heading', { name: /interlinearizer/i })).toBeInTheDocument(); }); - it('renders the description mentioning test-data XML', () => { - render(); + it('renders the description mentioning test-data XML', async () => { + await renderWebView(); expect( screen.getByText(/raw json of the model parsed from/i, { exact: false }), @@ -104,8 +120,8 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/test-data\/Interlinear_en_MAT\.xml/i)).toBeInTheDocument(); }); - it('renders the JSON view mode switch (InterlinearData / Interlinearization / Analyses)', () => { - render(); + it('renders the JSON view mode switch (InterlinearData / Interlinearization / Analyses)', async () => { + 
await renderWebView(); const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); expect(radiogroup).toBeInTheDocument(); @@ -115,55 +131,58 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/view json as:/i)).toBeInTheDocument(); }); - it('displays InterlinearData JSON by default when parser returns data', () => { - render(); + it('displays InterlinearData JSON by default when parser returns data', async () => { + await renderWebView(); expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); expect(screen.getByText(/glossLanguage/i)).toBeInTheDocument(); expect(screen.getByText(/bookId/i)).toBeInTheDocument(); }); - it('displays parsed structure including glossLanguage and bookId values', () => { - render(); + it('displays parsed structure including glossLanguage and bookId values', async () => { + await renderWebView(); expect(screen.getByText(/"en"/)).toBeInTheDocument(); expect(screen.getByText(/"MAT"/)).toBeInTheDocument(); }); - it('does not show parse error when parser succeeds', () => { - render(); + it('does not show parse error when parser succeeds', async () => { + await renderWebView(); expect(screen.queryByText(/^parse error$/i)).not.toBeInTheDocument(); }); - it('displays parse error when parser throws an Error (uses err.message)', () => { + it('displays parse error when parser throws an Error (uses err.message)', async () => { mockParse.mockImplementationOnce(() => { throw new Error('Invalid XML structure'); }); - render(); + await renderWebView(); expect(screen.getByRole('heading', { name: /^parse error$/i })).toBeInTheDocument(); expect(screen.getByText(/invalid xml structure/i)).toBeInTheDocument(); }); - it('switching to Interlinearization shows converted model JSON', () => { - render(); + it('switching to Interlinearization shows converted model JSON', async () => { + await renderWebView(); fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); 
expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); - expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument(); - expect(screen.getByText(/sourceWritingSystem/i)).toBeInTheDocument(); - expect(screen.getByText(/segments/i)).toBeInTheDocument(); + await waitFor(() => { + expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument(); + expect(screen.getByText(/sourceWritingSystem/i)).toBeInTheDocument(); + expect(screen.getByText(/segments/i)).toBeInTheDocument(); + }); }); - it('switching back to InterlinearData shows PT9 structure JSON', () => { - render(); + it('switching back to InterlinearData shows PT9 structure JSON', async () => { + await renderWebView(); fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); - expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); - + await waitFor(() => { + expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); + }); fireEvent.click(screen.getByRole('radio', { name: /^interlineardata$/i })); expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); @@ -171,8 +190,8 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/bookId/i)).toBeInTheDocument(); }); - it('switching to Analyses shows analysis map JSON from test data', () => { - render(); + it('switching to Analyses shows analysis map JSON from test data', async () => { + await renderWebView(); fireEvent.click(screen.getByRole('radio', { name: /^analyses$/i })); @@ -183,11 +202,14 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/paratext-9/i)).toBeInTheDocument(); }); - it('renders empty JSON pre when jsonToShow is undefined (converter returns undefined)', () => { - mockConvert.mockReturnValueOnce(undefined); + it('renders empty JSON pre when jsonToShow is undefined (converter returns undefined)', async () => { + mockConvert.mockResolvedValueOnce(undefined); - const { container } = render(); + const 
{ container } = await renderWebView(); fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); + await waitFor(() => { + expect(container.querySelector('pre')).toBeInTheDocument(); + }); const jsonPre = container.querySelector('pre'); expect(jsonPre).toBeInTheDocument(); @@ -195,16 +217,30 @@ describe('InterlinearizerWebView', () => { expect(jsonPre).not.toHaveTextContent('undefined'); }); - it('displays parse error when parser throws non-Error (uses String(err))', () => { + it('displays parse error when parser throws non-Error (uses String(err))', async () => { mockParse.mockImplementationOnce(() => { // Intentionally throw a non-Error to test the String(err) branch in the catch block. // eslint-disable-next-line no-throw-literal -- testing non-Error handling throw 'plain string error'; }); - render(); + await renderWebView(); expect(screen.getByRole('heading', { name: /^parse error$/i })).toBeInTheDocument(); expect(screen.getByText('plain string error')).toBeInTheDocument(); }); + + it('sets interlinearization to undefined when converter rejects', async () => { + mockConvert.mockRejectedValueOnce(new Error('Conversion failed')); + + const { container } = await renderWebView(); + fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); + await waitFor(() => { + expect(container.querySelector('pre')).toBeInTheDocument(); + }); + + const jsonPre = container.querySelector('pre'); + expect(jsonPre).toBeInTheDocument(); + expect(jsonPre).toBeEmptyDOMElement(); + }); }); diff --git a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts index 842ff50..f9348ba 100644 --- a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts +++ b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts @@ -1,21 +1,38 @@ -/** @file Unit tests for {@link convertParatext9ToInterlinearization} and {@link createAnalyses}. 
*/ +/** + * @file Unit tests for {@link convertParatext9ToInterlinearization} and {@link createAnalyses}. + * @jest-environment node + */ /// +import { createHash } from 'crypto'; import type { InterlinearData } from 'paratext-9-types'; import { convertParatext9ToInterlinearization, createAnalyses, } from 'parsers/paratext-9/paratext9Converter'; +/** SHA-256 hex hasher using Node crypto. */ +function nodeSha256Hex(str: string): Promise { + return Promise.resolve(createHash('sha256').update(str, 'utf8').digest('hex')); +} + +/** Options for converter calls in tests: use Node crypto. */ +const nodeHashOptions = { hashSha256Hex: nodeSha256Hex }; + +/** Expected textVersion for a single verse hash: SHA-256( hash ) in hex. */ +function expectedTextVersionForSingleHash(hash: string): string { + return createHash('sha256').update(hash, 'utf8').digest('hex'); +} + describe('convertParatext9ToInterlinearization', () => { describe('top-level structure', () => { - it('produces Interlinearization with id, sourceWritingSystem, analysisLanguages, books', () => { + it('produces Interlinearization with id, sourceWritingSystem, analysisLanguages, books', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', verses: {}, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result).toHaveProperty('id'); expect(result).toHaveProperty('sourceWritingSystem', ''); @@ -25,46 +42,46 @@ describe('convertParatext9ToInterlinearization', () => { expect(Array.isArray(result.books)).toBe(true); }); - it('uses bookId for interlinearization id (lowercase, spaces to dashes)', () => { + it('uses bookId for interlinearization id (lowercase, spaces to dashes)', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'RUT', verses: {}, }; - const result = convertParatext9ToInterlinearization(data); + const result = await 
convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.id).toBe('rut-interlinear'); }); - it('produces id mat-interlinear when bookId is MAT', () => { + it('produces id mat-interlinear when bookId is MAT', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', verses: {}, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.id).toBe('mat-interlinear'); }); - it('sets analysisLanguages from glossLanguage', () => { + it('sets analysisLanguages from glossLanguage', async () => { const data: InterlinearData = { glossLanguage: 'fr', bookId: 'GEN', verses: {}, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.analysisLanguages).toEqual(['fr']); }); - it('produces exactly one AnalyzedBook with id, bookRef, textVersion, segments', () => { + it('produces exactly one AnalyzedBook with id, bookRef, textVersion, segments', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', verses: {}, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books).toHaveLength(1); const book = result.books[0]; @@ -77,21 +94,95 @@ describe('convertParatext9ToInterlinearization', () => { }); describe('empty verses', () => { - it('returns empty segments array and empty textVersion when verses is empty', () => { + it('returns empty segments array and empty textVersion when verses is empty', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', verses: {}, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments).toEqual([]); 
expect(result.books[0].textVersion).toBe(''); }); }); + describe('textVersion (composite book-level digest)', () => { + it('is empty when no verse has a hash', async () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { hash: '', clusters: [], punctuations: [] }, + 'MAT 1:2': { hash: '', clusters: [], punctuations: [] }, + }, + }; + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); + expect(result.books[0].textVersion).toBe(''); + }); + + it('is SHA-256 of sorted concatenated hashes when multiple verses have hashes', async () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:2': { + hash: 'hash2', + clusters: [], + punctuations: [], + }, + 'MAT 1:1': { + hash: 'hash1', + clusters: [], + punctuations: [], + }, + }, + }; + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); + const sortedHashes = ['hash1', 'hash2'].sort(); + const expected = createHash('sha256').update(sortedHashes.join(''), 'utf8').digest('hex'); + expect(result.books[0].textVersion).toBe(expected); + }); + + it('uses Web Crypto (sha256HexWebCrypto) when hashSha256Hex option is omitted', async () => { + const data: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { hash: 'a', clusters: [], punctuations: [] }, + 'MAT 1:2': { hash: 'b', clusters: [], punctuations: [] }, + }, + }; + const result = await convertParatext9ToInterlinearization(data); + const sortedHashes = ['a', 'b'].sort(); + const expected = createHash('sha256').update(sortedHashes.join(''), 'utf8').digest('hex'); + expect(result.books[0].textVersion).toBe(expected); + }); + + it('changes when any verse hash changes', async () => { + const base: InterlinearData = { + glossLanguage: 'en', + bookId: 'MAT', + verses: { + 'MAT 1:1': { hash: 'h1', clusters: [], punctuations: [] }, + 'MAT 1:2': { hash: 'h2', clusters: [], punctuations: [] }, + 
}, + }; + const result1 = await convertParatext9ToInterlinearization(base, nodeHashOptions); + const modified = { + ...base, + verses: { + ...base.verses, + 'MAT 1:2': { ...base.verses['MAT 1:2'], hash: 'h2-modified' }, + }, + }; + const result2 = await convertParatext9ToInterlinearization(modified, nodeHashOptions); + expect(result1.books[0].textVersion).not.toBe(result2.books[0].textVersion); + }); + }); + describe('verse to segment conversion', () => { - it('converts one verse with one cluster to one segment with one word occurrence', () => { + it('converts one verse with one cluster to one segment with one word occurrence', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -111,7 +202,7 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments).toHaveLength(1); const seg = result.books[0].segments[0]; @@ -137,7 +228,7 @@ describe('convertParatext9ToInterlinearization', () => { expect(assign.id).toBe(`assign-${occ.id}-analysis-en-Word:word-sense1`); }); - it('uses verse hash for textVersion and sets assignment status to approved when verse has hash', () => { + it('uses composite book-level digest for textVersion and sets assignment status to approved when verse has hash', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -157,15 +248,15 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); - expect(result.books[0].textVersion).toBe('ABC123'); + expect(result.books[0].textVersion).toBe(expectedTextVersionForSingleHash('ABC123')); expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('approved'); }); }); describe('assignment 
status from verse hash', () => { - it('sets assignment status to suggested when verse has no hash', () => { + it('sets assignment status to suggested when verse has no hash', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -185,12 +276,12 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('suggested'); }); - it('sets assignment status to approved when verse has hash', () => { + it('sets assignment status to approved when verse has hash', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -210,15 +301,15 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); - expect(result.books[0].textVersion).toBe('H1'); + expect(result.books[0].textVersion).toBe(expectedTextVersionForSingleHash('H1')); expect(result.books[0].segments[0].occurrences[0].assignments[0].status).toBe('approved'); }); }); describe('cluster with multiple lexemes', () => { - it('creates one word occurrence with multiple assignments (one per lexeme)', () => { + it('creates one word occurrence with multiple assignments (one per lexeme)', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -241,7 +332,7 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); const occ = result.books[0].segments[0].occurrences[0]; expect(occ.assignments).toHaveLength(2); @@ -254,7 +345,7 @@ describe('convertParatext9ToInterlinearization', () => { }); describe('punctuation 
occurrences', () => { - it('converts punctuations to punctuation occurrences after word occurrences (surfaceText from afterText when present)', () => { + it('converts punctuations to punctuation occurrences after word occurrences (surfaceText from afterText when present)', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -280,7 +371,7 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); const seg = result.books[0].segments[0]; expect(seg.occurrences).toHaveLength(2); @@ -294,7 +385,7 @@ describe('convertParatext9ToInterlinearization', () => { expect(puncOcc.id).toBe('mat-1:1-punc-1-34-2'); }); - it('uses beforeText for surfaceText when afterText is empty', () => { + it('uses beforeText for surfaceText when afterText is empty', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -306,12 +397,12 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments[0].occurrences[0].surfaceText).toBe(','); }); - it('uses empty surfaceText when both beforeText and afterText are empty', () => { + it('uses empty surfaceText when both beforeText and afterText are empty', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -323,14 +414,14 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments[0].occurrences[0].surfaceText).toBe(''); }); }); describe('verse with no clusters', () => { - it('produces segment with empty occurrences when verse has no clusters 
and no punctuations', () => { + it('produces segment with empty occurrences when verse has no clusters and no punctuations', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -342,7 +433,7 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments).toHaveLength(1); expect(result.books[0].segments[0].occurrences).toEqual([]); @@ -352,7 +443,7 @@ describe('convertParatext9ToInterlinearization', () => { }); describe('lexeme without senseId', () => { - it('generates analysis id without sense suffix when senseId is empty', () => { + it('generates analysis id without sense suffix when senseId is empty', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -372,7 +463,7 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments[0].occurrences[0].assignments[0].analysisId).toBe( 'analysis-en-Word:a', @@ -381,7 +472,7 @@ describe('convertParatext9ToInterlinearization', () => { }); describe('segment and occurrence IDs', () => { - it('generates segment id from verseRef (lowercase, spaces to dashes)', () => { + it('generates segment id from verseRef (lowercase, spaces to dashes)', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -401,12 +492,12 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); expect(result.books[0].segments[0].id).toBe('mat-1:1'); }); - it('generates occurrence id from segmentId, cluster id, and index', () => { + it('generates 
occurrence id from segmentId, cluster id, and index', async () => { const data: InterlinearData = { glossLanguage: 'en', bookId: 'MAT', @@ -426,7 +517,7 @@ describe('convertParatext9ToInterlinearization', () => { }, }, }; - const result = convertParatext9ToInterlinearization(data); + const result = await convertParatext9ToInterlinearization(data, nodeHashOptions); const segId = result.books[0].segments[0].id; expect(result.books[0].segments[0].occurrences[0].id).toBe(`${segId}-occ-0-Word:word/0-4`); diff --git a/src/interlinearizer.web-view.tsx b/src/interlinearizer.web-view.tsx index a14a998..e13f43d 100644 --- a/src/interlinearizer.web-view.tsx +++ b/src/interlinearizer.web-view.tsx @@ -1,4 +1,4 @@ -import { useMemo, useState } from 'react'; +import { useEffect, useMemo, useState } from 'react'; import type { InterlinearData } from 'paratext-9-types'; import { Paratext9Parser } from 'parsers/paratext-9/paratext9Parser'; import { @@ -50,24 +50,38 @@ globalThis.webViewComponent = function InterlinearizerWebView() { } }, []); - const interlinearization = useMemo( - () => (parsed ? convertParatext9ToInterlinearization(parsed) : undefined), - [parsed], - ); + const [interlinearization, setInterlinearization] = useState< + Awaited> | undefined + >(undefined); + + useEffect(() => { + if (!parsed) { + setInterlinearization(undefined); + return; + } + let cancelled = false; + convertParatext9ToInterlinearization(parsed) + .then((result) => { + if (!cancelled) setInterlinearization(result); + return result; + }) + .catch(() => { + if (!cancelled) setInterlinearization(undefined); + }); + return () => { + cancelled = true; + }; + }, [parsed]); /** Analyses map derived from parsed data (ID → Analysis); only defined when parsed exists. */ const analysesMap = useMemo(() => (parsed ? createAnalyses(parsed) : undefined), [parsed]); /** Data to show as JSON: depends on selected view mode. 
*/ - const jsonToShow = ((): - | typeof parsed - | ReturnType - | Record - | undefined => { + const jsonToShow = useMemo(() => { if (jsonViewMode === 'interlinearization') return interlinearization; if (jsonViewMode === 'analyses' && analysesMap) return Object.fromEntries(analysesMap); return parsed; - })(); + }, [jsonViewMode, parsed, interlinearization, analysesMap]); return (
@@ -94,45 +108,26 @@ globalThis.webViewComponent = function InterlinearizerWebView() { role="radiogroup" aria-label="JSON view mode" > - - - + {[ + { key: 'interlinear-data' as const, label: 'InterlinearData' }, + { key: 'interlinearization' as const, label: 'Interlinearization' }, + { key: 'analyses' as const, label: 'Analyses' }, + ].map(({ key, label }) => ( + + ))}

{getViewModeDescription(jsonViewMode)} diff --git a/src/parsers/paratext-9/paratext9Converter.ts b/src/parsers/paratext-9/paratext9Converter.ts index 27ae517..66ffc96 100644 --- a/src/parsers/paratext-9/paratext9Converter.ts +++ b/src/parsers/paratext-9/paratext9Converter.ts @@ -22,6 +22,46 @@ import { Confidence, } from 'types/interlinearizer-enums'; +/** + * Default SHA-256 hex implementation using the Web Crypto API so the converter can run in WebViews. + * + * @param input - UTF-8 string to hash. + * @returns Promise that resolves to the hex-encoded SHA-256 digest. + */ +async function sha256HexWebCrypto(input: string): Promise { + const encoder = new TextEncoder(); + const data = encoder.encode(input); + const hashBuffer = await globalThis.crypto.subtle.digest('SHA-256', data); + const hashArray = Array.from(new Uint8Array(hashBuffer)); + return hashArray.map((b) => b.toString(16).padStart(2, '0')).join(''); +} + +/** + * Computes a stable book-level text version from verse hashes. + * + * Collects all non-empty verse hashes, sorts them deterministically, concatenates them, and returns + * the SHA-256 digest in hex. Used so that textVersion reflects changes in any verse. Uses the + * provided hasher or the default Web Crypto implementation (for WebViews). + * + * @param verseDataArray - Verse data in deterministic (e.g. sorted by ref) order. + * @param hashSha256Hex - Optional hasher; when omitted, uses Web Crypto. In Node contexts pass one + * that matches paranext-core's generateHashFromBuffer('sha256', 'hex', Buffer.from(str, + * 'utf8')). + * @returns Promise that resolves to the hex SHA-256 digest, or '' if no verse hashes. 
+ */ +async function computeBookTextVersion( + verseDataArray: VerseData[], + hashSha256Hex: (input: string) => Promise, +): Promise { + const nonEmptyHashes = verseDataArray + .map((vd) => vd.hash) + .filter((h): h is string => h.length > 0); + if (nonEmptyHashes.length === 0) return ''; + const sortedHashes = [...nonEmptyHashes].sort(); + const concatenated = sortedHashes.join(''); + return hashSha256Hex(concatenated); +} + /** * Generates a deterministic ID for an interlinearization from Paratext 9 data. * @@ -138,7 +178,6 @@ function convertVerseToSegment( const assignments = cluster.lexemes.map((lexeme): AnalysisAssignment => { const analysisId = generateAnalysisId(lexeme.lexemeId, lexeme.senseId, glossLanguage); const assignmentId = generateAssignmentId(occurrenceId, analysisId); - return { id: assignmentId, occurrenceId, @@ -146,7 +185,6 @@ function convertVerseToSegment( status: verseData.hash ? AssignmentStatus.Approved : AssignmentStatus.Suggested, }; }); - return { id: occurrenceId, segmentId, @@ -238,14 +276,22 @@ export function createAnalyses(interlinearData: InterlinearData): Map Promise; +}; + +export async function convertParatext9ToInterlinearization( interlinearData: InterlinearData, -): Interlinearization { + options?: ConvertParatext9Options, +): Promise { const { glossLanguage, bookId, verses } = interlinearData; + const hashSha256Hex = options?.hashSha256Hex ?? sha256HexWebCrypto; const interlinearizationId = generateInterlinearizationId(bookId); const analyzedBookId = generateBookId(bookId); @@ -255,8 +301,8 @@ export function convertParatext9ToInterlinearization( }); const sortedVerseRefs = Object.keys(verses).sort(); - const firstVerseRefWithHash = sortedVerseRefs.find((ref) => verses[ref].hash); - const textVersion = firstVerseRefWithHash !== undefined ? 
verses[firstVerseRefWithHash].hash : ''; + const verseDataArray = sortedVerseRefs.map((ref) => verses[ref]); + const textVersion = await computeBookTextVersion(verseDataArray, hashSha256Hex); const analyzedBook: AnalyzedBook = { id: analyzedBookId, diff --git a/src/parsers/paratext-9/pt9-xml.md b/src/parsers/paratext-9/pt9-xml.md index 7cb7caf..6c8cd0a 100644 --- a/src/parsers/paratext-9/pt9-xml.md +++ b/src/parsers/paratext-9/pt9-xml.md @@ -41,12 +41,12 @@ The extension reads PT9 interlinear data from XML files (e.g. `Interlinear_ Date: Thu, 19 Feb 2026 17:28:51 -0700 Subject: [PATCH 5/8] Update package configuration and enhance interlinearizer WebView functionality - Add Node.js version requirement (>=18) to package.json and package-lock.json. - Improve interlinearizer WebView by implementing keyboard navigation for JSON view modes, allowing users to switch between modes using arrow keys. - Refactor related tests to ensure proper functionality of the new keyboard navigation feature. - Update README to reflect the new Node.js requirement and clarify usage of test data paths. 
--- README.md | 6 +- package-lock.json | 3 + package.json | 3 + .../interlinearizer.web-view.test.tsx | 167 +++++++++++++++++- .../paratext-9/paratext9Converter.test.ts | 2 +- .../paratext-9/paratext9Parser.test.ts | 12 +- src/__tests__/test-helpers.ts | 15 +- src/interlinearizer.web-view.tsx | 61 +++++-- src/parsers/paratext-9/paratext-9-types.ts | 114 ++++++------ src/parsers/paratext-9/paratext9Converter.ts | 6 +- src/parsers/paratext-9/paratext9Parser.ts | 6 +- src/types/interlinearizer-enums.ts | 6 +- 12 files changed, 308 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index 64ef32c..0e5b2da 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ The general file structure for an extension is as follows: - `assets/descriptions/description-.md` contains a brief description of the extension in the language specified by `` - `contributions/` contains JSON files the platform uses to extend data structures for things like menus and settings. The JSON files are referenced from the manifest - `public/` contains other static files that are copied into the build folder -- `test-data/` contains sample interlinear XML (e.g. `Interlinear_en_MAT.xml`) for development and tests +- `test-data/` contains sample interlinear XML (e.g. `Interlinear_en_MAT.xml`) for development and tests. In tests, resolve paths via `getTestDataPath('Interlinear_en_MAT.xml')` from `src/__tests__/test-helpers` rather than building paths with `..` segments. - `.github/` contains files to facilitate integration with GitHub - `.github/workflows` contains [GitHub Actions](https://github.com/features/actions) workflows for automating various processes in this repo (e.g. 
**Test** and **Lint** on push/PR to main, release-prep, hotfix-\*; **Publish** and **Bump Versions** manual dispatch; **CodeQL** for security) - `.github/assets/release-body.md` combined with a generated changelog becomes the body of [releases published using GitHub Actions](#publishing) @@ -119,6 +119,10 @@ The general file structure for an extension is as follows: ## To install +### Requirements + +- **Node.js >= 18** is required. The test suite uses the Web Crypto API (`globalThis.crypto.subtle`) for hashing in `paratext9Converter` tests (e.g. the `sha256HexWebCrypto` path in `src/__tests__/parsers/paratext-9/paratext9Converter.test.ts` when `convertParatext9ToInterlinearization` is called without the `hashSha256Hex` option). Node 18+ provides this API; older versions will cause those tests to fail. The same requirement is enforced in `package.json` via `engines.node` and is used by CI. + ### Install dependencies: 1. Follow the instructions to install [`paranext-core`](https://github.com/paranext/paranext-core#developer-install). We recommend you clone `paranext-core` in the same parent directory in which you cloned this repository so you do not have to [reconfigure paths](#configure-paths-to-paranext-core-repo) to `paranext-core`. 
diff --git a/package-lock.json b/package-lock.json index 080ad78..af7ddf1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -73,6 +73,9 @@ "webpack-merge": "^6.0.1", "zip-build": "^1.8.0" }, + "engines": { + "node": ">=18" + }, "peerDependencies": { "react": ">=18.3.1", "react-dom": ">=18.3.1" diff --git a/package.json b/package.json index 993e48e..ac6c990 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,9 @@ "types": "src/types/interlinearizer.d.ts", "author": "SIL Global", "license": "MIT", + "engines": { + "node": ">=18" + }, "scripts": { "build:web-view": "webpack --config ./webpack/webpack.config.web-view.ts", "build:main": "webpack --config ./webpack/webpack.config.main.ts", diff --git a/src/__tests__/interlinearizer.web-view.test.tsx b/src/__tests__/interlinearizer.web-view.test.tsx index b5e0766..45e60a4 100644 --- a/src/__tests__/interlinearizer.web-view.test.tsx +++ b/src/__tests__/interlinearizer.web-view.test.tsx @@ -5,7 +5,8 @@ import type { WebViewProps } from '@papi/core'; import type { SerializedVerseRef } from '@sillsdev/scripture'; import { act, fireEvent, render, screen, waitFor } from '@testing-library/react'; -import type { InterlinearData } from 'paratext-9-types'; +import React from 'react'; +import type { InterlinearData } from 'parsers/paratext-9/paratext-9-types'; /** Stub InterlinearData returned by the mocked parser. Matches shape the WebView displays. 
*/ const stubInterlinearData: InterlinearData = { @@ -202,6 +203,21 @@ describe('InterlinearizerWebView', () => { expect(screen.getByText(/paratext-9/i)).toBeInTheDocument(); }); + it('Analyses view shows empty JSON pre when createAnalyses returns undefined', async () => { + mockCreateAnalyses.mockReturnValueOnce(undefined); + + const { container } = await renderWebView(); + fireEvent.click(screen.getByRole('radio', { name: /^analyses$/i })); + await waitFor(() => { + expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); + }); + + const jsonPre = container.querySelector('pre'); + expect(jsonPre).toBeInTheDocument(); + expect(jsonPre).toBeEmptyDOMElement(); + expect(jsonPre).not.toHaveTextContent('undefined'); + }); + it('renders empty JSON pre when jsonToShow is undefined (converter returns undefined)', async () => { mockConvert.mockResolvedValueOnce(undefined); @@ -243,4 +259,153 @@ describe('InterlinearizerWebView', () => { expect(jsonPre).toBeInTheDocument(); expect(jsonPre).toBeEmptyDOMElement(); }); + + describe('handleJsonViewModeKeyDown', () => { + it('ArrowRight moves to next mode and updates selection', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowRight' }); + }); + + expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlinearization$/i })).toHaveAttribute( + 'aria-checked', + 'true', + ); + }); + + it('ArrowDown moves to next mode', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowDown' }); + }); + expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); + + await act(async 
() => { + fireEvent.keyDown(radiogroup, { key: 'ArrowDown' }); + }); + expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); + }); + + it('ArrowRight from last mode (Analyses) wraps to first (InterlinearData)', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + fireEvent.click(screen.getByRole('radio', { name: /^analyses$/i })); + expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowRight' }); + }); + + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlineardata$/i })).toHaveAttribute( + 'aria-checked', + 'true', + ); + }); + + it('ArrowLeft moves to previous mode', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + fireEvent.click(screen.getByRole('radio', { name: /^analyses$/i })); + expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowLeft' }); + }); + + expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlinearization$/i })).toHaveAttribute( + 'aria-checked', + 'true', + ); + }); + + it('ArrowUp moves to previous mode', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowUp' }); + }); + + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlineardata$/i })).toHaveAttribute( + 'aria-checked', + 'true', + ); + }); + + it('ArrowLeft from first mode (InterlinearData) wraps to last 
(Analyses)', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowLeft' }); + }); + + expect(screen.getByText(/^Analyses \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^analyses$/i })).toHaveAttribute( + 'aria-checked', + 'true', + ); + }); + + it('non-arrow key does not change mode', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + + fireEvent.keyDown(radiogroup, { key: 'a' }); + fireEvent.keyDown(radiogroup, { key: 'Enter' }); + expect(screen.getByText(/^InterlinearData \(JSON\):$/)).toBeInTheDocument(); + expect(screen.getByRole('radio', { name: /^interlineardata$/i })).toHaveAttribute( + 'aria-checked', + 'true', + ); + }); + + it('moves focus to the newly selected radio on arrow key', async () => { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + const interlinearizationRadio = screen.getByRole('radio', { + name: /^interlinearization$/i, + }); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowRight' }); + }); + + expect(document.activeElement).toBe(interlinearizationRadio); + }); + + it('does nothing when current view mode is not in JSON_VIEW_MODES (idx === -1)', async () => { + const setJsonViewMode = jest.fn(); + let useStateCallCount = 0; + const useStateSpy = jest.spyOn(React, 'useState').mockImplementation(() => { + useStateCallCount += 1; + return useStateCallCount === 1 ? 
['invalid', setJsonViewMode] : [undefined, jest.fn()]; + }); + + try { + await renderWebView(); + const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); + + await act(async () => { + fireEvent.keyDown(radiogroup, { key: 'ArrowRight' }); + }); + + expect(setJsonViewMode).not.toHaveBeenCalled(); + } finally { + useStateSpy.mockRestore(); + } + }); + }); }); diff --git a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts index f9348ba..f64a101 100644 --- a/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts +++ b/src/__tests__/parsers/paratext-9/paratext9Converter.test.ts @@ -5,11 +5,11 @@ /// import { createHash } from 'crypto'; -import type { InterlinearData } from 'paratext-9-types'; import { convertParatext9ToInterlinearization, createAnalyses, } from 'parsers/paratext-9/paratext9Converter'; +import type { InterlinearData } from 'parsers/paratext-9/paratext-9-types'; /** SHA-256 hex hasher using Node crypto. 
*/ function nodeSha256Hex(str: string): Promise { diff --git a/src/__tests__/parsers/paratext-9/paratext9Parser.test.ts b/src/__tests__/parsers/paratext-9/paratext9Parser.test.ts index eed880b..ac9193d 100644 --- a/src/__tests__/parsers/paratext-9/paratext9Parser.test.ts +++ b/src/__tests__/parsers/paratext-9/paratext9Parser.test.ts @@ -2,9 +2,9 @@ /// import * as fs from 'fs'; -import * as path from 'path'; import { Paratext9Parser } from 'parsers/paratext-9/paratext9Parser'; +import { getTestDataPath } from '../../test-helpers'; describe('Paratext9Parser', () => { let parser: Paratext9Parser; @@ -575,15 +575,7 @@ describe('Paratext9Parser', () => { }); it('parses real test-data file without throwing', () => { - const xmlPath = path.join( - __dirname, - '..', - '..', - '..', - '..', - 'test-data', - 'Interlinear_en_MAT.xml', - ); + const xmlPath = getTestDataPath('Interlinear_en_MAT.xml'); const xml = fs.readFileSync(xmlPath, 'utf-8'); const result = parser.parse(xml); diff --git a/src/__tests__/test-helpers.ts b/src/__tests__/test-helpers.ts index 2669edc..9c63a67 100644 --- a/src/__tests__/test-helpers.ts +++ b/src/__tests__/test-helpers.ts @@ -1,10 +1,23 @@ /** * @file Test helpers used to build type-safe mocks without type assertions. Provides a minimal - * ExecutionActivationContext that satisfies @papi/core types. + * ExecutionActivationContext that satisfies @papi/core types, and a stable path resolver for the + * test-data directory. */ +import * as path from 'path'; + import type { ExecutionActivationContext } from '@papi/core'; import { UnsubscriberAsyncList } from 'platform-bible-utils'; +/** + * Resolves a path to a file under the project's test-data directory. + * + * @param relativePath - Filename or path relative to test-data (e.g. 'Interlinear_en_MAT.xml'). + * @returns Absolute path to the file under test-data. 
+ */ +export function getTestDataPath(relativePath: string): string { + return path.resolve(__dirname, '..', '..', 'test-data', relativePath); +} + /** Minimal execution token-shaped object for tests (structural match for ExecutionToken). */ const mockExecutionToken: { type: 'extension'; diff --git a/src/interlinearizer.web-view.tsx b/src/interlinearizer.web-view.tsx index e13f43d..6de060b 100644 --- a/src/interlinearizer.web-view.tsx +++ b/src/interlinearizer.web-view.tsx @@ -1,11 +1,12 @@ -import { useEffect, useMemo, useState } from 'react'; -import type { InterlinearData } from 'paratext-9-types'; +import React, { useEffect, useMemo, useRef, useState } from 'react'; +import type { InterlinearData } from 'parsers/paratext-9/paratext-9-types'; import { Paratext9Parser } from 'parsers/paratext-9/paratext9Parser'; import { convertParatext9ToInterlinearization, createAnalyses, } from 'parsers/paratext-9/paratext9Converter'; +import type { Interlinearization } from 'interlinearizer'; /** Test interlinear XML bundled at build time (from test-data/Interlinear_en_MAT.xml). */ import testXml from '../test-data/Interlinear_en_MAT.xml?raw'; @@ -15,6 +16,13 @@ type ParseResult = { data: InterlinearData; error: undefined } | { data: undefin /** View mode for the JSON display: raw PT9, converted model, or analyses map. */ type JsonViewMode = 'interlinear-data' | 'interlinearization' | 'analyses'; +/** Ordered list of JSON view modes for rendering and arrow-key navigation. 
*/ +const JSON_VIEW_MODES: { key: JsonViewMode; label: string }[] = [ + { key: 'interlinear-data', label: 'InterlinearData' }, + { key: 'interlinearization', label: 'Interlinearization' }, + { key: 'analyses', label: 'Analyses' }, +]; + function getViewModeDescription(mode: JsonViewMode): string { if (mode === 'interlinear-data') return 'Paratext 9 book/verse/cluster structure.'; if (mode === 'interlinearization') @@ -40,6 +48,34 @@ function getViewModeLabel(mode: JsonViewMode): string { globalThis.webViewComponent = function InterlinearizerWebView() { const [jsonViewMode, setJsonViewMode] = useState('interlinear-data'); + /** Refs to each radio button for moving focus on arrow-key navigation. */ + const radioRefs = useRef>({ + 'interlinear-data': undefined, + interlinearization: undefined, + analyses: undefined, + }); + + /** + * Handles arrow keys on the JSON view mode radiogroup: Left/Up select previous, Right/Down select + * next; updates selection and moves focus to the new radio. + */ + const handleJsonViewModeKeyDown = (e: React.KeyboardEvent) => { + const idx = JSON_VIEW_MODES.findIndex((m) => m.key === jsonViewMode); + if (idx === -1) return; + let nextKey: JsonViewMode | undefined; + if (e.key === 'ArrowRight' || e.key === 'ArrowDown') { + e.preventDefault(); + nextKey = JSON_VIEW_MODES[(idx + 1) % JSON_VIEW_MODES.length].key; + setJsonViewMode(nextKey); + radioRefs.current[nextKey]?.focus(); + } else if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') { + e.preventDefault(); + nextKey = JSON_VIEW_MODES[(idx - 1 + JSON_VIEW_MODES.length) % JSON_VIEW_MODES.length].key; + setJsonViewMode(nextKey); + radioRefs.current[nextKey]?.focus(); + } + }; + const { data: parsed, error: parseError } = useMemo((): ParseResult => { const parser = new Paratext9Parser(); try { @@ -50,9 +86,7 @@ globalThis.webViewComponent = function InterlinearizerWebView() { } }, []); - const [interlinearization, setInterlinearization] = useState< - Awaited> | undefined - >(undefined); + 
const [interlinearization, setInterlinearization] = useState(); useEffect(() => { if (!parsed) { @@ -79,7 +113,8 @@ globalThis.webViewComponent = function InterlinearizerWebView() { /** Data to show as JSON: depends on selected view mode. */ const jsonToShow = useMemo(() => { if (jsonViewMode === 'interlinearization') return interlinearization; - if (jsonViewMode === 'analyses' && analysesMap) return Object.fromEntries(analysesMap); + if (jsonViewMode === 'analyses') + return analysesMap ? Object.fromEntries(analysesMap) : undefined; return parsed; }, [jsonViewMode, parsed, interlinearization, analysesMap]); @@ -107,23 +142,25 @@ globalThis.webViewComponent = function InterlinearizerWebView() { className="tw-inline-flex tw-rounded-md tw-border tw-border-border tw-bg-muted tw-p-0.5" role="radiogroup" aria-label="JSON view mode" + tabIndex={-1} + onKeyDown={handleJsonViewModeKeyDown} > - {[ - { key: 'interlinear-data' as const, label: 'InterlinearData' }, - { key: 'interlinearization' as const, label: 'Interlinearization' }, - { key: 'analyses' as const, label: 'Analyses' }, - ].map(({ key, label }) => ( + {JSON_VIEW_MODES.map(({ key, label }) => ( diff --git a/src/parsers/paratext-9/paratext-9-types.ts b/src/parsers/paratext-9/paratext-9-types.ts index 9aa1c3c..1ffd187 100644 --- a/src/parsers/paratext-9/paratext-9-types.ts +++ b/src/parsers/paratext-9/paratext-9-types.ts @@ -1,64 +1,62 @@ -declare module 'paratext-9-types' { - /** Character range in source text (Index, Length). */ - export interface StringRange { - /** Start index of the range in the source text (0-based). */ - index: number; - /** Number of characters in the range. */ - length: number; - } +/** Character range in source text (Index, Length). */ +export interface StringRange { + /** Start index of the range in the source text (0-based). */ + index: number; + /** Number of characters in the range. */ + length: number; +} - /** Data on the interlinearization of a single lexeme. 
*/ - export interface LexemeData { - /** ID of the lexeme (e.g. from Lexicon; XML attribute Id). */ - lexemeId: string; - /** ID of the sense/gloss used for this lexeme (XML attribute GlossId). */ - senseId: string; - } +/** Data on the interlinearization of a single lexeme. */ +export interface LexemeData { + /** ID of the lexeme (e.g. from Lexicon; XML attribute Id). */ + lexemeId: string; + /** ID of the sense/gloss used for this lexeme (XML attribute GlossId). */ + senseId: string; +} - /** Data on the interlinearization of a cluster. */ - export interface ClusterData { - /** Character range this cluster occupies in the verse text. */ - textRange: StringRange; - /** Lexemes in this cluster, in order. */ - lexemes: LexemeData[]; - /** Slash-joined LexemeIds for this cluster (e.g. "Word:a/Word:b"). */ - lexemesId: string; - /** Unique cluster id: LexemesId plus TextRange (e.g. "Word:a/Word:b/21-3"). */ - id: string; - /** Excluded flag. See [pt9-xml.md](pt9-xml.md) for details. */ - excluded: boolean; - } +/** Data on the interlinearization of a cluster. */ +export interface ClusterData { + /** Character range this cluster occupies in the verse text. */ + textRange: StringRange; + /** Lexemes in this cluster, in order. */ + lexemes: LexemeData[]; + /** Slash-joined LexemeIds for this cluster (e.g. "Word:a/Word:b"). */ + lexemesId: string; + /** Unique cluster id: LexemesId plus TextRange (e.g. "Word:a/Word:b/21-3"). */ + id: string; + /** Excluded flag. See [pt9-xml.md](pt9-xml.md) for details. */ + excluded: boolean; +} - /** Data on punctuation change. */ - export interface PunctuationData { - /** Character range this punctuation occupies in the verse text. */ - textRange: StringRange; - /** Punctuation text before the change (or empty). */ - beforeText: string; - /** Punctuation text after the change (or empty). */ - afterText: string; - } +/** Data on punctuation change. 
*/ +export interface PunctuationData { + /** Character range this punctuation occupies in the verse text. */ + textRange: StringRange; + /** Punctuation text before the change (or empty). */ + beforeText: string; + /** Punctuation text after the change (or empty). */ + afterText: string; +} - /** Interlinear data for a single verse. */ - export interface VerseData { - /** Hash of verse text when approved; empty string if not approved. */ - hash: string; - /** Lexeme clusters in this verse. */ - clusters: ClusterData[]; - /** Punctuation changes in this verse. */ - punctuations: PunctuationData[]; - } +/** Interlinear data for a single verse. */ +export interface VerseData { + /** Hash of verse text when approved; empty string if not approved. */ + hash: string; + /** Lexeme clusters in this verse. */ + clusters: ClusterData[]; + /** Punctuation changes in this verse. */ + punctuations: PunctuationData[]; +} - /** Root interlinear data: book + verses. */ - export interface InterlinearData { - /** Language code or name for the glosses. */ - glossLanguage: string; - /** Book id (e.g. "RUT", "MAT"). */ - bookId: string; - /** - * Verse data keyed by verse reference (e.g. "RUT 3:1"). Exactly one entry per reference; the - * parser rejects XML that contains duplicate verse references. - */ - verses: Record; - } +/** Root interlinear data: book + verses. */ +export interface InterlinearData { + /** Language code or name for the glosses. */ + glossLanguage: string; + /** Book id (e.g. "RUT", "MAT"). */ + bookId: string; + /** + * Verse data keyed by verse reference (e.g. "RUT 3:1"). Exactly one entry per reference; the + * parser rejects XML that contains duplicate verse references. 
+ */ + verses: Record; } diff --git a/src/parsers/paratext-9/paratext9Converter.ts b/src/parsers/paratext-9/paratext9Converter.ts index 66ffc96..740f350 100644 --- a/src/parsers/paratext-9/paratext9Converter.ts +++ b/src/parsers/paratext-9/paratext9Converter.ts @@ -6,7 +6,6 @@ * interlinearizer's book/segment/occurrence/analysis structure. */ -import type { InterlinearData, VerseData, StringRange } from 'paratext-9-types'; import type { Interlinearization, AnalyzedBook, @@ -21,6 +20,7 @@ import { AssignmentStatus, Confidence, } from 'types/interlinearizer-enums'; +import type { InterlinearData, VerseData, StringRange } from './paratext-9-types'; /** * Default SHA-256 hex implementation using the Web Crypto API so the converter can run in WebViews. @@ -295,13 +295,13 @@ export async function convertParatext9ToInterlinearization( const interlinearizationId = generateInterlinearizationId(bookId); const analyzedBookId = generateBookId(bookId); + const sortedVerseRefs = Object.keys(verses).sort(); + const verseDataArray = sortedVerseRefs.map((ref) => verses[ref]); const segments = Object.entries(verses).map(([verseRef, verseData]) => { return convertVerseToSegment(verseRef, verseData, glossLanguage); }); - const sortedVerseRefs = Object.keys(verses).sort(); - const verseDataArray = sortedVerseRefs.map((ref) => verses[ref]); const textVersion = await computeBookTextVersion(verseDataArray, hashSha256Hex); const analyzedBook: AnalyzedBook = { diff --git a/src/parsers/paratext-9/paratext9Parser.ts b/src/parsers/paratext-9/paratext9Parser.ts index ce9372e..cfd9ebb 100644 --- a/src/parsers/paratext-9/paratext9Parser.ts +++ b/src/parsers/paratext-9/paratext9Parser.ts @@ -6,7 +6,7 @@ import type { StringRange, InterlinearData, VerseData, -} from 'paratext-9-types'; +} from './paratext-9-types'; /** Range: Index and Length attributes. 
*/ interface ParsedRange { @@ -174,8 +174,8 @@ function extractClustersFromVerse(verseDataElement: ParsedVerseData): ClusterDat * Parses interlinear XML strings into {@link InterlinearData} using fast-xml-parser. * * Input is a raw XML string (caller is responsible for obtaining it, e.g. from file or network). - * Output matches the types in `interlinearizer`; no extra conversion is done. Expects the - * interlinear XML schema described in [pt9-xml.md](pt9-xml.md). + * Output matches the types in `paratext-9-types`; no extra conversion is done. Expects the Paratext + * 9 Interlinear XML schema described in [pt9-xml.md](pt9-xml.md). */ export class Paratext9Parser { private readonly parser: XMLParser; diff --git a/src/types/interlinearizer-enums.ts b/src/types/interlinearizer-enums.ts index 5ea8052..238baa4 100644 --- a/src/types/interlinearizer-enums.ts +++ b/src/types/interlinearizer-enums.ts @@ -30,10 +30,10 @@ export enum AnalysisType { /** * How the analysis was produced. * - * - `high` - * - `medium` - * - `low` * - `guess` + * - `low` + * - `medium` + * - `high` */ export enum Confidence { Guess = 'guess', From f8ba187900fec4fc4b7316fdea9d9e90acdeaf2a Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Thu, 19 Feb 2026 17:54:45 -0700 Subject: [PATCH 6/8] Enhance interlinearizer WebView with conversion status and keyboard navigation improvements - Export `JsonViewMode` type and add a sentinel for conversion status to indicate when interlinearization is in progress. - Implement `formatJsonPreContent` function to display "Converting..." during the conversion process. - Refactor keyboard navigation handling for JSON view modes, improving the separation of concerns and testability. - Update tests to verify the new conversion status display and ensure keyboard navigation functionality works as expected. 
--- .../interlinearizer.web-view.test.tsx | 72 ++++++++---- src/interlinearizer.web-view.tsx | 110 +++++++++++++----- src/parsers/paratext-9/paratext9Converter.ts | 7 +- 3 files changed, 134 insertions(+), 55 deletions(-) diff --git a/src/__tests__/interlinearizer.web-view.test.tsx b/src/__tests__/interlinearizer.web-view.test.tsx index 45e60a4..33c6d1f 100644 --- a/src/__tests__/interlinearizer.web-view.test.tsx +++ b/src/__tests__/interlinearizer.web-view.test.tsx @@ -5,7 +5,6 @@ import type { WebViewProps } from '@papi/core'; import type { SerializedVerseRef } from '@sillsdev/scripture'; import { act, fireEvent, render, screen, waitFor } from '@testing-library/react'; -import React from 'react'; import type { InterlinearData } from 'parsers/paratext-9/paratext-9-types'; /** Stub InterlinearData returned by the mocked parser. Matches shape the WebView displays. */ @@ -56,6 +55,9 @@ jest.mock('parsers/paratext-9/paratext9Converter', () => ({ createAnalyses: mockCreateAnalyses, })); +// eslint-disable-next-line import/first -- import order required for Jest mock initialization +import * as interlinearizerWebViewModule from '../interlinearizer.web-view'; + /** * Load the WebView module; it assigns the component to globalThis.webViewComponent. This pattern is * required by the Platform.Bible WebView framework: the WebView entry is built with a ?inline query @@ -63,9 +65,9 @@ jest.mock('parsers/paratext-9/paratext9Converter', () => ({ * component must require() the module and read globalThis. If the WebView export mechanism changes, * update this test accordingly. */ -require('../interlinearizer.web-view'); const InterlinearizerWebView = globalThis.webViewComponent; +const { handleJsonViewModeKeyDown } = interlinearizerWebViewModule; if (!InterlinearizerWebView) throw new Error('webViewComponent not loaded'); /** Minimal SerializedVerseRef for hook mock return. 
*/ @@ -106,6 +108,10 @@ async function renderWebView(): Promise> { } describe('InterlinearizerWebView', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + it('renders the heading "Interlinearizer"', async () => { await renderWebView(); @@ -170,11 +176,9 @@ describe('InterlinearizerWebView', () => { fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); expect(screen.getByText(/^Interlinearization \(JSON\):$/)).toBeInTheDocument(); - await waitFor(() => { - expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument(); - expect(screen.getByText(/sourceWritingSystem/i)).toBeInTheDocument(); - expect(screen.getByText(/segments/i)).toBeInTheDocument(); - }); + await waitFor(() => expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument()); + await waitFor(() => expect(screen.getByText(/sourceWritingSystem/i)).toBeInTheDocument()); + await waitFor(() => expect(screen.getByText(/segments/i)).toBeInTheDocument()); }); it('switching back to InterlinearData shows PT9 structure JSON', async () => { @@ -233,6 +237,36 @@ describe('InterlinearizerWebView', () => { expect(jsonPre).not.toHaveTextContent('undefined'); }); + it('shows "Converting..." 
in Interlinearization view while conversion is in flight', async () => { + let resolveConvert: ((value: typeof stubInterlinearization) => void) | undefined; + const convertPromise = new Promise((resolve) => { + resolveConvert = resolve; + }); + mockConvert.mockReturnValueOnce(convertPromise); + + const { container } = await act(async () => { + const result = render(); + await Promise.resolve(); + return result; + }); + + fireEvent.click(screen.getByRole('radio', { name: /^interlinearization$/i })); + + await waitFor(() => { + const jsonPre = container.querySelector('pre'); + expect(jsonPre).toHaveTextContent('Converting...'); + }); + + await act(async () => { + if (resolveConvert) resolveConvert(stubInterlinearization); + await convertPromise; + }); + + await waitFor(() => { + expect(screen.getByText(/analysisLanguages/i)).toBeInTheDocument(); + }); + }); + it('displays parse error when parser throws non-Error (uses String(err))', async () => { mockParse.mockImplementationOnce(() => { // Intentionally throw a non-Error to test the String(err) branch in the catch block. @@ -386,26 +420,14 @@ describe('InterlinearizerWebView', () => { expect(document.activeElement).toBe(interlinearizationRadio); }); - it('does nothing when current view mode is not in JSON_VIEW_MODES (idx === -1)', async () => { + it('does nothing when current view mode is not in JSON_VIEW_MODES (idx === -1)', () => { const setJsonViewMode = jest.fn(); - let useStateCallCount = 0; - const useStateSpy = jest.spyOn(React, 'useState').mockImplementation(() => { - useStateCallCount += 1; - return useStateCallCount === 1 ? 
['invalid', setJsonViewMode] : [undefined, jest.fn()]; - }); - - try { - await renderWebView(); - const radiogroup = screen.getByRole('radiogroup', { name: /json view mode/i }); - - await act(async () => { - fireEvent.keyDown(radiogroup, { key: 'ArrowRight' }); - }); + const focusRadio = jest.fn(); + // Pass a value not in JSON_VIEW_MODES so findIndex returns -1; handler takes string for testability. + handleJsonViewModeKeyDown('invalid', 'ArrowRight', setJsonViewMode, focusRadio); - expect(setJsonViewMode).not.toHaveBeenCalled(); - } finally { - useStateSpy.mockRestore(); - } + expect(setJsonViewMode).not.toHaveBeenCalled(); + expect(focusRadio).not.toHaveBeenCalled(); }); }); }); diff --git a/src/interlinearizer.web-view.tsx b/src/interlinearizer.web-view.tsx index 6de060b..5762a73 100644 --- a/src/interlinearizer.web-view.tsx +++ b/src/interlinearizer.web-view.tsx @@ -14,7 +14,13 @@ import testXml from '../test-data/Interlinear_en_MAT.xml?raw'; type ParseResult = { data: InterlinearData; error: undefined } | { data: undefined; error: string }; /** View mode for the JSON display: raw PT9, converted model, or analyses map. */ -type JsonViewMode = 'interlinear-data' | 'interlinearization' | 'analyses'; +export type JsonViewMode = 'interlinear-data' | 'interlinearization' | 'analyses'; + +/** + * Sentinel returned by jsonToShow when interlinearization mode is selected but conversion is still + * in progress. + */ +export const JSON_SHOW_CONVERTING = Symbol('JSON_SHOW_CONVERTING'); /** Ordered list of JSON view modes for rendering and arrow-key navigation. */ const JSON_VIEW_MODES: { key: JsonViewMode; label: string }[] = [ @@ -36,6 +42,49 @@ function getViewModeLabel(mode: JsonViewMode): string { return 'Analyses (JSON):'; } +/** Renders jsonToShow for the
<pre>: "Converting..." for sentinel, stringified JSON, or empty string. */
+function formatJsonPreContent(jsonToShow: unknown): string {
+  if (jsonToShow === JSON_SHOW_CONVERTING) return 'Converting...';
+  if (jsonToShow !== undefined) return JSON.stringify(jsonToShow, undefined, 2);
+  return '';
+}
+
+/**
+ * Pure handler for arrow-key navigation on the JSON view mode radiogroup. Left/Up select previous,
+ * Right/Down select next. Exported for unit testing.
+ *
+ * @param currentMode - Current JSON view mode as string; if it is not one of
+ *   {@link JSON_VIEW_MODES}, the handler does nothing and returns false.
+ * @param eventKey - KeyboardEvent.key (e.g. 'ArrowRight', 'ArrowLeft').
+ * @param setJsonViewMode - State setter for view mode.
+ * @param focusRadio - Callback to focus the radio for a given mode (e.g.
+ *   refs.current[key]?.focus()).
+ * @returns True if the key was handled (caller should call event.preventDefault()).
+ */
+export function handleJsonViewModeKeyDown(
+  currentMode: string,
+  eventKey: string,
+  setJsonViewMode: (mode: JsonViewMode) => void,
+  focusRadio: (mode: JsonViewMode) => void,
+): boolean {
+  const idx = JSON_VIEW_MODES.findIndex((m) => m.key === currentMode);
+  if (idx === -1) return false;
+  if (eventKey === 'ArrowRight' || eventKey === 'ArrowDown') {
+    const nextKey = JSON_VIEW_MODES[(idx + 1) % JSON_VIEW_MODES.length].key;
+    setJsonViewMode(nextKey);
+    focusRadio(nextKey);
+    return true;
+  }
+  if (eventKey === 'ArrowLeft' || eventKey === 'ArrowUp') {
+    const nextKey =
+      JSON_VIEW_MODES[(idx - 1 + JSON_VIEW_MODES.length) % JSON_VIEW_MODES.length].key;
+    setJsonViewMode(nextKey);
+    focusRadio(nextKey);
+    return true;
+  }
+  return false;
+}
+
 /**
  * Main interlinearizer WebView. Parses the bundled test XML into the interlinear model and displays
  * the result as raw JSON. No PAPI commands or file loading—everything is self-contained.
@@ -55,24 +104,14 @@ globalThis.webViewComponent = function InterlinearizerWebView() {
     analyses: undefined,
   });
 
-  /**
-   * Handles arrow keys on the JSON view mode radiogroup: Left/Up select previous, Right/Down select
-   * next; updates selection and moves focus to the new radio.
-   */
-  const handleJsonViewModeKeyDown = (e: React.KeyboardEvent) => {
-    const idx = JSON_VIEW_MODES.findIndex((m) => m.key === jsonViewMode);
-    if (idx === -1) return;
-    let nextKey: JsonViewMode | undefined;
-    if (e.key === 'ArrowRight' || e.key === 'ArrowDown') {
+  /** Wires arrow-key events to the pure handler and prevents default when handled. */
+  const onJsonViewModeKeyDown = (e: React.KeyboardEvent) => {
+    if (
+      handleJsonViewModeKeyDown(jsonViewMode, e.key, setJsonViewMode, (key) =>
+        radioRefs.current[key]?.focus(),
+      )
+    ) {
       e.preventDefault();
-      nextKey = JSON_VIEW_MODES[(idx + 1) % JSON_VIEW_MODES.length].key;
-      setJsonViewMode(nextKey);
-      radioRefs.current[nextKey]?.focus();
-    } else if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') {
-      e.preventDefault();
-      nextKey = JSON_VIEW_MODES[(idx - 1 + JSON_VIEW_MODES.length) % JSON_VIEW_MODES.length].key;
-      setJsonViewMode(nextKey);
-      radioRefs.current[nextKey]?.focus();
     }
   };
 
@@ -87,20 +126,33 @@ globalThis.webViewComponent = function InterlinearizerWebView() {
   }, []);
 
   const [interlinearization, setInterlinearization] = useState<Interlinearization | undefined>();
+  /**
+   * True once the convert promise has resolved or rejected; used to show "Converting..." only while
+   * in flight.
+   */
+  const [conversionSettled, setConversionSettled] = useState(false);
 
   useEffect(() => {
     if (!parsed) {
       setInterlinearization(undefined);
+      setConversionSettled(false);
       return;
     }
+    setConversionSettled(false);
     let cancelled = false;
     convertParatext9ToInterlinearization(parsed)
       .then((result) => {
-        if (!cancelled) setInterlinearization(result);
+        if (!cancelled) {
+          setInterlinearization(result);
+          setConversionSettled(true);
+        }
         return result;
       })
       .catch(() => {
-        if (!cancelled) setInterlinearization(undefined);
+        if (!cancelled) {
+          setInterlinearization(undefined);
+          setConversionSettled(true);
+        }
       });
     return () => {
       cancelled = true;
@@ -110,13 +162,19 @@ globalThis.webViewComponent = function InterlinearizerWebView() {
   /** Analyses map derived from parsed data (ID → Analysis); only defined when parsed exists. */
   const analysesMap = useMemo(() => (parsed ? createAnalyses(parsed) : undefined), [parsed]);
 
-  /** Data to show as JSON: depends on selected view mode. */
-  const jsonToShow = useMemo(() => {
-    if (jsonViewMode === 'interlinearization') return interlinearization;
+  /**
+   * Data to show as JSON: depends on selected view mode. Shows converting sentinel when in
+   * interlinearization mode and conversion has not yet settled (promise still in flight).
+   */
+  const jsonToShow = useMemo((): unknown => {
+    if (jsonViewMode === 'interlinearization') {
+      if (interlinearization === undefined && !conversionSettled) return JSON_SHOW_CONVERTING;
+      return interlinearization;
+    }
     if (jsonViewMode === 'analyses')
       return analysesMap ? Object.fromEntries(analysesMap) : undefined;
     return parsed;
-  }, [jsonViewMode, parsed, interlinearization, analysesMap]);
+  }, [jsonViewMode, parsed, interlinearization, conversionSettled, analysesMap]);
 
   return (
     
@@ -143,7 +201,7 @@ globalThis.webViewComponent = function InterlinearizerWebView() { role="radiogroup" aria-label="JSON view mode" tabIndex={-1} - onKeyDown={handleJsonViewModeKeyDown} + onKeyDown={onJsonViewModeKeyDown} > {JSON_VIEW_MODES.map(({ key, label }) => (