Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<script src="js/hyperaudio-lite-editor-whisper.js"></script>
<script src="js/word-alignment.js"></script>
<script src="js/html-json-converter.js"></script>
<script src="js/gentle-json-converter.js"></script>

<!-- Meta Tags required for
Progressive Web App -->
Expand Down Expand Up @@ -289,6 +290,8 @@
</li>
<li><export-json></export-json></li>
<li><import-json></import-json></li>
<li><export-gentle-json></export-gentle-json></li>
<li><import-gentle-json></import-gentle-json></li>
<li><label for="file-import-deepgram-json-dialog">Import Deepgram JSON</label></li>
<li><label for="file-import-srt-dialog">Import SRT</label></li>
<li><label for="file-import-vtt-dialog">Import VTT</label></li>
Expand Down
225 changes: 225 additions & 0 deletions js/gentle-json-converter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*! (C) The Hyperaudio Project. AGPL 3.0 @license: https://www.gnu.org/licenses/agpl-3.0.en.html */

(function(root, factory) {
const exports = factory();

if (typeof module !== 'undefined' && module.exports) {
module.exports = exports;
}

Object.assign(root, exports);
})(typeof window !== 'undefined' ? window : globalThis, function() {
/**
 * Reports whether a word carries integer transcript offsets.
 *
 * @param {Object} word - Word object expected to expose `startOffset` and `endOffset`.
 * @returns {boolean} `true` only when both offsets are integers.
 */
function hasUsableOffsets(word) {
  const { startOffset, endOffset } = word;

  return [startOffset, endOffset].every((offset) => Number.isInteger(offset));
}

/**
 * Picks the display text for a word.
 *
 * The transcript slice between the word's offsets is preferred; when the
 * transcript is empty, the offsets are unusable, or the slice trims to
 * nothing, the word's own `word` / `alignedWord` field is used instead.
 *
 * @param {Object} word - Word object with optional offsets and `word` / `alignedWord`.
 * @param {string} transcript - Full transcript text (may be empty).
 * @returns {string} Trimmed word text, possibly an empty string.
 */
function getWordTextFromTranscript(word, transcript) {
  const fromWordFields = () => String(word.word || word.alignedWord || '').trim();

  if (!transcript || !hasUsableOffsets(word)) {
    return fromWordFields();
  }

  const sliced = transcript.slice(word.startOffset, word.endOffset).trim();

  return sliced || fromWordFields();
}

/**
 * Returns the transcript text lying between a word and the word after it.
 *
 * @param {string} transcript - Full transcript text.
 * @param {Object} word - Word whose `endOffset` marks the start of the gap.
 * @param {Object|null} nextWord - Following word, or a falsy value when there is none.
 * @returns {string} Empty string when the transcript is empty or `word` has no
 *   usable offsets; the rest of the transcript after `word` when `nextWord` is
 *   missing or has no usable offsets; otherwise the text between the two words.
 */
function getGap(transcript, word, nextWord) {
  const canLocateWord = Boolean(transcript) && hasUsableOffsets(word);
  if (!canLocateWord) {
    return '';
  }

  const gapStart = word.endOffset;
  const canLocateNext = Boolean(nextWord) && hasUsableOffsets(nextWord);

  return canLocateNext
    ? transcript.slice(gapStart, nextWord.startOffset)
    : transcript.slice(gapStart);
}

/**
 * Removes the marker sequences this module treats as markup, plus all whitespace.
 *
 * Patterns are applied one after another, in this order: `[+]`, `||`, runs of
 * backslashes, runs of whitespace. Because every whitespace character is
 * dropped, separate tokens in the input end up joined together.
 *
 * @param {string} text - Text to clean.
 * @returns {string} The text with markers and whitespace removed.
 */
function stripGentleMarkup(text) {
  // Order matters: removing one marker can bring the halves of another together.
  const removalPasses = [/\[\+\]/g, /\|\|/g, /\\+/g, /\s+/g];

  return removalPasses.reduce((remaining, pattern) => remaining.replace(pattern, ''), text);
}

/**
 * Extracts the text at the start of a gap, up to the first marker.
 *
 * A marker is `[+]`, `||`, a backslash, or two or more line breaks (optionally
 * separated by whitespace). Whatever precedes the first marker is passed
 * through `stripGentleMarkup`; when no marker exists the whole gap is used.
 *
 * @param {string} gap - Transcript text following a word.
 * @returns {string} Cleaned text found before the first marker.
 */
function trailingTextBeforeMarkup(gap) {
  const firstMarkerAt = gap.search(/\[\+\]|\|\||\\|(?:\r?\n\s*){2,}/);

  if (firstMarkerAt === -1) {
    return stripGentleMarkup(gap);
  }

  return stripGentleMarkup(gap.slice(0, firstMarkerAt));
}

/**
 * Returns the cleaned text that follows a word in the transcript, taken from
 * the gap between it and the next word and cut off at the first marker.
 *
 * @param {string} transcript - Full transcript text.
 * @param {Object} word - Word whose trailing text is wanted.
 * @param {Object|null} nextWord - Following word, or a falsy value when there is none.
 * @returns {string} Text to append to the word (possibly empty).
 */
function getTrailingWordText(transcript, word, nextWord) {
  const gapAfterWord = getGap(transcript, word, nextWord);

  return trailingTextBeforeMarkup(gapAfterWord);
}

/**
 * Tells whether the text between two words contains a paragraph break.
 *
 * A break is either a `||` marker or two or more line breaks (optionally
 * separated by whitespace).
 *
 * @param {string} gap - Transcript text between two words.
 * @returns {boolean} `true` when the gap contains a paragraph break.
 */
function startsNewParagraph(gap) {
  const breakPatterns = [/\|\|/, /(?:\r?\n\s*){2,}/];

  return breakPatterns.some((pattern) => pattern.test(gap));
}

/**
 * Selects the words that can be placed on a timeline, ordered by start time.
 *
 * A word is kept when its `case` is absent or equal to `'success'` and both
 * `start` and `end` are numbers. The input array is not modified.
 *
 * @param {Object|null} gentleData - Object with an optional `words` array.
 * @returns {Object[]} New array of the kept words, sorted ascending by `start`.
 */
function successfulGentleWords(gentleData) {
  const candidates = (gentleData && gentleData.words) || [];

  const isTimedSuccess = (word) => {
    const caseAccepted = !word.case || word.case === 'success';
    return caseAccepted && typeof word.start === 'number' && typeof word.end === 'number';
  };
  const byStartTime = (first, second) => first.start - second.start;

  return candidates.filter(isTimedSuccess).sort(byStartTime);
}

/**
 * Appends a paragraph record spanning two words to `paragraphs`.
 *
 * Nothing is appended when either boundary word is missing. The record has a
 * `null` speaker, the first word's `start` and the last word's `end`.
 *
 * @param {Object[]} paragraphs - Array to append to (mutated in place).
 * @param {Object|null} paragraphStart - First word of the paragraph.
 * @param {Object|null} paragraphEnd - Last word of the paragraph.
 * @returns {void}
 */
function closeParagraph(paragraphs, paragraphStart, paragraphEnd) {
  const hasBothBoundaries = Boolean(paragraphStart) && Boolean(paragraphEnd);

  if (hasBothBoundaries) {
    const { start } = paragraphStart;
    const { end } = paragraphEnd;

    paragraphs.push({ speaker: null, start, end });
  }
}

/**
 * Groups words into paragraphs using pauses between them.
 *
 * A new paragraph begins at a word when the pause since the previous word's
 * `end` exceeds `maxGap` and the previous word's text ends in `.`, `!` or `?`.
 *
 * @param {Object[]} words - Words with `start`, `end` and `text`, in playback order.
 * @param {number} [maxGap=1.5] - Pause length that must be exceeded to split
 *   (same unit as the words' `start` / `end`).
 * @returns {Object[]} Paragraph records; empty when `words` is empty.
 */
function buildParagraphsByTiming(words, maxGap = 1.5) {
  const result = [];
  if (words.length === 0) return result;

  let firstOfParagraph = words[0];
  let lastSeen = words[0];

  for (const current of words.slice(1)) {
    const pause = current.start - lastSeen.end;
    const sentenceFinished = /[.!?]$/.test(lastSeen.text);

    if (sentenceFinished && pause > maxGap) {
      closeParagraph(result, firstOfParagraph, lastSeen);
      firstOfParagraph = current;
    }

    lastSeen = current;
  }

  // Flush whatever paragraph is still open once the words run out.
  closeParagraph(result, firstOfParagraph, lastSeen);
  return result;
}

/**
 * Converts Gentle alignment data into the `{ words, paragraphs, sections }`
 * structure produced by this module for Hyperaudio.
 *
 * Each usable Gentle word becomes `{ start, end, text }`, where `text` is the
 * word as written in the transcript plus any text that follows it up to the
 * first marker. Paragraph boundaries are taken from paragraph breaks found in
 * the transcript between words. When the transcript yields at most one
 * paragraph, paragraphs are derived from pauses between words instead.
 *
 * @param {Object|null} gentleData - Object with optional `transcript` (string)
 *   and `words` (array) properties.
 * @returns {{words: Object[], paragraphs: Object[], sections: Object[]}}
 *   `sections` holds a single entry spanning all words, or is empty when there
 *   are no words.
 */
function gentleJsonToHyperaudioJson(gentleData) {
  const transcript = String((gentleData && gentleData.transcript) || '');
  const sourceWords = successfulGentleWords(gentleData);
  const words = [];
  const paragraphs = [];
  let paragraphStart = null;
  let previousOutputWord = null;

  for (let i = 0; i < sourceWords.length; i++) {
    const sourceWord = sourceWords[i];
    const nextSourceWord = sourceWords[i + 1] || null;

    // The gap after this word is also the gap before the next one, so a single
    // check per word is enough to catch every paragraph break.
    const gapAfter = getGap(transcript, sourceWord, nextSourceWord);
    const text = getWordTextFromTranscript(sourceWord, transcript) + trailingTextBeforeMarkup(gapAfter);

    if (text) {
      const outputWord = {
        start: sourceWord.start,
        end: sourceWord.end,
        text: text
      };

      words.push(outputWord);
      if (!paragraphStart) paragraphStart = outputWord;
      previousOutputWord = outputWord;
    }

    if (previousOutputWord && startsNewParagraph(gapAfter)) {
      // closeParagraph ignores the call when no paragraph is currently open.
      closeParagraph(paragraphs, paragraphStart, previousOutputWord);
      paragraphStart = null;
    }
  }

  // Flush the paragraph that is still open after the last word, if any.
  closeParagraph(paragraphs, paragraphStart, previousOutputWord);

  // Any emitted word produces at least one paragraph above, so testing for
  // "more than zero" would never reach the timing fallback. A single paragraph
  // means the transcript carried no usable paragraph structure; in that case
  // split on pauses (which yields the same single paragraph when there are none).
  const finalParagraphs = paragraphs.length > 1 ? paragraphs : buildParagraphsByTiming(words);
  const sections = words.length > 0
    ? [{ start: words[0].start, end: words[words.length - 1].end }]
    : [];

  return { words, paragraphs: finalParagraphs, sections };
}

/**
 * Separates a word from the punctuation attached to its end.
 *
 * The suffix is either a run of `.,!?;:` characters or a single `-`. The
 * input is stringified and trimmed first; text the pattern cannot match (for
 * example an empty string) comes back whole as `core` with an empty suffix.
 *
 * @param {*} text - Word text; falsy values are treated as an empty string.
 * @returns {{core: string, suffix: string}} The word body and its trailing punctuation.
 */
function splitTrailingPunctuation(text) {
  const value = String(text || '').trim();
  const parts = /^(.+?)([.,!?;:]+|-)?$/.exec(value);

  if (parts === null) {
    return { core: value, suffix: '' };
  }

  const [, body, punctuation] = parts;

  return { core: body || value, suffix: punctuation || '' };
}

/**
 * Decides whether the next word should be written directly after this one,
 * with no separating space. That is the case only for a single-hyphen suffix.
 *
 * @param {string} suffix - Trailing punctuation of the current word.
 * @returns {boolean} `true` when `suffix` is exactly `'-'`.
 */
function shouldJoinNextWord(suffix) {
  const HYPHEN = '-';

  return suffix === HYPHEN;
}

/**
 * Builds a Gentle-style `{ transcript, words }` object from Hyperaudio words.
 *
 * Words lacking numeric `start` / `end` or non-blank `text` are skipped; the
 * rest are processed in ascending `start` order. Each word's body is written
 * to the transcript followed by its trailing punctuation, with a single space
 * between words unless the previous word ended in a hyphen. The recorded
 * offsets cover the word body only, not its punctuation.
 *
 * @param {Object|null} hyperaudioData - Object with an optional `words` array
 *   of `{ start, end, text }` entries.
 * @returns {{transcript: string, words: Object[]}} Transcript text and one
 *   `case: 'success'` entry per exported word (with an empty `phones` list).
 */
function hyperaudioJsonToGentleJson(hyperaudioData) {
  const isExportable = (word) =>
    typeof word.start === 'number' && typeof word.end === 'number' && String(word.text || '').trim();

  const orderedWords = ((hyperaudioData && hyperaudioData.words) || [])
    .filter(isExportable)
    .sort((first, second) => first.start - second.start);

  const gentleWords = [];
  let transcript = '';
  let glueToPrevious = false;

  orderedWords.forEach((word) => {
    const { core, suffix } = splitTrailingPunctuation(word.text);
    if (!core) return;

    if (transcript.length > 0 && !glueToPrevious) {
      transcript += ' ';
    }

    // The transcript length doubles as the running character offset.
    const startOffset = transcript.length;
    transcript += core;
    const endOffset = transcript.length;

    if (suffix) {
      transcript += suffix;
    }

    gentleWords.push({
      alignedWord: core.toLowerCase(),
      case: 'success',
      end: word.end,
      endOffset: endOffset,
      phones: [],
      start: word.start,
      startOffset: startOffset,
      word: core
    });

    glueToPrevious = shouldJoinNextWord(suffix);
  });

  return {
    transcript: transcript,
    words: gentleWords
  };
}

return {
gentleJsonToHyperaudioJson,
hyperaudioJsonToGentleJson
};
});
74 changes: 72 additions & 2 deletions js/hyperaudio-lite-editor-export.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,76 @@ class ImportJson extends HTMLElement {

customElements.define('import-json', ImportJson);

/**
 * `<export-gentle-json>` custom element: renders an "Export Gentle JSON" link
 * that converts the current transcript and downloads it as a JSON file.
 */
class ExportGentleJson extends HTMLElement {

  constructor() {
    super();
  }

  /** Renders the link and wires up the click handler when the element is attached. */
  connectedCallback() {
    this.innerHTML = `<a>Export Gentle JSON</a>`;
    this.addEventListener('click', this.exportGentleJson);
  }

  /**
   * Converts the `#hypertranscript` element to Gentle JSON and downloads it as
   * `hyperaudio-lite-gentle.json`. Shows an alert instead when that element is
   * not in the document.
   */
  exportGentleJson() {
    const transcriptElement = document.getElementById('hypertranscript');

    if (transcriptElement === null) {
      alert("Currently you can only export Gentle JSON from the transcript view.");
      return;
    }

    const hyperaudioData = htmlToJson(transcriptElement);
    const gentleData = window.hyperaudioJsonToGentleJson(hyperaudioData);
    downloadJson(gentleData, 'hyperaudio-lite-gentle.json');
  }
}

customElements.define('export-gentle-json', ExportGentleJson);

/**
 * `<import-gentle-json>` custom element: renders an "Import Gentle JSON" link
 * that lets the user pick a JSON file and loads it into the transcript.
 */
class ImportGentleJson extends HTMLElement {

  constructor() {
    super();
  }

  /** Renders the link and wires up the click handler when the element is attached. */
  connectedCallback() {
    this.innerHTML = `<a>Import Gentle JSON</a>`;
    this.addEventListener('click', this.importGentleJson);
  }

  /**
   * Opens a file picker, reads the chosen file as text, converts it from
   * Gentle JSON and replaces the contents of `#hypertranscript` with the
   * result, then dispatches `hyperaudioInit` on the document.
   */
  importGentleJson() {
    const picker = document.createElement('input');
    picker.type = 'file';
    picker.accept = 'application/json,.json';

    const handleFileLoaded = (loadEvent) => {
      const transcriptElement = document.getElementById('hypertranscript');

      if (transcriptElement === null) {
        alert("Currently you can only import Gentle JSON from the Transcript View.");
        return;
      }

      try {
        const gentleData = JSON.parse(loadEvent.target.result);
        const hyperaudioData = window.gentleJsonToHyperaudioJson(gentleData);
        // NOTE(review): the markup comes from a user-supplied file; confirm that
        // jsonToHtml escapes word text before it is assigned to innerHTML.
        transcriptElement.innerHTML = jsonToHtml(hyperaudioData);
        document.dispatchEvent(new CustomEvent('hyperaudioInit'));
      } catch (error) {
        // Covers both malformed JSON and failures during conversion/rendering.
        console.error('Unable to import Gentle JSON:', error);
        alert('Unable to import Gentle JSON. Please check that the file is valid JSON with Gentle word timings.');
      }
    };

    const handleFileChosen = (changeEvent) => {
      const chosenFile = changeEvent.target.files[0];
      if (!chosenFile) return;

      const reader = new FileReader();
      reader.addEventListener('load', handleFileLoaded);
      reader.readAsText(chosenFile);
    };

    picker.addEventListener('change', handleFileChosen);
    picker.click();
  }
}

customElements.define('import-gentle-json', ImportGentleJson);

class ImportDeepgramJson extends HTMLElement {

constructor() {
Expand Down Expand Up @@ -428,13 +498,13 @@ class ImportVtt extends HTMLElement {

customElements.define('import-vtt', ImportVtt);

function downloadJson(jsonData) {
function downloadJson(jsonData, filename = 'hyperaudio-lite.json') {
// download json file
let dataStr = 'data:text/json;charset=utf-8,' + encodeURIComponent(JSON.stringify(jsonData, null, 2));
//start download
let downloadAnchorNode = document.createElement('a');
downloadAnchorNode.setAttribute('href', dataStr);
downloadAnchorNode.setAttribute('download', 'hyperaudio-lite.json');
downloadAnchorNode.setAttribute('download', filename);
document.body.appendChild(downloadAnchorNode); // required for firefox
downloadAnchorNode.click();
downloadAnchorNode.remove();
Expand Down
1 change: 1 addition & 0 deletions test/fixtures/gentle-sample-h.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/fixtures/gentle-sample-p.json

Large diffs are not rendered by default.

Loading