Merge branch 'dev' into 'main'

Merge dev to main before handing in report

See merge request cse2000-software-project/2023-2024/cluster-n/11c/atypical-speech-project!155
This commit is contained in:
Yousef El Bakri 2024-06-22 00:09:33 +02:00
commit 6efd4f8595
102 changed files with 3058 additions and 316 deletions

76
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,76 @@
# CI for pull requests: the pre-commit hooks run first; the frontend (app)
# checks and the backend (kernel) test suite both wait for them to pass.
name: CI Pipeline
on:
  pull_request:
jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install pre-commit
        run: |
          python -m pip install --upgrade pip
          pip install pre-commit
      - name: Run pre-commit
        # SKIP lists hook ids; keep it in sync with the ids in the pre-commit config
        run: SKIP="prettier, eslint" pre-commit run --all-files
  app-testing:
    runs-on: ubuntu-latest
    needs: pre-commit
    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: '20.x'
      - name: Install pnpm
        run: cd app && corepack enable pnpm
      - name: Install dependencies
        run: cd app && pnpm install
      - name: Run pnpm check
        run: cd app && pnpm check
      - name: Run pnpm lint
        run: cd app && pnpm lint
      - name: Run pnpm format
        run: cd app && pnpm format
      - name: Run pnpm test
        run: cd app && pnpm test:unit  # running only unit tests, since integration costs money
  pytest:
    runs-on: ubuntu-latest
    needs: pre-commit  # since installing all the stuff is slow, and precommit is fail-fast
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        # apt-get runs without a terminal here: refresh the package index first
        # and pass -y so the install cannot stop at the confirmation prompt.
        run: |
          cd kernel
          sudo apt-get update
          sudo apt-get install -y ffmpeg
          curl -sSL https://install.python-poetry.org | python -
          poetry install
      - name: Run pytest
        run: |
          cd kernel
          poetry run pytest

3
.gitignore vendored
View file

@ -1,3 +1,6 @@
.apikeys.env
kernel/.coverage
app/.pnpm-store
data/*
.nyc_output

View file

@ -19,6 +19,8 @@ stages:
build:app:
stage: build
image: 'node:22.1.0-slim'
variables:
PUBLIC_KERNEL_ORIGIN: "http://kernel:8000"
rules:
- changes:
- app/**/*
@ -35,6 +37,8 @@ build:app:
typecheck:app:
stage: typecheck
image: 'node:22.1.0-slim'
variables:
PUBLIC_KERNEL_ORIGIN: "http://kernel:8000"
rules:
- changes:
- app/**/*
@ -52,6 +56,8 @@ typecheck:app:
lint:app:
stage: lint
image: 'node:22.1.0-slim'
variables:
PUBLIC_KERNEL_ORIGIN: "http://kernel:8000"
rules:
- changes:
- app/**/*
@ -69,6 +75,8 @@ lint:app:
test:app:
stage: test
image: 'node:22.1.0-slim'
variables:
PUBLIC_KERNEL_ORIGIN: "http://kernel:8000"
rules:
- changes:
- app/**/*

View file

@ -24,7 +24,7 @@ repos:
hooks:
- id: pyright
types_or: [python, pyi, jupyter]
additional_dependencies: [numpy, pytest, fastapi, praat-parselmouth, orjson, pydantic, scipy, psycopg, deepgram-sdk, pydub, ffmpeg-python, jiwer, beartype, openai, mytextgrid, bert-score, jaroWinkler]
additional_dependencies: [numpy, pytest, fastapi, torch, praat-parselmouth, orjson, pydantic, scipy, psycopg, deepgram-sdk, pydub, ffmpeg-python, jiwer, beartype, openai, mytextgrid, bert-score, jaroWinkler, "--extra-index-url", "https://download.pytorch.org/whl/cpu"]
stages: [pre-commit]
- repo: https://github.com/crate-ci/typos
rev: v1.21.0
@ -32,6 +32,10 @@ repos:
- id: typos
stages: [pre-commit]
- repo: local
# the local repository is ignored when running precommit in the ci
# but the only way to do this is to ignore the hooks by ids
# so if you change the ids of local hooks, make sure to change them
# in the .github/workflows and (maybe?) .gitlab/ stuff
hooks:
- id: prettier
name: pnpm prettier

1
app/.gitignore vendored
View file

@ -13,3 +13,4 @@ vite.config.ts.timestamp-*
*.db
coverage
test-results
.nyc_output

View file

@ -10,4 +10,4 @@ RUN pnpm install --prod
RUN pnpm run build
CMD pnpm run preview --host --port 5173 | pnpm exec pino-pretty
CMD BODY_SIZE_LIMIT=512000000 PORT=5173 node -r dotenv/config build | pnpm exec pino-pretty

View file

@ -18,6 +18,7 @@
},
"devDependencies": {
"@playwright/test": "^1.44.1",
"@sveltejs/adapter-node": "^5.1.1",
"@types/eslint": "^8.56.10",
"@typescript-eslint/eslint-plugin": "^7.13.0",
"@typescript-eslint/parser": "^7.13.0",
@ -35,7 +36,7 @@
"@fontsource/atkinson-hyperlegible": "^5.0.20",
"@lucia-auth/adapter-drizzle": "^1.0.7",
"@node-rs/argon2": "^1.8.3",
"@sveltejs/adapter-auto": "^3.2.2",
"@sveltejs/adapter-auto": "^3.2.0",
"@sveltejs/kit": "^2.5.16",
"@sveltejs/vite-plugin-svelte": "^3.1.1",
"@types/d3": "^7.4.3",
@ -81,6 +82,7 @@
"unified": "^11.0.4",
"vite": "^5.3.1",
"vitest": "^1.6.0",
"vite-plugin-istanbul": "^6.0.2",
"wavesurfer.js": "^7.7.15",
"webm-to-mp4": "^1.0.0",
"zod": "^3.23.8"

View file

@ -21,15 +21,20 @@ const config: PlaywrightTestConfig = {
name: 'firefox',
use: { ...devices['Desktop Firefox'] },
dependencies: ['setup sample account']
},
{
name: 'chromium',
use: { ...devices['Desktop Chrome'], permissions: ['microphone', 'camera'] },
dependencies: ['setup sample account']
}
],
use: {
baseURL: 'http://localhost:80',
actionTimeout: 10000,
actionTimeout: 30000,
navigationTimeout: 10000
},
expect: {
timeout: 10000
timeout: 30000
},
testDir: 'tests',
testMatch: /(.+\.)?(test|spec)\.[jt]s/

565
app/pnpm-lock.yaml generated

File diff suppressed because it is too large Load diff

View file

@ -56,6 +56,30 @@
export function removeFile(fileId: string) {
paneState.files = paneState.files.filter((file) => file.id !== fileId);
}
/**
 * Add a file, given as a JSON string of its file state, to this pane.
 *
 * Input that is not valid JSON or does not match the `fileState` schema is
 * ignored, as is a file whose id is already present in the pane. The pane's
 * file list is only replaced after the computed data for the new list of
 * files has been obtained.
 *
 * @param fileJSON JSON-serialized file state (e.g. the `application/json`
 *   payload of a drop event)
 */
export async function addFile(fileJSON: string) {
	const { value: json, ok } = JsonSafeParse(fileJSON);

	if (!ok) {
		// From Dockview
		return;
	}

	// safeParse rather than parse: a payload that is valid JSON but not a file
	// state would otherwise throw here and surface as an unhandled rejection
	// in callers that do not await this function.
	const parsed = fileState.safeParse(json);
	if (!parsed.success) {
		return;
	}
	const file = parsed.data;

	// Don't add files already present
	if (paneState.files.some((f) => f.id === file.id)) {
		// TODO: Show message (in a Sonner)
		return;
	}

	// When adding a file, wait until we compute the data to add it in
	const newFiles = [...paneState.files, file];

	getComputedDataProp = await getComputedDataFunction(paneState.mode, {
		...paneState,
		files: newFiles
	});

	paneState.files = newFiles;
}
</script>
<section
@ -70,27 +94,7 @@
event.preventDefault();
if (event.dataTransfer) {
const transferredData = event.dataTransfer.getData('application/json');
const { value: json, ok } = JsonSafeParse(transferredData);
if (!ok) {
// From Dockview
return;
}
const file = fileState.parse(json);
// Don't add files already present
if (paneState.files.some((f) => f.id === file.id)) {
// TODO: Show message (in a Sonner)
return;
}
// When adding a file, wait until we compute the data to add it in
const newFiles = [...paneState.files, file];
getComputedDataProp = await getComputedDataFunction(paneState.mode, {
...paneState,
files: newFiles
});
paneState.files = newFiles;
addFile(transferredData);
}
}}
role="group"
@ -102,6 +106,10 @@
{#if getComputedDataProp === null}
Loading...
{:else if paneState.files.length === 0}
<div class="flex h-full w-full items-center justify-center text-2xl text-muted-foreground">
Drag a file from the file explorer and drop it here to start analyzing!
</div>
{:else}
<!--
The type of the component is a union of mode components. However, this means that

View file

@ -1,7 +1,6 @@
<script lang="ts">
import { Button } from '$lib/components/ui/button';
import { flip } from 'svelte/animate';
import { modeNames, modeComponents, type mode as modeType } from '.';
import { modeNames, modeComponents, niceModeNames, type mode as modeType } from '.';
export let mode: modeType.Name;
export let onModeHover: (mode: modeType.Name) => void = () => {};
@ -13,23 +12,23 @@
class:z-40={mode === currentMode}
style:--index={i}
class:opacity-0={mode !== currentMode}
class="select relative transition duration-500"
animate:flip={{ delay: 1000 }}
class="select relative transition duration-300"
>
<Button
on:click={() => (mode = currentMode)}
on:hover={() => onModeHover(currentMode)}
variant={mode === currentMode ? 'default' : 'outline'}
class="h-10 w-16 shadow-xl"
on:click={() => (mode = currentMode)}
on:hover={() => onModeHover(currentMode)}
>
<svelte:component this={modeComponents[currentMode].icon} class="w-12"></svelte:component>
</Button>
<span
class="label pointer-events-none absolute right-16 top-2 h-16 w-max pr-2 transition duration-500 ease-in-out"
<button
class="label absolute right-16 top-0 h-12 w-max pb-2 pr-2 transition duration-300 ease-in-out"
on:click={() => (mode = currentMode)}
>
{currentMode}
</span>
{niceModeNames[currentMode]}
</button>
</div>
{/each}
</div>
@ -56,15 +55,23 @@
.main:hover > div {
opacity: 1;
transition-delay: 0s;
}
.main:hover > div > .label {
opacity: 0.7;
transition-delay: 1s;
}
.main > div > .label {
opacity: 0;
pointer-events: all;
cursor: pointer;
transition-delay: 0s;
}
.label {
opacity: 0;
pointer-events: none;
transition-delay: 0.3s;
}
.main > div:hover > .label {
opacity: 1;
}
</style>

View file

@ -55,6 +55,7 @@
<section class="flex flex-col overflow-x-auto font-mono text-xl tracking-wider">
<!-- This one is for showing the reference -> hypothesis -->
<h4 class="border-box flex whitespace-nowrap">
Ref:
{#each common.alignments as alignment}
<div class="w-fit {colors[alignment.type]}">
{#if alignment.type === 'substitute'}
@ -88,6 +89,7 @@
{/each}
</h4>
<h4 class="border-box flex whitespace-nowrap">
Hyp:
<!-- This one is for showing the hypothesis -> reference -->
{#each common.alignments as alignment}
<div class="w-fit {colors[alignment.type]}">

View file

@ -71,12 +71,18 @@
<span>WIL: {(computedData.wordLevel.wil * 100).toFixed(2) + '%'}</span>
<span>WIP: {(computedData.wordLevel.wip * 100).toFixed(2) + '%'}</span>
</div>
<div class="flex flex-wrap gap-3 font-mono">
<span>BERT: {computedData.wordLevel.bert.toFixed(2)}</span>
<span>Jaro Winkler: {computedData.wordLevel.jaroWinkler.toFixed(2)}</span>
</div>
<ErrorDiff common={computedData.wordLevel} joinString=" " />
<Separator />
<h3 class="pt-4 text-xl">BERT</h3>
<div class="flex flex-wrap gap-3 font-mono">
<span>BERT: {computedData.wordLevel.bert.toFixed(2)}</span>
</div>
<Separator />
<h3 class="pt-4 text-xl">Jaro Winkler</h3>
<div class="flex flex-wrap gap-3 font-mono">
<span>Jaro Winkler: {computedData.wordLevel.jaroWinkler.toFixed(2)}</span>
</div>
<Separator />
<h3 class="pt-4 text-xl">Character Error Rate</h3>
@ -84,6 +90,8 @@
<span class="font-mono">CER: {(computedData.characterLevel.cer * 100).toFixed(2) + '%'}</span>
<ErrorDiff common={computedData.characterLevel} joinString="" />
{:else}
<h2 class="text-muted-foreground">This file has no ground truth.</h2>
<h2 class="text-muted-foreground">
Select a non-empty track for both the reference and hypothesis
</h2>
{/if}
</div>

View file

@ -24,6 +24,8 @@ export const fileState = z.object({
frame: frame.nullable().default(null),
cycleEnabled: z.boolean().default(false),
transcriptions: z.array(transcription).default([]),
groundTruth: z.string().default(''),
note: z.string().default(''),
reference: z
.object({
id: z.string(),

View file

@ -61,6 +61,18 @@ export const modes = {
'error-rate': errorRateData
} as const satisfies Record<string, ModeValidator>;
/**
 * Human-readable display names for the modes, keyed by mode id.
 * These are the labels shown to the user in place of the raw ids.
 */
export const niceModeNames: Record<string, string> = {
	'simple-info': 'General information',
	waveform: 'Waveform',
	spectrogram: 'Spectrogram',
	'vowel-space': 'Vowel space',
	transcription: 'Transcription',
	'error-rate': 'Error rate'
};
/**
* The names of the modes
*

View file

@ -17,7 +17,9 @@
{ label: 'Duration', value: display(computedData.duration, 'seconds') },
{ label: 'File size', value: formatHumanSensibleFileSize(computedData.fileSize) },
{ label: 'Average pitch', value: display(computedData.averagePitch, 'Hz') },
{ label: 'Date created', value: `${computedData.fileCreationDate.toLocaleString(LOCALE)}` }
{ label: 'Date created', value: `${computedData.fileCreationDate.toLocaleString(LOCALE)}` },
{ label: 'Ground Truth', value: fileState.groundTruth },
{ label: 'Note', value: fileState.note }
];
function getFrameData(frame: typeof computedData.frame) {
@ -38,7 +40,8 @@
$: frameData = getFrameData(computedData.frame);
</script>
<h1 class="overflow-hidden text-ellipsis text-xl font-bold">{fileState.id}</h1>
<h1 class="overflow-hidden text-ellipsis text-xl font-bold">{fileState.name}</h1>
<p class="overflow-hidden text-ellipsis text-xs text-muted-foreground">{fileState.id}</p>
<div class="flex flex-col flex-wrap opacity-80">
{#each displayData as { label, value }}

View file

@ -38,7 +38,9 @@ export const simpleInfoData = {
.pick({
id: true,
name: true,
frame: true
frame: true,
groundTruth: true,
note: true
})
.default({}),

View file

@ -8,14 +8,13 @@
import SpectrogramPlugin from 'wavesurfer.js/dist/plugins/spectrogram.esm.js';
import type { Frame } from '$lib/analysis/kernel/framing';
import type { mode } from '..';
import { used } from '$lib/utils';
import HoverPlugin from 'wavesurfer.js/dist/plugins/hover.js';
import { numberToTime } from '$lib/components/audio-controls';
export let computedData: mode.ComputedData<'spectrogram'>;
export let fileState: mode.FileState<'spectrogram'>;
let element: HTMLElement;
used(computedData);
export const controls: ControlRequirements = {
setSpeed(speed: number) {
wavesurfer.setPlaybackRate(speed);
@ -50,6 +49,7 @@
let wavesurfer: WaveSurfer;
let regions: RegionsPlugin;
let spectrogram: SpectrogramPlugin;
let hover: HoverPlugin;
let timeline: TimelinePlugin;
let spectrogramCanvas: HTMLCanvasElement;
@ -88,6 +88,23 @@
})
);
hover = wavesurfer.registerPlugin(
HoverPlugin.create({
formatTimeCallback: () => ''
})
);
hover.on('hover', (event) => {
const shadowRoot = element.children[0].shadowRoot;
if (shadowRoot) {
const hoverLabel = shadowRoot.querySelector('span[part="hover-label"]');
if (hoverLabel) {
hoverLabel.innerHTML =
numberToTime(wavesurfer.getDuration() * event) + '<br>' + getFormants(event);
}
}
});
regions.enableDragSelection(
{
color: 'rgba(255, 0, 0, 0.1)'
@ -138,6 +155,9 @@
});
wavesurfer.on('play', () => {
if (regions.getRegions().length == 1) {
wavesurfer.setTime(regions.getRegions()[0].start);
}
let ctx = spectrogramCanvas.getContext('2d');
// we do this because the property exists in spectrogram but isnt' available to us
@ -171,6 +191,14 @@
});
wavesurfer.on('timeupdate', () => {
if (wavesurfer.getCurrentTime() > wavesurfer.getDuration())
wavesurfer.setTime(wavesurfer.getDuration());
if (regions.getRegions().length == 1) {
if (wavesurfer.getCurrentTime() > regions.getRegions()[0].end) {
wavesurfer.pause();
wavesurfer.setTime(regions.getRegions()[0].end);
}
}
current = wavesurfer.getCurrentTime();
});
@ -187,6 +215,20 @@
timeline.destroy();
wavesurfer.destroy();
});
/**
 * Build the formant lines of the hover label for a relative position in the audio.
 *
 * @param index relative position; it is multiplied by the number of formant
 *   frames to pick a frame (presumably in [0, 1] as emitted by the hover
 *   plugin — TODO confirm)
 * @returns one `fN: value<br>` entry per non-null formant of the selected
 *   frame, or an empty string when no formant frame is available
 */
function getFormants(index: number) {
	const formants = computedData.formants;
	// An empty array is truthy, so it needs its own check: the clamp below
	// would otherwise produce -1 and the frame lookup would throw.
	if (!formants || formants.length === 0) return '';

	const position = Math.min(
		Math.max(0, Math.floor(formants.length * index)),
		formants.length - 1
	);
	const frame = formants[position];
	// A NaN index survives the clamp and selects no frame at all.
	if (!frame) return '';

	let response = '';
	for (let i = 0; i < frame.length; i++) {
		if (frame[i] === null) continue;
		response += 'f' + (i + 1) + ': ' + frame[i] + '<br>';
	}
	return response;
}
</script>
<div

View file

@ -6,11 +6,15 @@
let {
captions = $bindable(),
duration,
isLast
isLast,
createRegion,
resetRegion
}: {
captions: Caption[];
duration: number | null;
isLast: boolean;
createRegion: (start: number, end: number, currentTime: number[] | null) => void;
resetRegion: () => void;
} = $props();
let paneGroup: PaneGroupAPI | undefined = $state(undefined);
@ -60,6 +64,23 @@
};
captions = [...captions.slice(0, index), newCaption, ...captions.slice(index + 2)];
resetRegion();
}
/**
 * Recompute the start and end of the captions from the pane group's layout.
 *
 * Each layout entry is treated as a percentage of `duration`; the captions are
 * laid out back to back, the first one starting at 0. Does nothing while the
 * pane group or the duration is not available.
 */
function resize() {
	if (paneGroup === undefined || duration === null) {
		return;
	}

	const layout = paneGroup.getLayout()!;

	// Running end time of the previous caption, i.e. where the next one starts
	let cursor = 0;
	for (const [i, percentage] of layout.entries()) {
		const end = cursor + duration * (percentage / 100);
		captions[i].start = cursor;
		captions[i].end = end;
		cursor = end;
	}
}
</script>
@ -76,14 +97,27 @@
tabindex="0"
class="flex h-full w-full items-center justify-center overflow-clip rounded-none bg-accent text-accent-foreground"
onclick={(event: MouseEvent) => handleCreate(event, caption)}
ondblclick={doubleClick}
ondblclick={(event: MouseEvent) => {
doubleClick(event);
createRegion(caption.start, caption.end, null);
}}
onfocusout={(event: FocusEvent) => focusOut(event, caption)}
onkeydown={(event: KeyboardEvent) => keyDown(event, caption)}>{caption.value}</span
>
onkeydown={(event: KeyboardEvent) => keyDown(event, caption)}
onmouseup={resize}
>{caption.value}
</span>
</Resizable.Pane>
{#if caption !== captions[captions.length - 1]}
<Resizable.Handle class="bg-primary/20" onclick={(event) => handleDelete(event, i)} />
<Resizable.Handle
class="bg-primary/20"
onclick={(event) => handleDelete(event, i)}
onmouseup={() => {
const currentTime = [caption.start, caption.end];
resize();
createRegion(caption.start, caption.end, currentTime);
}}
/>
{/if}
{/each}
{/if}

View file

@ -17,6 +17,8 @@
import TimelinePlugin from 'wavesurfer.js/dist/plugins/timeline.esm.js';
import HoverPlugin from 'wavesurfer.js/dist/plugins/hover.esm.js';
import type { Action } from 'svelte/action';
import RegionsPlugin, { type Region } from 'wavesurfer.js/dist/plugins/regions.js';
import type { Frame } from '$lib/analysis/kernel/framing';
let {
fileState = $bindable(),
@ -34,6 +36,7 @@
let wavesurfer: WaveSurfer;
let timeline: TimelinePlugin;
let hover: HoverPlugin;
let regions: RegionsPlugin;
let width: number = $state(100);
let minZoom: number;
@ -41,8 +44,10 @@
let current: number = $state(0);
let playing: boolean = $state(false);
let transcriptionType: { label?: string; value: string } = $state({ value: 'empty' });
const models: string[] = ['whisper', 'deepgram', 'allosaurus'];
let previousSelection: number[] | null = null;
let transcriptionType: { label?: string; value: string } = $state({ value: 'no model' });
const models: string[] = ['whisper', 'deepgram', 'allosaurus', 'whisper-torgo-1-epoch'];
const trackNameSpace = 150;
$effect(() => {
@ -70,6 +75,8 @@
})
);
regions = wavesurfer.registerPlugin(RegionsPlugin.create());
hover = wavesurfer.registerPlugin(
HoverPlugin.create({
formatTimeCallback: () => ''
@ -110,9 +117,54 @@
});
});
wavesurfer.on('timeupdate', (time) => (current = time));
wavesurfer.on('play', () => (playing = true));
wavesurfer.on('timeupdate', () => {
if (wavesurfer.getCurrentTime() > wavesurfer.getDuration())
wavesurfer.setTime(wavesurfer.getDuration());
if (regions.getRegions().length == 1) {
if (wavesurfer.getCurrentTime() > regions.getRegions()[0].end) {
wavesurfer.pause();
wavesurfer.setTime(regions.getRegions()[0].end);
}
}
current = wavesurfer.getCurrentTime();
});
wavesurfer.on('play', () => {
if (regions.getRegions().length == 1) {
wavesurfer.setTime(regions.getRegions()[0].start);
}
playing = true;
});
wavesurfer.on('pause', () => (playing = false));
// regions.enableDragSelection(
// {
// color: 'rgba(255, 0, 0, 0.1)'
// },
// 10
// );
regions.on('region-created', (region: Region) => {
regions.getRegions().forEach((r) => {
if (r.id === region.id) return;
r.remove();
});
let frame: Frame = {
startIndex: Math.floor(region.start * wavesurfer.options.sampleRate),
endIndex: Math.ceil(region.end * wavesurfer.options.sampleRate)
};
fileState.frame = frame;
});
// Pressing Escape clears the selected region and forgets the last selection.
const handleKeydown = (e: KeyboardEvent) => {
	if (e.key === 'Escape') {
		regions.clearRegions();
		previousSelection = null;
	}
};
window.addEventListener('keydown', handleKeydown);
// The listener is attached to `window`, so it is not cleaned up with this
// component's DOM; remove it when the wavesurfer instance is destroyed.
wavesurfer.on('destroy', () => window.removeEventListener('keydown', handleKeydown));
});
onDestroy(() => {
@ -151,7 +203,7 @@
a.style.display = 'none';
a.href = url;
a.download = 'transcription.TextGrid';
a.download = `${fileState.name}.TextGrid`;
document.body.appendChild(a);
a.click();
@ -178,12 +230,12 @@
async function addTrack() {
if (duration === null) return;
if (transcriptionType.value === 'empty') {
if (transcriptionType.value === 'no model') {
fileState.transcriptions = [
...fileState.transcriptions,
{
id: generateIdFromEntropySize(10),
name: 'new track',
name: 'track name',
selected: true,
captions: [
{
@ -195,6 +247,7 @@
}
];
} else if (models.includes(transcriptionType.value)) {
const model = transcriptionType.value;
let response = await (
await fetch(`/api/transcription/${transcriptionType.value}/${fileState.id}`)
).json();
@ -203,9 +256,15 @@
...fileState.transcriptions,
{
id: generateIdFromEntropySize(10),
name: transcriptionType.value + '-' + response.language,
name: model + (response.language ? '-' + response.language : ''),
selected: true,
captions: response.transcription
},
{
id: generateIdFromEntropySize(10),
name: model + '-sentence' + (response.language ? '-' + response.language : ''),
selected: true,
captions: sentenceCaption(response.transcription)
}
];
} else {
@ -213,6 +272,18 @@
}
}
/**
 * Collapse a list of captions into a single caption spanning all of them.
 *
 * @param captions the captions to merge, in order
 * @returns a one-element array whose caption runs from the first caption's
 *   start to the last caption's end and whose value is the non-empty caption
 *   values joined by single spaces; an empty array when `captions` is empty
 */
function sentenceCaption(captions: { start: number; end: number; value: string }[]) {
	// Nothing to merge; reading captions[0] below would throw on an empty list
	if (captions.length === 0) return [];

	const sentence = captions
		.map((caption) => caption.value)
		.filter((value) => value !== '')
		.join(' ');

	return [{ start: captions[0].start, end: captions[captions.length - 1].end, value: sentence }];
}
const nonPassiveWheel: Action<HTMLElement, (event: WheelEvent) => void> = (node, callback) => {
node.addEventListener('wheel', callback, { passive: false });
@ -222,6 +293,25 @@
}
};
};
/**
 * Add a region from `start` to `end` to the waveform and remember it as the
 * previous selection.
 *
 * When both `currentTime` and a previous selection exist, the bounds are
 * adjusted first: if the previous selection's start equals `currentTime[1]`
 * (within 1e-5), the region runs from `end` to the previous selection's end
 * instead; afterwards `start` is capped so that it never exceeds `end`.
 *
 * @param start start of the region
 * @param end end of the region
 * @param currentTime a `[start, end]` pair supplied by the caller (the Track
 *   handle passes the caption's bounds from before a resize), or null to use
 *   `start` and `end` unchanged
 */
function createRegion(start: number, end: number, currentTime: number[] | null) {
	if (currentTime != null && previousSelection != null) {
		const [previousStart, previousEnd] = previousSelection;
		const touchesPrevious = Math.abs(previousStart - currentTime[1]) < 0.00001;

		if (touchesPrevious) {
			start = end;
			end = previousEnd;
		}

		if (start > end) {
			start = end;
		}
	}

	const selection = [start, end];
	previousSelection = selection;
	regions.addRegion({ color: 'rgba(255, 0, 0, 0.1)', drag: false, resize: false, start, end });
}
/** Remove every region from the waveform and forget the remembered selection. */
function resetRegion() {
	regions.clearRegions();
	previousSelection = null;
}
</script>
<section bind:this={referenceElement} bind:clientWidth={width} class="w-full bg-accent/50">
@ -307,7 +397,7 @@
</Tooltip.Content>
</Tooltip.Root>
<Tooltip.Root>
<Tooltip.Trigger>
<Tooltip.Trigger class="h-full w-full">
<span
role="button"
tabindex="0"
@ -326,23 +416,29 @@
<Track
bind:captions={transcription.captions}
{duration}
{createRegion}
{resetRegion}
isLast={i === fileState.transcriptions.length - 1}
/>
{/each}
</div>
<!-- Inserting/Exporting track stuff down here -->
<div class="flex w-full justify-center gap-5 pt-2">
<Select.Root bind:selected={transcriptionType}>
<Select.Trigger class="m-0 w-32">
{transcriptionType.value}
</Select.Trigger>
<Select.Content>
<Select.Item value="empty">empty</Select.Item>
{#each models as model}
<Select.Item value={model}>{model}</Select.Item>
{/each}
</Select.Content>
</Select.Root>
<div class="flex items-center">
<span class="mr-2 flex"> Select transcription model: </span>
<Select.Root bind:selected={transcriptionType}>
<Select.Trigger class="m-0 w-32">
{transcriptionType.value}
</Select.Trigger>
<Select.Content>
<Select.Item value="no model">no model</Select.Item>
{#each models as model}
<Select.Item value={model}>{model}</Select.Item>
{/each}
</Select.Content>
</Select.Root>
</div>
<Button class="w-fit" variant="secondary" on:click={addTrack}>Create New Track</Button>
<Tooltip.Root>
@ -350,7 +446,7 @@
<Button class="m-0 w-fit" on:click={exportTextGrid} variant="outline"><Download /></Button>
</Tooltip.Trigger>
<Tooltip.Content>
<p>Export to TextGrid</p>
<p>Export the transcriptions to TextGrid</p>
</Tooltip.Content>
</Tooltip.Root>
</div>

View file

@ -12,7 +12,8 @@ export const transcriptionData = {
.pick({
id: true,
name: true,
transcriptions: true
transcriptions: true,
frame: true
})
.default({}),
@ -22,17 +23,20 @@ export const transcriptionData = {
export function doubleClick(event: MouseEvent) {
const element = event.target! as HTMLElement;
element.contentEditable = 'true';
element.focus();
}
export function focusOut(event: FocusEvent, toChange: { name: string } | { value: string }) {
const element = event.target! as HTMLElement;
// if (!element.isContentEditable) return;
if (!element.isContentEditable) return;
element.contentEditable = 'false';
if ('name' in toChange) {
toChange.name = element.textContent ?? '';
toChange.name = element.innerText ?? '';
element.textContent = toChange.name;
} else {
toChange.value = element.textContent ?? '';
toChange.value = element.innerText ?? '';
element.textContent = toChange.value;
}
}
@ -50,9 +54,11 @@ export function keyDown(event: KeyboardEvent, toChange: { name: string } | { val
element.contentEditable = 'false';
if ('name' in toChange) {
toChange.name = element.textContent ?? '';
toChange.name = element.innerText ?? '';
element.textContent = toChange.name;
} else {
toChange.value = element.textContent ?? '';
toChange.value = element.innerText ?? '';
element.textContent = toChange.value;
}
}
}

View file

@ -5,15 +5,14 @@
import RegionsPlugin, { type Region } from 'wavesurfer.js/dist/plugins/regions.js';
import TimelinePlugin from 'wavesurfer.js/dist/plugins/timeline.esm.js';
import type { mode } from '..';
import { used } from '$lib/utils';
import type { Frame } from '$lib/analysis/kernel/framing';
import HoverPlugin from 'wavesurfer.js/dist/plugins/hover.js';
import { numberToTime } from '$lib/components/audio-controls';
export let computedData: mode.ComputedData<'waveform'>;
export let fileState: mode.FileState<'waveform'>;
let element: HTMLElement;
used(computedData);
export const controls: ControlRequirements = {
setSpeed(speed: number) {
wavesurfer.setPlaybackRate(speed);
@ -47,6 +46,7 @@
let wavesurfer: WaveSurfer;
let regions: RegionsPlugin;
let timeline: TimelinePlugin;
let hover: HoverPlugin;
let minZoom: number;
$: if (width) {
@ -78,6 +78,23 @@
})
);
hover = wavesurfer.registerPlugin(
HoverPlugin.create({
formatTimeCallback: () => ''
})
);
hover.on('hover', (event) => {
const shadowRoot = element.children[0].shadowRoot;
if (shadowRoot) {
const hoverLabel = shadowRoot.querySelector('span[part="hover-label"]');
if (hoverLabel) {
hoverLabel.innerHTML =
numberToTime(wavesurfer.getDuration() * event) + '<br>' + hoverInfo(event);
}
}
});
regions.enableDragSelection(
{
color: 'rgba(255, 0, 0, 0.1)'
@ -117,6 +134,14 @@
});
wavesurfer.on('timeupdate', () => {
if (wavesurfer.getCurrentTime() > wavesurfer.getDuration())
wavesurfer.setTime(wavesurfer.getDuration());
if (regions.getRegions().length == 1) {
if (wavesurfer.getCurrentTime() > regions.getRegions()[0].end) {
wavesurfer.pause();
wavesurfer.setTime(regions.getRegions()[0].end);
}
}
current = wavesurfer.getCurrentTime();
});
@ -125,6 +150,9 @@
});
wavesurfer.on('play', () => {
if (regions.getRegions().length == 1) {
wavesurfer.setTime(regions.getRegions()[0].start);
}
playing = true;
});
@ -139,6 +167,27 @@
wavesurfer.destroy();
});
/**
 * Build the extra lines of the hover label: the pitch and the first two
 * formants at a relative position in the audio.
 *
 * @param time relative position; it is multiplied by the number of pitch and
 *   formant entries to pick an entry (presumably in [0, 1] as emitted by the
 *   hover plugin — TODO confirm)
 * @returns the available lines, each terminated by `<br>`; an empty string
 *   when there is no computed data
 */
function hoverInfo(time: number) {
	if (!computedData) return '';

	const { pitch, formants } = computedData;

	// Scale the relative position to an index and clamp it to [0, length - 1];
	// for an empty list this yields -1, which the checks below skip.
	const toIndex = (length: number) =>
		Math.min(length - 1, Math.max(0, Math.floor(length * time)));
	const pitchPos = toIndex(pitch.length);
	const formantsPos = toIndex(formants.length);

	const lines: string[] = [];
	if (pitchPos >= 0) {
		lines.push('pitch: ' + pitch[pitchPos]);
	}
	if (formantsPos >= 0) {
		lines.push('f1: ' + formants[formantsPos][0]);
		lines.push('f2: ' + formants[formantsPos][1]);
	}

	return lines.map((line) => line + '<br>').join('');
}
</script>
<div

View file

@ -6,7 +6,10 @@ export { default as Waveform } from './Waveform.svelte';
export { default as WaveformPlugin } from './WaveformPlugin.svelte';
export const waveformData = {
computedFileData: z.null(),
computedFileData: z.object({
pitch: z.array(z.number()),
formants: z.array(z.array(z.number()))
}),
fileState: fileState
.pick({

View file

@ -0,0 +1,7 @@
<p>
This mode is used to compare tracks created from <span class="font-mono">Transcription</span> mode.
</p>
<p>
You can select a hypothesis and reference track and compare them with all of our metrics that are
available.
</p>

View file

@ -0,0 +1,26 @@
<script lang="ts">
import * as Dialog from '$lib/components/ui/dialog';
import * as Accordion from '$lib/components/ui/accordion/index.js';
import { Button } from '../ui/button';
import { contents } from '.';
</script>
<Dialog.Root>
<Dialog.Trigger class="h-fit">
<!-- TODO: Change it to a better phrase -->
<Button variant="ghost">Show Info</Button>
</Dialog.Trigger>
<Dialog.Content>
<Accordion.Root>
{#each contents as content}
<Accordion.Item value={content.title}>
<Accordion.Trigger>{content.title}</Accordion.Trigger>
<Accordion.Content>
<svelte:component this={content.content} />
</Accordion.Content>
</Accordion.Item>
{/each}
</Accordion.Root>
</Dialog.Content>
</Dialog.Root>

View file

@ -0,0 +1,4 @@
<p>
This mode shows metadata of the files in the analysis panel, as well as frame info if one is
selected.
</p>

View file

@ -0,0 +1,17 @@
<p>This mode is used to visualize spectrograms and select regions.</p>
<p>Click and drag the mouse to select a region.</p>
<h1 class="text-xl">Keybinds</h1>
<ul class="ml-5 list-disc gap-1">
<li>
<span class="rounded bg-accent p-1 font-mono">esc</span> To either remove region or unfocus spectrogram.
</li>
<li>
<span class="rounded bg-accent p-1 font-mono">ctrl + scroll</span> To zoom inside of spectrogram.
</li>
</ul>
<style>
li + li {
margin-top: 5px;
}
</style>

View file

@ -0,0 +1,26 @@
<p>This mode is used to transcribe audio.</p>
<p>In addition, it can generate a transcription with a model of choice.</p>
<h1 class="text-xl">Keybinds</h1>
<ul class="ml-5 list-disc gap-1">
<li>
<span class="rounded bg-accent p-1 font-mono">esc</span> To undo an edit in a caption.
</li>
<li>
<span class="rounded bg-accent p-1 font-mono">enter</span> To finalize an edit in a caption.
</li>
<li>
<span class="rounded bg-accent p-1 font-mono">ctrl + scroll</span> To zoom inside of transcription.
</li>
<li>
<span class="rounded bg-accent p-1 font-mono">shift + left click</span> To create a split.
</li>
<li>
<span class="rounded bg-accent p-1 font-mono">alt + left click</span> To delete a split.
</li>
</ul>
<style>
li + li {
margin-top: 5px;
}
</style>

View file

@ -0,0 +1,7 @@
<p>This mode is used to plot f1/f2 formants from selected frames.</p>
<p>
In addition, it can plot all occurrences of something that is inside of a track in <span
class="font-mono">Transcription</span
> mode.
</p>

View file

@ -0,0 +1,17 @@
<p>This mode is used to visualize waveforms and select regions.</p>
<p>Click and drag the mouse to select a region.</p>
<h1 class="text-xl">Keybinds</h1>
<ul class="ml-5 list-disc gap-1">
<li>
<span class="rounded bg-accent p-1 font-mono">esc</span> To either remove region or unfocus waveform.
</li>
<li>
<span class="rounded bg-accent p-1 font-mono">ctrl + scroll</span> To zoom inside of waveform.
</li>
</ul>
<style>
li + li {
margin-top: 5px;
}
</style>

View file

@ -0,0 +1,20 @@
import type { ComponentType, SvelteComponent } from 'svelte';
import SimpleInfoContent from './SimpleInfoContent.svelte';
import WaveformContent from './WaveformContent.svelte';
import SpectrogramContent from './SpectrogramContent.svelte';
import VowelSpaceContent from './VowelSpaceContent.svelte';
import TranscriptionContent from './TranscriptionContent.svelte';
import ErrorRateContent from './ErrorRateContent.svelte';
export { default as InfoButton } from './InfoButton.svelte';
export const contents: {
title: string;
content: ComponentType<SvelteComponent<Record<string, never>>>;
}[] = [
{ title: 'Simple Info', content: SimpleInfoContent },
{ title: 'Waveform', content: WaveformContent },
{ title: 'Spectrogram', content: SpectrogramContent },
{ title: 'Vowel Space', content: VowelSpaceContent },
{ title: 'Transcription', content: TranscriptionContent },
{ title: 'Error-rate', content: ErrorRateContent }
];

View file

@ -0,0 +1,45 @@
<div class="lds-ripple">
<div></div>
<div></div>
</div>
<style>
.lds-ripple {
display: inline-block;
position: relative;
aspect-ratio: 1;
height: 100%;
}
.lds-ripple div {
position: absolute;
border: 2px solid #fff;
opacity: 50%;
border-radius: 50%;
animation: lds-ripple 1s cubic-bezier(0, 0.2, 0.8, 1) infinite;
}
.lds-ripple div:nth-child(2) {
animation-delay: -0.5s;
}
@keyframes lds-ripple {
0% {
top: 45%;
left: 45%;
width: 0;
height: 0;
opacity: 0;
}
4.9% {
opacity: 0;
}
5% {
opacity: 50%;
}
100% {
top: 0;
left: 0;
width: 90%;
height: 90%;
opacity: 0;
}
}
</style>

View file

@ -19,13 +19,14 @@
selectedStore.set(null);
break;
case ' ':
if (
(e.target as HTMLTextAreaElement).tagName.toUpperCase() == 'INPUT' ||
selected === null
)
return;
e.preventDefault();
selected.togglePlay();
// if (
// (e.target as HTMLTextAreaElement).tagName.toUpperCase() == 'INPUT' ||
// (e.target as HTMLTextAreaElement).tagName.toUpperCase() == 'SPAN' ||
// selected === null
// )
// return;
// e.preventDefault();
// selected.togglePlay();
break;
case 'ArrowLeft':
selected?.seek(-step);

View file

@ -1,12 +1,15 @@
<script lang="ts">
import type { DockviewPanelApi } from 'dockview-core';
import XIcon from 'lucide-svelte/icons/x';
import * as AlertDialog from '$lib/components/ui/alert-dialog/index.js';
export let api: DockviewPanelApi;
export let title: string | undefined;
let previousTitle: string | null = null;
let titleElement: HTMLElement;
let deleteAlertOpen = false;
function handleClick() {
if (titleElement.isContentEditable) {
return;
@ -46,8 +49,24 @@
</button>
<button
class="ml-auto h-full cursor-pointer items-center justify-center rounded-none p-0 px-1 transition hover:bg-destructive/30"
on:mousedown={() => api.close()}
on:mousedown={() => (deleteAlertOpen = true)}
>
<XIcon class="h-4 w-4 text-secondary-foreground"></XIcon>
</button>
</div>
<AlertDialog.Root bind:open={deleteAlertOpen}>
<AlertDialog.Content>
<AlertDialog.Header>
<AlertDialog.Title>Are you absolutely sure?</AlertDialog.Title>
<AlertDialog.Description>
This action cannot be undone. This will permanently delete this pane and the analysis
conducted in it.
</AlertDialog.Description>
</AlertDialog.Header>
<AlertDialog.Footer>
<AlertDialog.Cancel>Cancel</AlertDialog.Cancel>
<AlertDialog.Action on:click={() => api.close()}>Continue</AlertDialog.Action>
</AlertDialog.Footer>
</AlertDialog.Content>
</AlertDialog.Root>

View file

@ -0,0 +1,3 @@
import { writable } from 'svelte/store';
/**
 * Maps an id to the text that should be displayed in its place.
 * The session list stores each session's name under its id here, and the menubar
 * breadcrumb shows the mapped value instead of the raw path segment when one exists.
 */
export const menubarOverrides = writable<Record<string, string>>({});

View file

@ -7,7 +7,7 @@ export async function uploadFile(file: File, sessionId: string, userId: string)
const arrayBuffer = await file.arrayBuffer();
const buffer = Buffer.from(arrayBuffer);
await uploadFileAsBuffer(buffer, file.name, sessionId, userId, null);
await uploadFileAsBuffer(buffer, file.name, sessionId, userId, '', '');
logger.trace(`File ${file.name} uploaded as buffer`);
}
@ -16,7 +16,8 @@ export async function uploadFileAsBuffer(
name: string,
sessionId: string,
userId: string,
groundTruth: string | null
groundTruth: string,
note: string
) {
await db.insert(fileTable).values({
name,
@ -24,6 +25,7 @@ export async function uploadFileAsBuffer(
uploader: userId,
data: buffer,
session: sessionId,
groundTruth: groundTruth
groundTruth,
note
});
}

View file

@ -55,13 +55,14 @@ export const fileTable = pgTable('files', {
.default(sql`CURRENT_TIMESTAMP`)
.notNull(),
uploader: text('uploader')
.references(() => userTable.id)
.references(() => userTable.id, { onDelete: 'cascade' })
.notNull(),
session: text('session')
.references(() => sessionTable.id)
.references(() => sessionTable.id, { onDelete: 'cascade' })
.notNull(),
ephemeral: boolean('ephemeral').notNull().default(false),
groundTruth: text('ground_truth'),
groundTruth: text('ground_truth').notNull().default(''),
note: text('note').notNull().default(''),
state: jsonb('state')
.notNull()
.default(sql`'{}'`)
@ -84,7 +85,7 @@ export const sessionTable = pgTable('session', {
name: text('name').notNull(),
owner: text('owner')
.notNull()
.references(() => userTable.id),
.references(() => userTable.id, { onDelete: 'cascade' }),
creationTime: timestamp('creation_time')
.default(sql`CURRENT_TIMESTAMP`)
.notNull(),

View file

@ -19,7 +19,8 @@ const sampleUser = {
id: 'sample-user',
username: 'Sample',
email: 'sample@example.com',
password: 'password'
password: 'password',
privacyAck: true
};
export async function seedSampleUser() {
@ -46,6 +47,8 @@ const sampleSessionState: SessionState = {
frame: null,
cycleEnabled: false,
transcriptions: [],
groundTruth: 'the quick brown fox jumps over the lazy dog',
note: 'from Torgo dataset',
reference: null,
hypothesis: null
},
@ -56,6 +59,8 @@ const sampleSessionState: SessionState = {
frame: null,
cycleEnabled: true,
transcriptions: [],
groundTruth: 'the quick brown fox jumps over the lazy dog',
note: 'from Torgo dataset',
reference: null,
hypothesis: null
}

View file

@ -8,22 +8,31 @@ export async function createUser({
id,
username,
password,
email
email,
privacyAck
}: {
id?: string;
username: string;
password: string;
email: string;
privacyAck: boolean;
}): Promise<
| {
success: false;
reason: 'email-in-use' | 'username-in-use';
reason: 'email-in-use' | 'username-in-use' | 'unread-policy';
}
| {
success: true;
userId: string;
}
> {
if (privacyAck === false) {
return {
success: false,
reason: 'unread-policy'
};
}
const userId = id ?? generateIdFromEntropySize(10);
const hashedPassword = await hash(password, {
// recommended minimum parameters

View file

@ -1 +1,5 @@
// place files you want to import through the `$lib` alias in this folder.
import { writable, type Writable } from 'svelte/store';
/**
 * Whether a session-state sync is currently pending.
 * The session page sets it to `true` when a sync is scheduled and back to `false` once
 * the sync has been issued; the menubar shows a spinner while it is `true`.
 */
export const uploadingStateStore: Writable<boolean> = writable(false);

View file

@ -0,0 +1,49 @@
import { PUBLIC_KERNEL_ORIGIN } from '$env/static/public';
import { error } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { z } from 'zod';
import { db } from '$lib/database';
import { and, eq } from 'drizzle-orm';
import { fileTable } from '$lib/database/schema';
/**
 * Forwards a JSON POST body to the kernel at `path` on behalf of a logged-in user.
 *
 * The body must contain `fileState.id`, and that file must have been uploaded by the
 * requesting user; otherwise the request is rejected before the kernel is contacted.
 *
 * Responds with:
 * - 401 when no user is logged in,
 * - 400 when the body is not valid JSON, lacks `fileState.id`, or `path` would leave the kernel origin,
 * - 404 when the file does not exist or belongs to another user,
 * - otherwise the kernel's response, returned as-is.
 */
export const POST: RequestHandler = async ({ request, params: { path }, locals: { user } }) => {
	if (!user) {
		error(401, 'Not logged in');
	}

	// `request.json()` rejects on a malformed body; report that as a client error
	// (matching the message below) instead of letting it surface as a 500.
	let json: unknown;
	try {
		json = await request.json();
	} catch {
		error(400, 'Invalid JSON or no fileState.id');
	}

	const jsonShape = z.object({
		fileState: z.object({
			id: z.string()
		})
	});

	const result = jsonShape.safeParse(json);

	if (!result.success) {
		error(400, 'Invalid JSON or no fileState.id');
	}

	const {
		fileState: { id }
	} = result.data;

	// Only the uploader of a file may run kernel requests against it.
	const dbFile = await db.query.fileTable.findFirst({
		where: and(eq(fileTable.id, id), eq(fileTable.uploader, user.id)),
		columns: { id: true }
	});

	if (!dbFile) {
		error(404, `File not found (id: ${id})`);
	}

	// `new URL(path, base)` ignores `base` when `path` is itself absolute or
	// protocol-relative, so make sure the request still targets the kernel.
	const kernelOrigin = new URL(PUBLIC_KERNEL_ORIGIN).origin;
	const url = new URL(path, PUBLIC_KERNEL_ORIGIN);

	if (url.origin !== kernelOrigin) {
		error(400, 'Invalid kernel path');
	}

	// Forward the original (unstripped) body so the kernel receives every field the client sent.
	return await fetch(url, {
		method: 'POST',
		headers: {
			'Content-Type': 'application/json'
		},
		body: JSON.stringify(json)
	});
};

View file

@ -48,7 +48,8 @@ export const actions: Actions = {
file['name'],
sessionId,
userId,
file['groundTruth']
file['groundTruth'],
file['note']
);
}

View file

@ -17,6 +17,8 @@
let disableExport: boolean = false;
let disableImport: boolean = false;
let shortcutsEnabled: boolean = true;
let selectedCamera: Selected<MediaDeviceInfo | null> = {
label: 'Default camera',
value: null
@ -36,11 +38,12 @@
const zip = new JSZip();
let notes = '';
for (let prompt of prompts) {
let promptIndexPadded = prependZeros(4, '' + prompt.index);
let promptIndexPadded = prependZeros(4, '' + (prompt.index + 1));
let promptName = promptIndexPadded + '-' + prompt.id;
zip.file(`${promptName}.txt`, prompt.content);
for (let i = 0; i < prompt.recordings.length; i++) {
let recordingName = promptIndexPadded + '-' + prependZeros(3, '' + i) + '-' + prompt.id;
let recordingName =
promptIndexPadded + '-' + prependZeros(3, '' + (i + 1)) + '-' + prompt.id;
notes += recordingName + ': ' + prompt.recordings[i].note;
zip.file(`${recordingName}.webm`, prompt.recordings[i].blob);
}
@ -66,10 +69,15 @@
const formData = new FormData();
let data = [];
for (let prompt of prompts) {
let promptIndexPadded = prependZeros(4, '' + prompt.index);
let promptIndexPadded = prependZeros(4, '' + (prompt.index + 1));
for (let i = 0; i < prompt.recordings.length; i++) {
let recordingName = promptIndexPadded + '-' + prependZeros(3, '' + i) + '-' + prompt.id;
data.push({ name: recordingName, groundTruth: prompt.content });
let recordingName =
promptIndexPadded + '-' + prependZeros(3, '' + (i + 1)) + '-' + prompt.id;
data.push({
name: recordingName,
groundTruth: prompt.content,
note: prompt.recordings[i].note
});
formData.append(recordingName, prompt.recordings[i].blob);
}
}
@ -96,6 +104,7 @@
}
function handleKeydown(event: KeyboardEvent) {
if (!shortcutsEnabled) return;
if (event.key === 'ArrowRight') {
next();
} else if (event.key === 'ArrowLeft') {
@ -211,6 +220,13 @@
micInfo={selectedMic.value}
onNext={next}
onPrevious={previous}
{shortcutsEnabled}
enableShortcuts={() => {
shortcutsEnabled = true;
}}
disableShortcuts={() => {
shortcutsEnabled = false;
}}
/>
</section>
{/each}

View file

@ -23,6 +23,11 @@
*/
export let onNext: () => void = () => {};
export let disableShortcuts: () => void;
export let enableShortcuts: () => void;
export let shortcutsEnabled: boolean;
/**
* The index of the recording that is currently being previewed.
*/
@ -36,12 +41,18 @@
}
/** Toggles recording when "r" is pressed, provided shortcuts are enabled and this card is focused. */
function handleKeydown(event: KeyboardEvent) {
	const isRecordShortcut = shortcutsEnabled && event.key === 'r';
	if (!isRecordShortcut || !focused) {
		return;
	}

	cameraComponent.toggleRecording();
}
/** Copies the current text of the textarea that fired `event` into the note of recording `index`. */
function handleNoteChange(event: Event, index: number) {
	const { value } = event.target as HTMLTextAreaElement;
	prompt.recordings[index].note = value;
}
</script>
<svelte:window on:keydown={handleKeydown} />
@ -121,8 +132,17 @@
<div class="m-4 h-full flex-1 rounded bg-background p-2 text-left">
{#if previewing && previewingIndex !== null}
Notes for take {previewingIndex + 1}
<Textarea bind:value={previewing.note}></Textarea>
Notes for take {previewingIndex + 1} (typing auto saves)
<Textarea
	on:focus={disableShortcuts}
	on:blur={enableShortcuts}
	bind:value={previewing.note}
	on:input={(e: InputEvent) => {
		// Compare against null explicitly: the first take has index 0, which is falsy
		// and would otherwise never reach handleNoteChange.
		if (previewingIndex !== null) {
			handleNoteChange(e, previewingIndex);
		}
	}}
/>
{/if}
</div>
</div>

View file

@ -102,6 +102,10 @@ export const actions: Actions = {
}
redirect(301, `session/${sessionId}`);
},
// Deletes the session whose id is sent as the JSON request body.
// Only the logged-in owner of the session may delete it; previously any caller
// could delete any session just by knowing its id.
deleteSession: async ({ request, locals }) => {
	if (!locals.user) {
		redirect(302, '/login');
	}

	const sessionId: unknown = await request.json();

	const [session] =
		typeof sessionId === 'string'
			? await db
					.select({ owner: sessionTable.owner })
					.from(sessionTable)
					.where(eq(sessionTable.id, sessionId))
			: [];

	if (typeof sessionId !== 'string' || !session || session.owner !== locals.user.id) {
		// Returning data gives the action result status 200 instead of the 204 the
		// client checks for, so the card is not removed when nothing was deleted.
		return { deleted: false };
	}

	await db.delete(sessionTable).where(eq(sessionTable.id, sessionId));
},
createSession: async ({ request, locals }) => {
const formData = await request.formData();
const sessionName = formData.get('sessionName');

View file

@ -1,5 +1,6 @@
<script lang="ts">
import { buttonVariants } from '$lib/components/ui/button';
import { menubarOverrides } from '$lib/components/ui/menubar/overrides';
import type { PageData } from './$types';
import SessionCard from './SessionCard.svelte';
import { cn } from '$lib/utils';
@ -13,7 +14,16 @@
import { toast } from 'svelte-sonner';
import { Toaster } from '$lib/components/ui/sonner';
import { Button } from '$lib/components/ui/button';
let importingSession: boolean = false;
let openContextMenus: boolean[] = [];
/** Marks every session card's context menu as closed. */
function closeAllContextMenus() {
	for (const index of openContextMenus.keys()) {
		openContextMenus[index] = false;
	}
}
async function handleFileUpload(event: Event) {
importingSession = true;
@ -75,6 +85,12 @@
}
export let data: PageData;
// Register every session's name under its id so the menubar can show the name instead
// of the id. A single update copies the store once and notifies subscribers once,
// rather than once per session.
menubarOverrides.update((oldStore) => ({
	...oldStore,
	...Object.fromEntries(data.sessions.map((session) => [session.id, session.name]))
}));
</script>
<svelte:head>
@ -91,9 +107,16 @@
</Dialog.Trigger>
</li>
{#each data.sessions as session}
{#each data.sessions as session, i}
<li>
<SessionCard {session}></SessionCard>
<SessionCard
{session}
{closeAllContextMenus}
bind:isContextMenuOpen={openContextMenus[i]}
onDeleteSession={() => {
data.sessions = data.sessions.filter((s) => s.id !== session.id);
}}
></SessionCard>
</li>
{/each}
</ul>
@ -103,9 +126,22 @@
<Dialog.Header>
<Dialog.Title class="text-3xl">Enter new session name</Dialog.Title>
<Dialog.Description>
<form action="?/createSession" method="POST" use:enhance>
<Input disabled={importingSession} type="text" name="sessionName" minlength={1} required
<form
id="create-session-form"
class="flex"
action="?/createSession"
method="POST"
use:enhance
>
<Input
class="mr-1"
disabled={importingSession}
type="text"
name="sessionName"
minlength={1}
required
></Input>
<Button type="submit" disabled={importingSession}>Create Session</Button>
</form>
</Dialog.Description>
<Dialog.Title class="text-3xl">Or Import a session</Dialog.Title>

View file

@ -5,8 +5,26 @@
import * as Tooltip from '$lib/components/ui/tooltip';
import * as ContextMenu from '$lib/components/ui/context-menu';
import * as AlertDialog from '$lib/components/ui/alert-dialog';
export let session: typeof sessionTable.$inferSelect;
export let closeAllContextMenus: () => void;
export let isContextMenuOpen = false;
export let onDeleteSession: (fileId: string) => void;
let deleteAlertOpen = false;
/**
 * Asks the server to delete this session and, when the action result reports
 * status 204, notifies the parent through `onDeleteSession`.
 */
async function deleteSession() {
	const response = await fetch('?/deleteSession', {
		method: 'POST',
		body: JSON.stringify(session.id)
	});

	const actionResult = await response.json();
	if (actionResult.status == 204) {
		onDeleteSession(session.id);
	}
}
async function exportSession() {
let response = await fetch('?/exportSession', {
@ -40,7 +58,15 @@
<Tooltip.Root>
<Tooltip.Trigger>
<ContextMenu.Root>
<ContextMenu.Root
bind:open={isContextMenuOpen}
onOpenChange={(isOpened) => {
if (!isOpened) return;
closeAllContextMenus();
isContextMenuOpen = true;
}}
>
<ContextMenu.Trigger>
<Button class="h-fit px-6 py-8" variant="outline" href="session/{session.id}">
<section class="flex flex-col items-start">
@ -64,6 +90,7 @@
</ContextMenu.Trigger>
<ContextMenu.Content>
<ContextMenu.Item on:click={() => exportSession()}>Export</ContextMenu.Item>
<ContextMenu.Item on:click={() => (deleteAlertOpen = true)}>Delete</ContextMenu.Item>
</ContextMenu.Content>
</ContextMenu.Root>
</Tooltip.Trigger>
@ -72,3 +99,19 @@
<p>{session.name}</p>
</Tooltip.Content>
</Tooltip.Root>
<AlertDialog.Root bind:open={deleteAlertOpen}>
<AlertDialog.Content>
<AlertDialog.Header>
<AlertDialog.Title>Are you absolutely sure?</AlertDialog.Title>
<AlertDialog.Description>
This action cannot be undone. This will permanently delete your session and remove your data
from our servers.
</AlertDialog.Description>
</AlertDialog.Header>
<AlertDialog.Footer>
<AlertDialog.Cancel>Cancel</AlertDialog.Cancel>
<AlertDialog.Action on:click={() => deleteSession()}>Continue</AlertDialog.Action>
</AlertDialog.Footer>
</AlertDialog.Content>
</AlertDialog.Root>

View file

@ -20,7 +20,9 @@ export const load = (async ({ params: { sessionId } }) => {
columns: {
id: true,
name: true,
state: true
state: true,
note: true,
groundTruth: true
}
}
}
@ -43,7 +45,7 @@ export const load = (async ({ params: { sessionId } }) => {
}) satisfies PageServerLoad;
async function getFiles(result: {
files: { id: string; name: string; state: unknown }[];
files: { id: string; name: string; note: string; groundTruth: string; state: unknown }[];
}): Promise<FileState[]> {
const promises = result.files.map(async (file) => {
try {
@ -55,7 +57,9 @@ async function getFiles(result: {
return fileState.parse({
...file.state,
id: file.id,
name: file.name
name: file.name,
note: file.note,
groundTruth: file.groundTruth
});
} catch (err) {
logger.trace(`File ${file.id} not found`);
@ -64,7 +68,9 @@ async function getFiles(result: {
return {
...defaultState,
id: file.id,
name: file.name
name: file.name,
note: file.note,
groundTruth: file.groundTruth
};
}
});

View file

@ -1,10 +1,12 @@
<script lang="ts">
import { browser } from '$app/environment';
import { uploadingStateStore } from '$lib';
import * as Resizable from '$lib/components/ui/resizable';
import type { PageData } from './$types';
import FileExplorer from './FileExplorer.svelte';
import Workspace from './Workspace.svelte';
import type { SessionState } from './workspace';
import { beforeNavigate } from '$app/navigation';
export let data: PageData;
let lastUpdate: number = -Infinity;
@ -19,6 +21,7 @@
*/
function attemptSync() {
if (!browser || timeout !== null) return;
uploadingStateStore.set(true);
let now = Date.now();
@ -27,6 +30,7 @@
syncState(data.state);
lastUpdate = Date.now();
timeout = null;
uploadingStateStore.set(false);
},
5 * 1000 - (now - lastUpdate)
);
@ -48,7 +52,24 @@
attemptSync();
}
let workspace: Workspace;
// Guard against losing unsynced state when navigating away mid-sync.
beforeNavigate(({ type, cancel }) => {
	// Nothing pending: let the navigation proceed untouched.
	if (!$uploadingStateStore) return;

	// Kick off the sync right away without awaiting it, so navigation is not blocked.
	syncState(data.state);

	// Only a 'leave' navigation is cancelled, which gives the user a chance to wait.
	// For every other navigation type the sync was already started above and is
	// expected to keep running after the move.
	if (type === 'leave') {
		cancel();
	}
});
let workspace: Workspace | undefined = undefined;
</script>
<svelte:head>
@ -60,8 +81,9 @@
<Resizable.Pane defaultSize={20} minSize={11} collapsible={true} collapsedSize={1}>
<FileExplorer
bind:files={data.files}
bind:workspace
sessionId={data.sessionId}
onDeleteFile={(fileId) => workspace.deleteFile(fileId)}
onDeleteFile={(fileId) => workspace?.deleteFile(fileId)}
></FileExplorer>
</Resizable.Pane>

View file

@ -1,5 +1,4 @@
<script lang="ts">
import { Button } from '$lib/components/ui/button';
import * as AlertDialog from '$lib/components/ui/alert-dialog/index.js';
import * as ContextMenu from '$lib/components/ui/context-menu';
import * as Dialog from '$lib/components/ui/dialog';
@ -12,6 +11,8 @@
let tempName: string;
export let file: FileState;
export let onDeleteFile: (fileId: string) => void = () => {};
export let contextMenuOpen = false;
export let closeAllContextMenus: () => void;
// Manual fetch because it's a hassle to set up the form
async function deleteFile(fileId: string) {
@ -24,22 +25,37 @@
}
</script>
<ContextMenu.Trigger>
<Button class="flex-2 w-full gap-2 rounded text-left" variant="ghost">
<FileIcon></FileIcon>
<span class="max-w-full flex-1 overflow-hidden text-ellipsis">
{file.name}
</span>
</Button>
</ContextMenu.Trigger>
<ContextMenu.Root
bind:open={contextMenuOpen}
onOpenChange={(opened) => {
if (!opened) return;
<ContextMenu.Content>
<ContextMenu.Item on:click={() => (renameDialogOpen = true)}>Rename</ContextMenu.Item>
<ContextMenu.Item
><a href={`/db/file/${file.id}`} download={file.name}>Download</a></ContextMenu.Item
>
<ContextMenu.Item on:click={() => (deleteAlertOpen = true)}><span>Delete</span></ContextMenu.Item>
</ContextMenu.Content>
closeAllContextMenus();
contextMenuOpen = true;
}}
>
<ContextMenu.Trigger>
<div
class="flex-2 inline-flex h-10 w-full items-center justify-center gap-2 whitespace-nowrap rounded-md px-4 py-2 text-left text-sm font-medium ring-offset-background transition-colors hover:bg-accent hover:text-accent-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50"
>
<FileIcon />
<span class="max-w-full flex-1 overflow-hidden text-ellipsis">
{file.name}
</span>
</div>
</ContextMenu.Trigger>
<ContextMenu.Content>
<ContextMenu.Item on:click={() => (renameDialogOpen = true)}>Rename</ContextMenu.Item>
<ContextMenu.Item
><a href={`/db/file/${file.id}`} download={file.name}>Download</a></ContextMenu.Item
>
<ContextMenu.Item on:click={() => (deleteAlertOpen = true)}
><span>Delete</span></ContextMenu.Item
>
</ContextMenu.Content>
</ContextMenu.Root>
<AlertDialog.Root bind:open={deleteAlertOpen}>
<AlertDialog.Content>

View file

@ -15,44 +15,56 @@
import Recorder from './Recorder.svelte';
import FileEntry from './FileEntry.svelte';
import type { FileState } from '$lib/analysis/modes/file-state';
import * as ContextMenu from '$lib/components/ui/context-menu';
import Workspace from './Workspace.svelte';
export let workspace: Workspace | undefined;
export let files: FileState[];
export let sessionId: string;
export let onDeleteFile: (fileId: string) => void;
let filesContextMenu: boolean[] = [];
let submitButton: HTMLInputElement;
/** Marks every file entry's context menu as closed. */
function closeAllContextMenus() {
	for (const index of filesContextMenu.keys()) {
		filesContextMenu[index] = false;
	}
}
</script>
<div class="flex h-full flex-col bg-secondary/75 text-secondary-foreground">
<ol class="flex-1 py-2">
<ContextMenu.Root>
{#each files as file (file.id)}
<li
class="px-2 py-1"
animate:flip={{ duration: 400 }}
transition:fade={{ duration: 400 }}
draggable={true}
on:dragstart={(event) => {
event.dataTransfer?.setData('application/json', JSON.stringify(file));
if (event.dataTransfer) {
event.dataTransfer.dropEffect = 'copy';
}
<div class="h-full flex-1 flex-col py-2">
{#each files as file, i (file.id)}
<button
class="w-full px-2 py-1"
animate:flip={{ duration: 400 }}
transition:fade={{ duration: 400 }}
draggable={true}
on:click={() => {
workspace?.addFileJSON(JSON.stringify(file));
}}
on:dragstart={(event) => {
event.dataTransfer?.setData('application/json', JSON.stringify(file));
if (event.dataTransfer) {
event.dataTransfer.dropEffect = 'copy';
}
}}
>
<FileEntry
{file}
onDeleteFile={() => {
onDeleteFile(file.id);
files = files.filter((f) => f.id !== file.id);
}}
>
<FileEntry
{file}
onDeleteFile={() => {
onDeleteFile(file.id);
files = files.filter((f) => f.id !== file.id);
}}
></FileEntry>
</li>
{:else}
<div class="text-muted-foreground w-full text-center py-3">No files yet!</div>
{/each}
</ContextMenu.Root>
</ol>
bind:contextMenuOpen={filesContextMenu[i]}
{closeAllContextMenus}
/>
</button>
{:else}
<div class="text-muted-foreground w-full text-center py-3">No files yet!</div>
{/each}
</div>
<Separator class=""></Separator>

View file

@ -20,6 +20,17 @@
};
};
/**
 * Passes the serialized file to `addFile` on the component instance of every panel.
 * Panels whose renderer has no instance are skipped.
 */
export function addFileJSON(fileJSON: string) {
	type FileReceiver = ComponentType & { addFile: (fileJSON: string) => void };

	for (const { view } of panesApi.panels) {
		const renderer = view.content as SvelteRenderer;
		const component = renderer.getInstance() as FileReceiver | undefined;
		component?.addFile(fileJSON);
	}
}
export function deleteFile(fileId: string) {
for (const pane of panesApi.panels) {
const instance = pane.view.content as SvelteRenderer;

View file

@ -14,15 +14,11 @@ export const POST: RequestHandler = async ({
const formData = await request.formData();
const blob = formData.get('recording') as Blob;
let groundTruth = formData.get('groundTruth') as string | null;
const groundTruth = formData.get('groundTruth') as string;
const buffer = await blob.arrayBuffer();
if (groundTruth === '') {
groundTruth = null;
}
await uploadFileAsBuffer(Buffer.from(buffer), fileName, sessionId, user.id, groundTruth);
await uploadFileAsBuffer(Buffer.from(buffer), fileName, sessionId, user.id, groundTruth, '');
return new Response();
};

View file

@ -4,11 +4,25 @@
import { page } from '$app/stores';
import { Button } from '$lib/components/ui/button';
import UserIcon from 'lucide-svelte/icons/user';
import { uploadingStateStore } from '$lib';
import Spinner from '$lib/components/Spinner.svelte';
import { InfoButton } from '$lib/components/InfoButton';
import { menubarOverrides } from '$lib/components/ui/menubar/overrides';
// Path segments of the current URL, used to build the breadcrumb.
$: segments = $page.url.pathname.split('/');

// If a segment is the internal id of a session, the breadcrumb shows the session's
// name instead; this map holds the id -> name replacements.
// `$store` auto-subscriptions are used so Svelte unsubscribes when the component is
// destroyed (a manual `.subscribe()` without cleanup never does).
let overrideSegments: { [id: string]: string } = {};
$: overrideSegments = $menubarOverrides;

// Workaround for https://github.com/sveltejs/eslint-plugin-svelte/issues/652
page;

// True while a session-state sync is pending; drives the spinner.
let loading: boolean = false;
$: isInSession = segments.length > 2 && segments[1] === 'session';
$: loading = $uploadingStateStore;
</script>
<Menubar.Root class="flex h-12 justify-center bg-secondary py-0 text-secondary-foreground">
@ -23,7 +37,13 @@
<!-- TODO: Add logo -->
</div>
<div class="flex flex-1 justify-end text-muted-foreground">
<div class="flex h-full flex-1 justify-end text-muted-foreground">
{#if loading}
<Spinner />
{/if}
{#if isInSession}
<InfoButton />
{/if}
<Button href="/profile" variant="ghost">
<div class="pr-3">Profile</div>
<UserIcon></UserIcon>
@ -44,7 +64,7 @@
<Breadcrumb.Separator></Breadcrumb.Separator>
<Breadcrumb.Item>
<Breadcrumb.Link {href}>{pathSegment}</Breadcrumb.Link>
<Breadcrumb.Link {href}>{overrideSegments[pathSegment] || pathSegment}</Breadcrumb.Link>
</Breadcrumb.Item>
{/each}
</Breadcrumb.List>

View file

@ -1,18 +1,18 @@
## About Spectral
In 2024, we were tasked by the Multimedia Computing (MMC) Group of TU Delft to create an application for conducting comparative analysis of speech. Despite several toolkits already available on the market, none of them were able to fully satisfy the group's needs. We took on the challenge of composing a more user-friendly, multi-functional speech analysis toolkit suitable for typical as well as atypical speech.
In 2024, we were tasked by the MultiMedia Computing (MMC) Group of TU Delft to create an application for conducting comparative analysis of speech. Despite several toolkits already available on the market, none of them were able to fully satisfy the group's needs. We took on the challenge of composing a more user-friendly, multi-functional speech analysis toolkit suitable for typical as well as atypical speech.
We believe that Spectral will be of significant help to all ASR researchers, which in turn makes it easier for specialists to help people in need!
Want to learn more about Spectral? [Read me.](https://gitlab.ewi.tudelft.nl/cse2000-software-project/2023-2024/cluster-n/11c/atypical-speech-project/-/blob/main/README.md?ref_type=heads)
Want to learn more about Spectral? [Read me.](https://github.com/TU-Delft-SALT-Group/spectral/blob/main/README.md)
Spectral is open sourced! Check out our repo [here](https://gitlab.ewi.tudelft.nl/cse2000-software-project/2023-2024/cluster-n/11c/atypical-speech-project) on GitHub!
Spectral is open source! Check out our repo [here](https://github.com/TU-Delft-SALT-Group/spectral) on GitHub!
## About Us
Meet the original team!
- Roman Knyazhitskiy
- Roman Knyazhitskiy &nbsp; --> &nbsp; ([GitHub](https://github.com/knyazer))
- Odysseas Machairas &nbsp; --> &nbsp; ([GitHub](https://github.com/Odilf))
- Thijs Houben
- Yousef Bakri &nbsp; --> &nbsp; ([GitHub](https://github.com/dprin))

View file

@ -5,10 +5,10 @@
</script>
<svelte:head>
<title>Log in Spectral</title>
<title>Log into Spectral</title>
</svelte:head>
<main class="mx-auto max-w-xl py-8">
<h1 class="mb-4 text-2xl font-bold">Log in</h1>
<h1 class="mb-4 text-2xl font-bold">Login</h1>
<LoginForm data={data.form} />
</main>

View file

@ -34,7 +34,7 @@
<Button variant="link" href="/signup" class="w-fit px-0 text-left">Sign up instead</Button>
<Form.Button>Submit</Form.Button>
<Form.Button>Login</Form.Button>
</form>
<style>

View file

@ -0,0 +1,45 @@
<svelte:head>
<title>Policy</title>
</svelte:head>
<section class="mx-auto h-full w-screen max-w-xl break-words px-2 py-8">
<h1 class="w-full text-2xl">Recordee's Consent</h1>
<p>
Please obtain the recordee's consent in a lawful manner. We assume that all audio recordings
used in Spectral were obtained lawfully, and we do not bear any responsibility for them.
</p>
<h1 class="mt-6 w-full text-2xl">Consumer Data</h1>
<p>
Internally, audio/video files are stored on TU Delft servers, where they are properly
secured using a hashed ID. This means that anyone with access to this ID has access to the
files.
</p>
<p>
In addition, users acknowledge that anyone who has access to the server <i>also</i> has access to
all of the user's data that is stored.
</p>
<h1 class="mt-6 w-full text-2xl">Data going to external services</h1>
<p>While we are open source, we use services for transcription that are not. These are:</p>
<ul class="ml-5 list-disc">
<li>
<a
href="https://openai.com/policies/eu-privacy-policy/"
class="text-blue-600 underline dark:text-blue-300">OpenAI Whisper</a
>
</li>
<li>
<a href="https://deepgram.com/privacy" class="text-blue-600 underline dark:text-blue-300"
>Deepgram</a
>
</li>
</ul>
<p>
By using our services, users agree to the privacy policies of these services. <i>Please</i> go to
their websites and read their policies.
</p>
<p>Data is not shared with any services other than those listed above.</p>
</section>

View file

@ -39,6 +39,11 @@ export const actions: Actions = {
return fail(400, {
form
});
case 'unread-policy':
setError(form, 'privacyAck', 'Did not acknowledge privacy policy');
return fail(400, {
form
});
}
}

View file

@ -11,5 +11,5 @@
<main class="mx-auto max-w-xl py-8">
<h1 class="mb-4 text-2xl font-bold">Sign up</h1>
<SignupForm data={data.form} />
<SignupForm bind:data={data.form} />
</main>

View file

@ -1,5 +1,6 @@
<script lang="ts">
import { Button } from '$lib/components/ui/button';
import { Checkbox } from '$lib/components/ui/checkbox';
import * as Form from '$lib/components/ui/form';
import { Input } from '$lib/components/ui/input';
import { formSchema, type FormSchema } from './schema';
@ -40,9 +41,24 @@
<Form.FieldErrors />
</Form.Field>
<Form.Field {form} name="privacyAck" class="mb-4 mt-2 flex items-center gap-1">
<Form.Control let:attrs>
<Checkbox {...attrs} bind:checked={$formData.privacyAck} />
<Form.Label style="margin-top: 0;"
>I accept the <a
target="_blank"
href="/policy"
class="text-blue-600 underline dark:text-blue-300">policies</a
></Form.Label
>
<input hidden type="checkbox" name={attrs.name} checked={$formData.privacyAck} />
</Form.Control>
<Form.FieldErrors />
</Form.Field>
<Button variant="link" href="/login" class="w-fit px-0 text-left">Log in instead</Button>
<Form.Button>Submit</Form.Button>
<Form.Button>Sign up</Form.Button>
</form>
<style>

View file

@ -3,7 +3,8 @@ import { z } from 'zod';
export const formSchema = z.object({
username: z.string().min(2).max(50),
email: z.string().email(),
password: z.string().min(8).max(50)
password: z.string().min(8).max(50),
privacyAck: z.boolean()
});
export type FormSchema = typeof formSchema;

View file

@ -1,4 +1,4 @@
import adapter from '@sveltejs/adapter-auto';
import adapter from '@sveltejs/adapter-node';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
/** @type {import('@sveltejs/kit').Config} */

View file

@ -0,0 +1,80 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before every test: reset the environment, log in as the sample user and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	const usernameField = page.getByLabel('Username');
	await usernameField.click();
	await usernameField.press('CapsLock');
	await usernameField.fill('S');
	await usernameField.press('CapsLock');
	await usernameField.fill('Sample');

	const passwordField = page.getByLabel('Password');
	await passwordField.click();
	await passwordField.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});

// After every test: clean up whatever the test created.
test.afterEach(deleteEverything);
// End-to-end walk through the file explorer and pane tabs of the sample session:
// rename, download, delete and re-upload a file, then close a pane and open a new tab.
test('file management test', async ({ page }) => {
// Rename the file through its context menu (right click).
await page.getByRole('button', { name: 'MC02_control_head_sentence1', exact: true }).click({
button: 'right'
});
await page.getByRole('menuitem', { name: 'Rename' }).click();
await page.getByPlaceholder('MC02_control_head_sentence1').fill('renamed_file.wav');
await page.getByPlaceholder('MC02_control_head_sentence1').press('Enter');
// The old name must be gone and the new one shown.
await expect(
page.getByRole('button', { name: 'MC02_control_head_sentence1', exact: true })
).toHaveCount(0);
await expect(page.getByRole('main')).toContainText('renamed_file.wav');
// Download the renamed file; the download must carry the new name.
await page.getByRole('button', { name: 'renamed_file.wav' }).click({
button: 'right'
});
// Start waiting for the download before triggering it, so the event cannot be missed.
const downloadPromise = page.waitForEvent('download');
await page.getByRole('menuitem', { name: 'Download' }).click();
const download = await downloadPromise;
await expect(download.suggestedFilename()).toBe('renamed_file.wav');
// Delete the file and confirm the alert dialog.
await page.getByRole('button', { name: 'renamed_file.wav' }).click({
button: 'right'
});
await page.getByRole('menuitem', { name: 'Delete' }).click();
await page.getByRole('button', { name: 'Continue' }).click();
await expect(page.getByRole('button', { name: 'renamed_file.wav', exact: true })).toHaveCount(0);
// Upload the original sample again through the file input.
await expect(page.getByRole('textbox')).toBeVisible();
await page.getByRole('textbox').click();
await page
.getByRole('textbox')
.setInputFiles('./app/static/samples/torgo-dataset/MC02_control_head_sentence1.wav');
await expect(page.getByRole('button', { name: 'MC02_control_head_sentence1.' })).toBeVisible();
// Close the "sample" pane; presumably `.ml-auto` matches the tab's close button, which
// must ask for confirmation before the pane disappears.
await expect(page.getByRole('button', { name: 'sample' })).toBeVisible();
await page.locator('.ml-auto').first().click();
await expect(page.getByLabel('Are you absolutely sure?')).toBeVisible();
await expect(page.getByLabel('Are you absolutely sure?')).toContainText(
'This action cannot be undone. This will permanently delete this pane and the analysis conducted in it.'
);
await page.getByRole('button', { name: 'Continue' }).click();
await expect(page.getByRole('button', { name: 'sample' })).toHaveCount(0);
// Open a fresh tab; it should show the empty-pane hint.
await page.getByRole('button', { name: 'New tab' }).click();
await expect(page.getByRole('button', { name: 'New Tab' })).toBeVisible();
await expect(page.getByText('Drag a file from the file')).toBeVisible();
});
// Run this test with Chromium (Firefox is the default browser): Firefox does not support
// the "microphone" permission and throws "Unknown permission: microphone".
//
// Records a file with the in-app recorder and checks that it appears in the file list.
test('internal recorder test', async ({ page }) => {
	const recordButton = page.getByRole('button', { name: 'Record' });
	const newRecording = page.getByRole('button', { name: 'new_recording' });
	const filenameInput = page.locator('input[name="filename"]');
	const groundTruthInput = page.locator('input[name="groundTruth"]');

	await expect(newRecording).toHaveCount(0);
	await recordButton.click();
	// Grant the permission on the context that owns `page`. A context created with
	// browser.newContext() is unrelated to this page and would never be closed.
	await page.context().grantPermissions(['microphone']);
	await page.waitForTimeout(1000);
	await recordButton.click();

	// Name the recording and give it a ground truth, then confirm the dialog.
	await filenameInput.click();
	await filenameInput.fill('new_recording');
	await groundTruthInput.click();
	await groundTruthInput.fill('hello');
	await page.getByRole('button', { name: 'Continue' }).click();

	await expect(newRecording).toBeVisible();
});

View file

@ -0,0 +1,87 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts, log in as the sample user
// through the Analyze link and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// End-to-end check of the error-rate view: creates a deepgram and a whisper track, edits
// some of the transcribed words, then compares the two tracks and verifies the word- and
// character-level metrics that are displayed.
test('error rate test', async ({ page }) => {
// Hover the mode buttons and pick the fifth one (presumably the transcription mode — confirm).
await page.locator('div:nth-child(2) > .inline-flex').hover();
await page.locator('div:nth-child(5) > .inline-flex').click();
// Create a track with the deepgram model.
await page.getByText('no model').nth(1).click();
await page.getByRole('option', { name: 'deepgram' }).click();
await page.getByRole('button', { name: 'Create New Track' }).nth(1).click();
// Fixed waits: presumably give the transcription request time to finish — confirm.
await page.waitForTimeout(6000);
// Create a second track with the whisper model.
await page.getByText('deepgram Create New Track').getByRole('combobox').click();
await page.waitForTimeout(4000);
await page.getByRole('option', { name: 'whisper', exact: true }).click();
await page.getByRole('button', { name: 'Create New Track' }).nth(1).click();
await page.waitForTimeout(5000);
// Edit individual words: triple-click selects the caption, fill() replaces it.
await page.getByRole('button', { name: 'brown' }).nth(1).click({
clickCount: 3
});
await page.getByRole('button', { name: 'brown' }).nth(1).fill('red');
// NOTE(review): the 'brown' -> 'red' edit is issued a second time here; confirm whether
// this targets another track or is a leftover before removing it.
await page.getByRole('button', { name: 'brown' }).nth(1).click({
clickCount: 3
});
await page.getByRole('button', { name: 'brown' }).nth(1).fill('red');
await page.getByRole('button', { name: 'fox' }).nth(1).click({
clickCount: 3
});
await page.getByRole('button', { name: 'fox' }).nth(1).fill('box');
await page.getByRole('button', { name: 'jumps' }).nth(1).click({
clickCount: 3
});
await page.getByRole('button', { name: 'jumps' }).nth(1).fill('junks');
await page.getByRole('button', { name: 'lazy' }).nth(1).click({
clickCount: 3
});
await page.getByRole('button', { name: 'lazy' }).nth(1).fill('lady');
// Switch to the sixth mode button (presumably the error-rate mode — confirm).
await page.locator('div:nth-child(5) > .inline-flex').hover();
await page.locator('div:nth-child(6) > .inline-flex').click();
// Before any track is chosen the pane shows the file name, both selectors and a hint.
await expect(page.getByRole('heading', { name: 'F03_moderate_head_sentence1' })).toBeVisible();
await expect(page.getByText('Reference track').nth(1)).toBeVisible();
await expect(page.getByText('Hypothesis track').nth(1)).toBeVisible();
await expect(
page.getByRole('heading', { name: 'Select a non-empty track for' }).nth(1)
).toBeVisible();
// Pick deepgram-en in the third combobox and whisper-english in the fourth.
await page.getByRole('combobox').nth(2).click();
await page.waitForTimeout(500);
await page.getByRole('option', { name: 'deepgram-en' }).click();
await page.getByRole('combobox').nth(3).click();
await page.waitForTimeout(500);
await page.getByRole('option', { name: 'whisper-english' }).click();
await expect(
page.getByRole('heading', { name: 'This file has no ground truth.' }).nth(1)
).toHaveCount(0);
// Word-level metrics and alignment.
await expect(page.getByRole('heading', { name: 'Word Error Rate' })).toBeVisible();
await expect(page.getByRole('group')).toContainText(
'WER: 55.56% MER: 55.56% WIL: 80.25% WIP: 19.75%'
);
await expect(page.getByRole('group')).toContainText('BERT: 0.74');
await expect(page.getByRole('group')).toContainText('Jaro Winkler: 0.84');
await expect(page.getByText('hits: 4')).toBeVisible();
await expect(page.getByText('substitutions: 5')).toBeVisible();
// The whitespace inside the expected alignment strings is significant; do not reformat it.
await expect(page.getByRole('heading', { name: 'the   quick   brown fox jumps' })).toBeVisible();
await expect(page.getByRole('heading', { name: 'The   quick   __red box junks' })).toBeVisible();
// Character-level metrics and alignment.
await expect(page.getByRole('heading', { name: 'Character Error Rate' })).toBeVisible();
await expect(page.getByRole('group')).toContainText('CER: 20.93%');
await expect(page.getByText('hits: 34')).toBeVisible();
await expect(page.getByText('substitutions: 7')).toBeVisible();
await expect(page.getByText('deletions: 2')).toBeVisible();
await expect(page.getByRole('heading', { name: 't he quick  b r ow n   f ox' })).toBeVisible();
await expect(page.getByRole('heading', { name: 'T he quick  _ r ed _   b ox' })).toBeVisible();
});

View file

@ -0,0 +1,68 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts, log in as the sample user
// through the Analyze link and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// The simple-info panel for MC02 is absent at first, appears once the file has been dragged
// into a pane, and disappears again after the button at div:nth-child(3) is clicked.
test('simple info test', async ({ page }) => {
	const modeButtons = page.locator('div:nth-child(2) > .inline-flex');
	const selectedMode = page.locator('.select > .inline-flex').first();
	const fileInfo = page.getByText(
		'MC02_control_head_sentence1 Duration: 4.57 secondsFile size: 146 KBAverage'
	);
	const firstPane = page.getByText(
		'00.511.522.533.544.5 00:00.000/00:04.800 1.00x F01_severe_head_sentence1 00.511'
	);

	await page.waitForTimeout(1000);

	// Switch mode: no info about MC02 is shown yet.
	await modeButtons.hover();
	await selectedMode.click();
	await expect(fileInfo).toHaveCount(0);

	// Switch back and drop the MC02 file onto the first pane.
	await selectedMode.hover();
	await modeButtons.first().click();
	await page.getByRole('button', { name: 'MC02_control_head_sentence1' }).dragTo(firstPane);
	await page.waitForTimeout(1000);

	// The info for the dropped file is now displayed.
	await modeButtons.hover();
	await selectedMode.click();
	await expect(fileInfo).toBeVisible();

	// After this click the info is gone again.
	await page.locator('div:nth-child(3) > div > .inline-flex').first().click();
	await expect(fileInfo).toHaveCount(0);
});
// Without a selected frame the panel asks the user to select one; after dragging across
// the first canvas it shows duration, pitch and the F1/F2 formants of the frame.
test('frame info test', async ({ page }) => {
	const modeButtons = page.locator('div:nth-child(2) > .inline-flex');
	const selectedMode = page.locator('.select > .inline-flex').first();

	await page.waitForTimeout(1000);
	await modeButtons.hover();
	await selectedMode.click();
	await expect(page.getByText('Select a frame in the').first()).toBeVisible();

	// Go back and drag with the mouse, starting on the first canvas, to select a frame.
	await selectedMode.hover();
	await modeButtons.first().click();
	await page.locator('canvas').first().hover();
	await page.mouse.down();
	await page.mouse.move(500, 0);
	await page.mouse.up();
	await page.waitForTimeout(1000);
	await expect(page.locator('div:nth-child(4) > div')).toBeVisible();

	// The frame details are now listed as headings.
	await modeButtons.hover();
	await selectedMode.click();
	const frameHeadings = [
		'Frame Duration:',
		'Frame Pitch:',
		'Frame F1 formant:',
		'Frame F2 formant:'
	];
	for (const heading of frameHeadings) {
		await expect(page.getByRole('heading', { name: heading })).toBeVisible();
	}
});

View file

@ -0,0 +1,62 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts, log in as the sample user
// through the Analyze link and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// Dropping the MC02 file onto the first pane makes its player (00:04.565 long) appear.
test('drag and drop test', async ({ page }) => {
	const droppedPlayer = page.getByText('00:00.000/00:04.565 1.00x');
	const firstPane = page.getByText(
		'00.511.522.533.544.5 00:00.000/00:04.800 1.00x F01_severe_head_sentence1 00.511'
	);
	const sourceFile = page.getByRole('button', { name: 'MC02_control_head_sentence1' });

	await page.locator('div:nth-child(2) > .inline-flex').hover();
	await page.locator('.main > div:nth-child(3) > .inline-flex').click();

	await expect(droppedPlayer).toHaveCount(0);
	await sourceFile.dragTo(firstPane);
	await expect(droppedPlayer).toBeVisible();
});
// Checks the player in this mode: canvases are rendered, the speed can be changed to
// 1.50x, playback moves the clock away from zero, and clicking the view seeks.
test('playback test', async ({ page }) => {
	const panes = page.getByRole('group');
	const startClock = '00:00.000/00:04.800';

	await page.locator('div:nth-child(2) > .inline-flex').hover();
	await page.locator('.main > div:nth-child(3) > .inline-flex').click();

	// Canvases of the first and the second section are rendered.
	await expect(page.locator('canvas:nth-child(2)').first()).toBeVisible();
	const secondSectionCanvas = page.locator(
		'section:nth-child(2) > div > div > .waveform > div > .scroll > .wrapper > div:nth-child(5) > canvas:nth-child(2)'
	);
	await expect(secondSectionCanvas).toBeVisible();

	// Initial clock and speed, then switch the speed to 1.50x.
	await expect(panes).toContainText(startClock);
	await expect(panes).toContainText('1.00x');
	await page.getByText('1.00x').first().click();
	await page.getByRole('option', { name: '1.50x' }).click();
	await expect(panes).toContainText('1.50x');

	// Press the first button of the F01 section and let it run for a while.
	const f01Section = panes.locator('section').filter({
		hasText: '00.511.522.533.544.5 00:00.000/00:04.800 1.50x F01_severe_head_sentence1 00.511'
	});
	await f01Section.getByRole('button').first().click();
	await page.waitForTimeout(3200);
	await expect(page.getByText(startClock)).toHaveCount(0);

	// Clicking inside the wrapper moves the clock to the clicked position.
	await page.locator('.wrapper > div:nth-child(5)').first().click();
	await expect(panes).toContainText('00:02.396/00:04.800');
});

View file

@ -0,0 +1,127 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts, log in as the sample user
// through the Analyze link and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// Creates a deepgram track and checks that the track's third button downloads a file
// named after the audio file with a .TextGrid extension.
test('download textgrid test', async ({ page }) => {
	await page.locator('div:nth-child(2) > .inline-flex').hover();
	await page.locator('div:nth-child(5) > .inline-flex').click();

	// Choose the deepgram model in the second pane and create the track.
	const modelPicker = page
		.getByText('Select transcription model: no model Create New Track')
		.nth(1)
		.getByRole('combobox');
	await modelPicker.click();
	await page.waitForTimeout(500);
	await page.getByRole('option', { name: 'deepgram' }).click();
	await page.getByRole('button', { name: 'Create New Track' }).nth(1).click();
	await page.waitForTimeout(6000);

	// Start listening for the download before triggering it.
	const pendingDownload = page.waitForEvent('download');
	const trackButtons = page
		.getByRole('group')
		.locator('div')
		.filter({ hasText: 'deepgram Create New Track' })
		.getByRole('button');
	await trackButtons.nth(2).click();
	const textGrid = await pendingDownload;
	await expect(textGrid.suggestedFilename()).toBe('F03_moderate_head_sentence1.TextGrid');
});
// Creates a deepgram track, shift-clicks the caption "quick", then double-clicks a caption
// and types "happy" followed by Enter; both the original and the edited sentence are
// asserted afterwards.
test('split test', async ({ page }) => {
	await page.locator('div:nth-child(2) > .inline-flex').hover();
	await page.locator('div:nth-child(5) > .inline-flex').click();

	// Choose the deepgram model in the second pane and create the track.
	const modelPicker = page
		.getByText('Select transcription model: no model Create New Track')
		.nth(1)
		.getByRole('combobox');
	await modelPicker.click();
	await page.waitForTimeout(500);
	await page.getByRole('option', { name: 'deepgram' }).click();
	await page.getByRole('button', { name: 'Create New Track' }).nth(1).click();
	await page.waitForTimeout(2000);

	// Shift+click on the first "quick" caption.
	await page
		.getByRole('button', { name: 'quick' })
		.first()
		.click({ modifiers: ['Shift'] });

	// Double-click a caption and type the replacement one key at a time.
	await page.locator('div:nth-child(7) > .flex').dblclick();
	for (const key of ['h', 'a', 'p', 'p', 'y', 'Enter']) {
		await page.keyboard.press(key);
	}

	const trackRows = page.getByRole('group').locator('div');
	await expect(trackRows.filter({ hasText: 'the quick brown fox jumps' }).nth(1)).toHaveCount(1);
	await expect(trackRows.filter({ hasText: 'the quick happy fox jumps' }).nth(1)).toBeVisible();
});
// Walks through the track lifecycle: the empty state, creating a deepgram and an
// allosaurus track, renaming the deepgram track, and removing the renamed track.
test('track test', async ({ page }) => {
	const emptyTrackForm = page.getByText('Select transcription model: no model Create New Track');
	const createTrackButtons = page.getByRole('button', { name: 'Create New Track' });
	const trackRows = page.getByRole('group').locator('div');
	const deepgramTitle = page.getByText('deepgram-en', { exact: true });
	const renamedTitle = page.getByText('renamed', { exact: true });

	await page.locator('div:nth-child(2) > .inline-flex').hover();
	await page.locator('div:nth-child(5) > .inline-flex').click();

	// Empty state: model selector, create button and the pane's player are shown.
	await expect(page.getByText('Select transcription model:').first()).toBeVisible();
	await expect(page.getByText('no model').first()).toBeVisible();
	await expect(createTrackButtons.first()).toBeVisible();
	await expect(emptyTrackForm.first().getByRole('button').nth(1)).toBeVisible();
	await expect(page.getByText(':00.000/00:04.800 F01_severe_head_sentence1')).toBeVisible();

	// Create a deepgram track in the second pane.
	await emptyTrackForm.nth(1).getByRole('combobox').click();
	await page.getByRole('option', { name: 'deepgram' }).click();
	await createTrackButtons.nth(1).click();
	await page.waitForTimeout(6000);
	await expect(deepgramTitle).toBeVisible();
	await expect(trackRows.filter({ hasText: 'the quick brown fox jumps' }).nth(1)).toBeVisible();

	// Create an allosaurus track next to it.
	await page.getByText('deepgram Create New Track').getByRole('combobox').click();
	await page.getByRole('option', { name: 'allosaurus' }).click();
	await createTrackButtons.nth(1).click();
	await page.waitForTimeout(6000);
	await expect(page.getByText('allosaurus-en', { exact: true })).toBeVisible();
	await expect(
		trackRows.filter({ hasText: 'ð æ tʰ k ʁ ɪ tʰ b̥ ɹ a w n f' }).nth(1)
	).toBeVisible();

	// Rename the deepgram track: click, triple-click to select, then replace the text.
	await deepgramTitle.click();
	await deepgramTitle.click({ clickCount: 3 });
	await deepgramTitle.fill('renamed');
	await expect(renamedTitle).toBeVisible();

	// Click the third button of the renamed track; the track disappears.
	await trackRows.filter({ hasText: 'renamed' }).nth(1).getByRole('button').nth(2).click();
	await expect(renamedTitle).toHaveCount(0);
});

View file

@ -0,0 +1,51 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts, log in as the sample user
// through the Analyze link and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// The vowel-space chart starts without entries for either file; after a frame has been
// selected on two canvases, both files are titled in the chart and two circles are drawn.
test('vowel space test', async ({ page }) => {
	const secondModeButton = page.locator('div:nth-child(2) > .inline-flex');
	const fourthModeButton = page.locator('div:nth-child(4) > .inline-flex');
	const f01Entry = page.getByTitle('F01_severe_head_sentence1');
	const f03Entry = page.getByTitle('F03_moderate_head_sentence1');

	// Open the chart: axes and the legend toggle are there, no file entries yet.
	await secondModeButton.hover();
	await fourthModeButton.click();
	await expect(
		page.getByText('2,0001,8001,6001,4001,2001,0008006004002000F2 - F12,8002,6002,4002,2002,0001,')
	).toBeVisible();
	await expect(page.getByRole('group')).toContainText('F2 - F1');
	await expect(page.locator('label')).toContainText('Show legend');
	await expect(f01Entry).toHaveCount(0);
	await expect(f03Entry).toHaveCount(0);

	// Go back and drag with the mouse from the first canvas, then from the third.
	await fourthModeButton.hover();
	await secondModeButton.click();
	for (const canvasIndex of [0, 2]) {
		await page.locator('canvas').nth(canvasIndex).hover();
		await page.mouse.down();
		await page.mouse.move(500, 0);
		await page.mouse.up();
	}

	// Reopen the chart: both files are present and plotted.
	await secondModeButton.hover();
	await fourthModeButton.click();
	await expect(f01Entry).toBeVisible();
	await expect(f03Entry).toBeVisible();
	const plottedPoints = page
		.locator('svg')
		.filter({ hasText: '2,0001,8001,6001,4001,2001,' })
		.locator('circle');
	await expect(plottedPoints.first()).toBeVisible();
	await expect(plottedPoints.nth(1)).toBeVisible();
});

View file

@ -0,0 +1,69 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts, log in as the sample user
// through the Analyze link and open the sample session.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
	await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// Changes the playback speed to 1.50x, starts playback of the F01 file, drops the MC02
// file onto the third canvas and checks the F01 section once its clock reads 00:04.831.
test('playback test', async ({ page }) => {
	const panes = page.getByRole('group');
	const startClock = page.getByText('00:00.000/00:04.800');
	// Third button of the F01 section, matched only while its clock reads 00:04.831.
	const buttonAtEnd = panes
		.locator('section')
		.filter({
			hasText: '00.511.522.533.544.5 00:04.831/00:04.800 1.50x F01_severe_head_sentence1 00.511'
		})
		.getByRole('button')
		.nth(2);

	// Switch the speed from 1.00x to 1.50x.
	await expect(panes).toContainText('1.00x');
	await page.getByText('1.00x').first().click();
	await page.getByRole('option', { name: '1.50x' }).click();
	await expect(panes).toContainText('1.50x');
	await expect(startClock).toBeVisible();

	// Press the first button of the F01 section; the clock leaves 00:00.000.
	await panes
		.locator('section')
		.filter({
			hasText: '00.511.522.533.544.5 00:00.000/00:04.800 1.50x F01_severe_head_sentence1 00.511'
		})
		.getByRole('button')
		.first()
		.click();
	await expect(startClock).toHaveCount(0);
	await expect(buttonAtEnd).toHaveCount(0);

	await page
		.getByRole('button', { name: 'MC02_control_head_sentence1' })
		.dragTo(page.locator('canvas').nth(2));
	// toBeVisible must be called: a bare `.toBeVisible` property access asserts nothing.
	await expect(buttonAtEnd).toBeVisible();
});
// Dragging the mouse from the first canvas makes the element at div:nth-child(4) > div
// appear, which is absent beforehand.
test('frame selection test', async ({ page }) => {
	const selection = page.locator('div:nth-child(4) > div');
	const firstCanvas = page.locator('canvas').first();

	await expect(selection).toHaveCount(0);

	// Press on the canvas, move to (500, 0) and release.
	await firstCanvas.hover();
	await page.mouse.down();
	await page.mouse.move(500, 0);
	await page.mouse.up();

	await expect(selection).toBeVisible();
});

View file

@ -0,0 +1,229 @@
import { test, expect } from '../baseFixtures.ts';
import { deleteEverything, setupTests } from '../utils.ts';
// Before each test: run the shared setup helper from utils.ts and log in as the sample
// user through the Record link.
test.beforeEach(async ({ page }) => {
	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Record' }).click();

	// fill() replaces the whole field value, so each credential is entered in one step.
	const username = page.getByLabel('Username');
	await username.click();
	await username.fill('Sample');
	const password = page.getByLabel('Password');
	await password.click();
	await password.fill('password');

	await page.getByRole('button', { name: 'Login' }).click();
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);
// Run this test file with Chromium (Firefox is the default browser): Firefox does not
// support the "microphone" permission and throws "Unknown permission: microphone".
//
// Records several takes for the first prompt, attaches notes to one take, deletes takes
// and records the second prompt, checking the take buttons and the prompt counter.
test('multi-take test', async ({ page }) => {
	const recordButtons = page.getByRole('button', { name: 'Record', exact: true });
	const stopButtons = page.getByRole('button', { name: 'Stop recording' });
	const textbox = page.getByRole('textbox');
	const notesLabel = page.getByText('Notes for take');

	// Upload the prompt file.
	await page.waitForTimeout(1000);
	await textbox.click();
	await textbox.setInputFiles('./app/static/samples/prompts/text-7.txt');
	// Grant the permissions on the context that owns `page`. A context created with
	// browser.newContext() is unrelated to this page and would never be closed.
	await page.context().grantPermissions(['microphone', 'camera']);
	await page.waitForTimeout(5000);

	// Initial state of the recording page.
	await expect(page.getByText('You have recorded 0/7 prompts')).toBeVisible();
	await expect(page.getByRole('button', { name: 'Export recording to session' })).toBeVisible();
	await expect(page.getByRole('button', { name: 'Save files to disk' })).toBeVisible();
	await expect(page.getByRole('heading', { name: 'until missus bofin announced' })).toBeVisible();
	await expect(
		page.locator('section').filter({ hasText: 'until missus bofin announced' }).locator('video')
	).toBeVisible();

	// Record one take, then click the second button of its list item.
	await recordButtons.first().click();
	await page.waitForTimeout(1000);
	await stopButtons.first().click();
	await page.waitForTimeout(1000);
	await expect(page.getByRole('button', { name: 'Take' })).toBeVisible();
	const takeItemButtons = page.locator('li').filter({ hasText: 'Take' }).getByRole('button');
	await expect(takeItemButtons.nth(1)).toBeVisible();
	await takeItemButtons.nth(1).click();

	// Record three takes for the first prompt.
	await recordButtons.first().click();
	await stopButtons.first().click();
	await expect(page.getByText('You have recorded 1/7 prompts')).toBeVisible();
	await recordButtons.first().click();
	await page.waitForTimeout(1000);
	await stopButtons.first().click();
	await recordButtons.first().click();
	await page.waitForTimeout(1000);
	await stopButtons.first().click();

	// Write notes for take 2; fill() replaces the whole value in one step.
	await page.getByRole('button', { name: 'Take 2' }).click();
	await expect(notesLabel).toBeVisible();
	await textbox.click();
	await textbox.fill('Some notes');
	await page.getByRole('button', { name: 'Take 2' }).click();

	// Open the notes of takes 1 and 3.
	// NOTE(review): toContainText('') matches any content, so these two checks cannot fail;
	// an emptiness check was presumably intended — confirm the element type before changing.
	await page.getByRole('button', { name: 'Take 1' }).click();
	await expect(notesLabel).toBeVisible();
	await expect(textbox).toContainText('');
	await page.getByRole('button', { name: 'Take 3' }).click();
	await expect(notesLabel).toBeVisible();
	await expect(textbox).toContainText('');
	await page.getByRole('button', { name: 'Take 3' }).click();

	// Click the second button of take 3, move on and record the second prompt.
	await page.locator('li').filter({ hasText: 'Take 3' }).getByRole('button').nth(1).click();
	await page.locator('button:nth-child(3)').first().click();
	await recordButtons.nth(1).click();
	await page.waitForTimeout(1000);
	await stopButtons.nth(1).click();
	await expect(page.getByRole('button', { name: 'Take' }).nth(2)).toBeVisible();
	await expect(page.getByText('You have recorded 2/7 prompts')).toBeVisible();

	// Click the second button of the second prompt's section; the takes are still listed.
	await page
		.locator('section')
		.filter({ hasText: 'you know it inquired bryce 2' })
		.getByRole('button')
		.nth(1)
		.click();
	await expect(page.getByRole('button', { name: 'Take' }).first()).toBeVisible();
	await expect(page.getByRole('button', { name: 'Take 2' })).toBeVisible();
});
// Records a take for each of the seven prompts in text-7.txt, stepping from prompt to
// prompt with the section buttons, and checks the "recorded n/7" counter along the way.
test('record all prompts test', async ({ page }) => {
	const recordExact = page.getByRole('button', { name: 'Record', exact: true });
	const recordAny = page.getByRole('button', { name: 'Record' });
	const stopButtons = page.getByRole('button', { name: 'Stop recording' });
	// Button number `index` inside the section that shows the given prompt text.
	const promptButton = (promptText: string, index: number) =>
		page.locator('section').filter({ hasText: promptText }).getByRole('button').nth(index);
	const lastPrompt = page.getByText('admires as rapturously as herself 7 Record Take');

	// Upload the prompt file.
	await page.waitForTimeout(1000);
	await page.getByRole('textbox').click();
	await page.getByRole('textbox').setInputFiles('./app/static/samples/prompts/text-7.txt');
	// Grant the permissions on the context that owns `page`. A context created with
	// browser.newContext() is unrelated to this page and would never be closed.
	await page.context().grantPermissions(['microphone', 'camera']);
	await page.waitForTimeout(5000);
	await expect(page.getByText('You have recorded 0/7 prompts')).toBeVisible();

	// Prompt 1.
	await recordExact.first().click();
	await page.waitForTimeout(100);
	await stopButtons.first().click();
	await expect(page.getByText('You have recorded 1/7 prompts')).toBeVisible();

	// Prompt 2.
	await promptButton('until missus bofin announced', 2).click();
	await page.waitForTimeout(100);
	await recordExact.nth(1).click();
	await page.waitForTimeout(100);
	await stopButtons.nth(1).click();
	await expect(page.getByText('You have recorded 2/7 prompts')).toBeVisible();

	// Clicking the fifth button of prompt 2 drops the counter back to 1; record it again.
	await promptButton('you know it inquired bryce 2', 4).click();
	await expect(page.getByText('You have recorded 1/7 prompts')).toBeVisible();
	await recordExact.nth(1).click();
	await page.waitForTimeout(100);
	await stopButtons.nth(1).click();

	// Prompts 3 to 5.
	for (const promptText of [
		'you know it inquired bryce 2',
		'areapagus to decide his fate',
		'second division'
	]) {
		await promptButton(promptText, 2).click();
		await page.waitForTimeout(100);
		await recordExact.nth(2).click();
		await page.waitForTimeout(100);
		await stopButtons.nth(2).click();
	}

	// Prompt 6 (the Record button is matched without `exact` here, as before).
	await promptButton('after recovering twice or', 2).click();
	await page.waitForTimeout(100);
	await recordAny.nth(3).click();
	await page.waitForTimeout(100);
	await stopButtons.nth(2).click();

	// Prompt 7.
	await promptButton('piratical carrer by', 2).click();
	await recordAny.nth(4).click();
	await page.waitForTimeout(100);
	await stopButtons.nth(3).click();
	await expect(page.getByText('You have recorded 7/7 prompts')).toBeVisible();

	// The last prompt stays visible after clicking its third button.
	await expect(lastPrompt).toBeVisible();
	await promptButton('admires as rapturously as', 2).click();
	await expect(lastPrompt).toBeVisible();
});
// Records one prompt and checks that "Save files to disk" downloads text-7.txt.zip.
test('save files test', async ({ page }) => {
	// Upload the prompt file.
	await page.waitForTimeout(1000);
	await page.getByRole('textbox').click();
	await page.getByRole('textbox').setInputFiles('./app/static/samples/prompts/text-7.txt');
	// Grant the permissions on the context that owns `page`. A context created with
	// browser.newContext() is unrelated to this page and would never be closed.
	await page.context().grantPermissions(['microphone', 'camera']);
	await page.waitForTimeout(5000);

	// Record a single take for the first prompt.
	await page.getByRole('button', { name: 'Record', exact: true }).first().click();
	await page.waitForTimeout(1000);
	await page.getByRole('button', { name: 'Stop recording' }).first().click();
	await page.waitForTimeout(5000);
	await expect(page.getByText('You have recorded 1/7 prompts')).toBeVisible();

	// Start listening for the download before triggering it.
	const downloadPromise = page.waitForEvent('download');
	await page.getByRole('button', { name: 'Save files to disk' }).click();
	const download = await downloadPromise;
	// suggestedFilename() is synchronous, so the assertion needs no await.
	expect(download.suggestedFilename()).toBe('text-7.txt.zip');
});
// Records two prompts, exports the recording to a session and checks that a session link
// named after the prompt file shows up on the Analyze page.
test('export to session test', async ({ page }) => {
	const exportedSession = page.getByRole('link', { name: 'text-7.txt' });
	const recordButtons = page.getByRole('button', { name: 'Record', exact: true });
	const stopButtons = page.getByRole('button', { name: 'Stop recording' });

	// Navigate home -> Analyze -> home; no exported session link exists yet.
	await page.waitForTimeout(1000);
	await page.getByRole('link', { name: 'home' }).click();
	await page.getByRole('link', { name: 'Analyze' }).click();
	await page.getByRole('link', { name: 'home' }).click();
	await expect(exportedSession).toHaveCount(0);

	// Open the recorder and upload the prompt file.
	await page.getByRole('link', { name: 'Record' }).click();
	await page.waitForTimeout(1000);
	await page.getByRole('textbox').click();
	await page.getByRole('textbox').setInputFiles('./app/static/samples/prompts/text-7.txt');
	// Grant the permissions on the context that owns `page`. A context created with
	// browser.newContext() is unrelated to this page and would never be closed.
	await page.context().grantPermissions(['microphone', 'camera']);
	await page.waitForTimeout(5000);
	await expect(page.getByText('You have recorded 0/7 prompts')).toBeVisible();

	// Record the first prompt, step to the second one and record it too.
	await recordButtons.first().click();
	await page.waitForTimeout(1000);
	await stopButtons.first().click();
	await page
		.locator('section')
		.filter({ hasText: 'until missus bofin announced' })
		.getByRole('button')
		.nth(2)
		.click();
	await recordButtons.nth(1).click();
	await page.waitForTimeout(1000);
	await stopButtons.nth(1).click();

	// Export and verify the new session is listed under Analyze.
	await page.getByRole('button', { name: 'Export recording to session' }).click();
	await expect(page.getByText('has been created. Go to the session Session')).toBeVisible();
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'Analyze' }).click();
	await expect(exportedSession).toBeVisible();
});

14
app/tests/about.test.ts Normal file
View file

@ -0,0 +1,14 @@
import { test, expect } from './baseFixtures.ts';
import { deleteEverything, setupTests } from './utils.ts';
// Opens the About page from the landing page and checks its navigation link and the
// "About Spectral" / "About Us" content.
test('test', async ({ page }) => {
	const body = page.locator('body');

	await setupTests({ page });
	await page.goto('http://localhost/');
	await page.getByRole('link', { name: 'About' }).click();

	await expect(page.getByRole('link', { name: 'about' })).toBeVisible();
	await expect(body).toContainText('About Spectral');
	await expect(page.getByText('In 2024, we were tasked by')).toBeVisible();
	await expect(body).toContainText('About Us');
});
// After each test: run the shared cleanup helper from utils.ts.
test.afterEach(deleteEverything);

38
app/tests/baseFixtures.ts Normal file
View file

@ -0,0 +1,38 @@
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import { test as baseTest } from '@playwright/test';
// Directory (relative to the current working directory) that receives the coverage dumps.
const istanbulCLIOutput = path.join(process.cwd(), '.nyc_output');

/**
 * Produces a random identifier used to give every coverage dump a distinct file name.
 *
 * @returns 16 random bytes encoded as a 32-character lowercase hex string
 */
export function generateUUID(): string {
	const randomBytes = crypto.randomBytes(16);
	return randomBytes.toString('hex');
}
/**
 * Playwright `test` object whose `context` fixture is overridden so that the
 * `window.__coverage__` object of every page is written to `.nyc_output`
 * (one JSON file per dump).
 *
 * Coverage is flushed at two moments: whenever a page fires `beforeunload`,
 * and once more for every page still open after the test body has finished.
 */
export const test = baseTest.extend({
context: async ({ context }, use) => {
// Runs inside every page of this context. On `beforeunload` the page hands its
// serialized `window.__coverage__` to the Node-side `collectIstanbulCoverage`.
// presumably `__coverage__` is populated by the istanbul instrumentation -- TODO confirm
await context.addInitScript(() =>
window.addEventListener('beforeunload', () =>
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
(window as any).collectIstanbulCoverage(JSON.stringify((window as any).__coverage__))
)
);
// Make sure the output directory exists before any dump is written.
await fs.promises.mkdir(istanbulCLIOutput, { recursive: true });
// Node-side sink callable from the page as `window.collectIstanbulCoverage`.
// Falsy payloads (e.g. `JSON.stringify(undefined)` when no coverage object
// exists) are ignored; otherwise the JSON is written synchronously to a
// uniquely named file.
await context.exposeFunction('collectIstanbulCoverage', (coverageJSON: string) => {
if (coverageJSON)
fs.writeFileSync(
path.join(istanbulCLIOutput, `playwright_coverage_${generateUUID()}.json`),
coverageJSON
);
});
// Hand the prepared context to the test; everything below is fixture teardown.
await use(context);
// Flush coverage explicitly for each page returned by `context.pages()`
// after the test body has finished.
for (const page of context.pages()) {
await page.evaluate(() =>
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
(window as any).collectIstanbulCoverage(JSON.stringify((window as any).__coverage__))
);
}
}
});
export const expect = test.expect;

View file

@ -1,5 +1,9 @@
import { test } from '@playwright/test';
import { test } from './baseFixtures.ts';
import { deleteEverything, setupTests } from './utils.ts';
// Seeds the database through the admin page, then loads the root route.
// NOTE(review): the title promises an h1 check, but the body makes no assertion;
// the test can only fail if seeding or navigation itself throws.
// NOTE(review): '/' is a relative URL -- presumably resolved against the
// configured Playwright baseURL; confirm against the Playwright config.
test('index page has expected h1', async ({ page }) => {
await setupTests({ page });
await page.goto('/');
});
test.afterEach(deleteEverything);

View file

@ -1,8 +1,4 @@
import { test as setup } from '@playwright/test';
import { test as setup } from './baseFixtures.ts';
import { setupTests } from './utils.ts';
setup('setup the sample account', async ({ page }) => {
await page.goto('http://localhost/admin');
await page.getByRole('button', { name: 'Seed database with sample user' }).click();
await page.getByRole('button', { name: 'Seed database with sample session' }).click();
await page.getByRole('button', { name: 'Seed database with TORGO' }).click();
});
setup('setup the sample account', setupTests);

View file

@ -1,13 +1,4 @@
import { test as teardown, expect } from '@playwright/test';
import { test as teardown } from './baseFixtures.ts';
import { deleteEverything } from './utils.ts';
teardown('global teardown', async ({ page }) => {
await page.goto('http://localhost/admin');
await page.getByRole('button', { name: 'Delete all data' }).click();
await page.getByRole('button', { name: 'Seed database with sample user' }).click();
await page.getByRole('button', { name: 'Seed database with sample session' }).click();
await page.getByRole('button', { name: 'Seed database with TORGO' }).click();
await expect(page.locator('ul')).toContainText('Files: 7 rows');
await page.getByText('Sessions: 1 rows').click();
await expect(page.locator('ul')).toContainText('Sessions: 1 rows');
await expect(page.locator('ul')).toContainText('Users: 1 rows');
});
teardown('global teardown', deleteEverything);

View file

@ -1,13 +1,15 @@
import { test, expect } from '@playwright/test';
import { test, expect } from './baseFixtures.ts';
import { deleteEverything, setupTests } from './utils.ts';
test('can login under sample user', async ({ page }) => {
await setupTests({ page });
await page.goto('http://localhost/');
await page.getByRole('link', { name: 'Start analyzing' }).click();
await page.getByRole('link', { name: 'Analyze' }).click();
await page.getByLabel('Username').click();
await page.getByLabel('Username').fill('Sample');
await page.getByLabel('Password').click();
await page.getByLabel('Password').fill('password');
await page.getByRole('button', { name: 'Submit' }).click();
await page.getByRole('button', { name: 'Login' }).click();
await expect(page.getByRole('link', { name: 'Sample Session sample-session' })).toBeVisible();
await expect(page.getByRole('menubar')).toContainText('home session Spectral Profile');
await expect(page.locator('h2')).toContainText('Sample Session');
@ -17,12 +19,15 @@ test('can login under sample user', async ({ page }) => {
});
test('invalid username or password', async ({ page }) => {
await setupTests({ page });
await page.goto('http://localhost/');
await page.getByRole('link', { name: 'Start analyzing' }).click();
await page.getByRole('link', { name: 'Analyze' }).click();
await page.getByLabel('Username').click();
await page.getByLabel('Username').fill('Spectral');
await page.getByLabel('Username').press('Tab');
await page.getByLabel('Password').fill('password');
await page.getByRole('button', { name: 'Submit' }).click();
await page.getByRole('button', { name: 'Login' }).click();
await expect(page.locator('form')).toContainText('Invalid username or password');
});
test.afterEach(deleteEverything);

View file

@ -1,4 +1,5 @@
import { test, expect } from '@playwright/test';
import { test, expect } from './baseFixtures.ts';
import { deleteEverything, setupTests } from './utils.ts';
test.use({
viewport: {
@ -8,8 +9,9 @@ test.use({
});
test('register and walk through', async ({ page }) => {
await setupTests({ page });
await page.goto('http://localhost/');
await page.getByRole('link', { name: 'Start analyzing' }).click();
await page.getByRole('link', { name: 'Analyze' }).click();
await page.getByRole('link', { name: 'Sign up instead' }).click();
await page.getByLabel('Username').click();
await page.getByLabel('Username').fill('Roman');
@ -25,15 +27,15 @@ test('register and walk through', async ({ page }) => {
await expect(page.getByText('String must contain at least')).toBeVisible();
await page.getByLabel('Password').click();
await page.getByLabel('Password').fill('password');
await page.getByRole('button', { name: 'Submit' }).click();
await page.getByRole('button', { name: 'Sign up' }).click();
await expect(page.getByRole('button')).toBeVisible();
await page.waitForTimeout(100);
await page.getByRole('button').click();
await page.waitForTimeout(100);
await expect(page.locator('div').filter({ hasText: 'Profile' }).nth(2)).toBeVisible();
await page.getByRole('textbox').click();
await page.getByRole('textbox').fill('new session asdf');
await page.getByRole('textbox').press('Enter');
await page.locator('input[name="sessionName"]').click();
await page.locator('input[name="sessionName"]').fill('new session asdf');
await page.locator('input[name="sessionName"]').press('Enter');
await expect(page.getByRole('main').filter({ hasNotText: 'sessions' })).toContainText(
'No files yet!'
);
@ -50,6 +52,8 @@ test('register and walk through', async ({ page }) => {
await page.getByLabel('Username').fill('Roman');
await page.getByLabel('Password').click();
await page.getByLabel('Password').fill('password');
await page.getByRole('button', { name: 'Submit' }).click();
await page.getByRole('button', { name: 'Login' }).click();
await expect(page.locator('h2')).toContainText('new session asdf');
});
test.afterEach(deleteEverything);

View file

@ -1,13 +1,21 @@
import { test, expect } from '@playwright/test';
import { test, expect } from './baseFixtures.ts';
import { deleteEverything, setupTests } from './utils.ts';
test('everything in session is visible', async ({ page }) => {
test.beforeEach(async ({ page }) => {
await setupTests({ page });
await page.goto('http://localhost/');
await page.getByRole('link', { name: 'Start analyzing' }).click();
await page.getByRole('link', { name: 'Analyze' }).click();
await page.getByLabel('Username').click();
await page.getByLabel('Username').press('CapsLock');
await page.getByLabel('Username').fill('S');
await page.getByLabel('Username').press('CapsLock');
await page.getByLabel('Username').fill('Sample');
await page.getByLabel('Password').click();
await page.getByLabel('Password').fill('password');
await page.getByRole('button', { name: 'Submit' }).click();
await page.getByRole('button', { name: 'Login' }).click();
});
test('everything in session is visible', async ({ page }) => {
await page.getByRole('link', { name: 'Sample Session sample-session' }).click();
await expect(page.locator('canvas').first()).toBeVisible();
await expect(page.locator('canvas').nth(2)).toBeVisible();
@ -16,7 +24,7 @@ test('everything in session is visible', async ({ page }) => {
.getByRole('group')
.locator('section')
.filter({
hasText: '00:00.000/00:04.800 1.00x F01_severe_head_sentence1 00:00.000/00:03.404 1.00x'
hasText: '00.511.522.533.544.5 00:00.000/00:04.800 1.00x F01_severe_head_sentence1 00.511'
})
.getByRole('button')
.first()
@ -26,17 +34,45 @@ test('everything in session is visible', async ({ page }) => {
.getByRole('group')
.locator('section')
.filter({
hasText: '00:00.000/00:04.800 1.00x F01_severe_head_sentence1 00:00.000/00:03.404 1.00x'
hasText: '00.511.522.533.544.5 00:00.000/00:04.800 1.00x F01_severe_head_sentence1 00.511'
})
.getByRole('button')
.nth(1)
).toBeVisible();
await expect(page.locator('li').filter({ hasText: 'F01_severe_head_sentence1' })).toBeVisible();
await expect(page.locator('li').filter({ hasText: 'F03_moderate_head_sentence1' })).toBeVisible();
await expect(page.getByRole('button', { name: 'F01_severe_head_sentence1' })).toBeVisible();
await expect(page.getByRole('button', { name: 'F03_moderate_head_sentence1' })).toBeVisible();
await expect(page.getByRole('button', { name: 'MC02_control_head_sentence1' })).toBeVisible();
await expect(page.getByText('home session sample-session Spectral Profile')).toBeVisible();
await expect(
page.getByText('home session Sample Session Spectral Show Info Profile')
).toBeVisible();
await expect(page.getByRole('button', { name: 'Record' })).toBeVisible();
await expect(page.getByRole('textbox')).toBeVisible();
await expect(page.getByText('1.00x').first()).toBeVisible();
await expect(page.getByText('1.00x').nth(1)).toBeVisible();
});
// Session list flow: delete the seeded sample session through its context menu,
// create a new session named "spectrum", and confirm it appears in the list.
// Runs after this file's `beforeEach` hook.
test('session selection screen test', async ({ page }) => {
  // Locators resolve lazily on each use, so they can be declared once up front.
  const sampleSessionLink = page.getByRole('link', { name: 'Sample Session sample-session' });
  const spectrumLink = page.getByRole('link', { name: 'spectrum' });
  const sessionNameInput = page.locator('input[name="sessionName"]');

  // Starting state: only the seeded session exists.
  await expect(sampleSessionLink).toBeVisible();
  await expect(spectrumLink).toHaveCount(0);

  // Delete the sample session via right-click menu and confirmation dialog.
  await sampleSessionLink.click({ button: 'right' });
  await page.getByRole('menuitem', { name: 'Delete' }).click();
  await page.getByRole('button', { name: 'Continue' }).click();
  await expect(sampleSessionLink).toHaveCount(0);

  // Create a new session through the name prompt.
  await page.getByRole('button').click();
  await expect(page.getByLabel('Enter new session name')).toBeVisible();
  await sessionNameInput.click();
  await sessionNameInput.fill('spectrum');
  await sessionNameInput.press('Enter');

  // The freshly created session opens with the empty-state message.
  const emptyState = page
    .locator('div')
    .filter({ hasText: /^No files yet!$/ })
    .first();
  await expect(emptyState).toBeVisible();

  // Back on the session list the new session must be listed.
  await page.getByRole('link', { name: 'session' }).click();
  await expect(spectrumLink).toBeVisible();
});
test.afterEach(deleteEverything);

20
app/tests/utils.ts Normal file
View file

@ -0,0 +1,20 @@
import type { Page } from '@playwright/test';
type Params = { page: Page };
/**
 * Put the database into the seeded state by driving the admin page:
 * wipe all data, then seed the sample user, the sample session and TORGO.
 *
 * The buttons are clicked strictly one after another, in the order listed.
 */
export const setupTests = async ({ page }: Params) => {
  await page.goto('http://localhost/admin');

  const adminButtons = [
    'Delete all data',
    'Seed database with sample user',
    'Seed database with sample session',
    'Seed database with TORGO'
  ];
  for (const name of adminButtons) {
    await page.getByRole('button', { name }).click();
  }
};
/**
 * Reset the database through the admin page and close the page afterwards.
 *
 * Note that, despite the name, the data is not left empty: after
 * "Delete all data" the sample user, sample session and TORGO are seeded again.
 */
export const deleteEverything = async ({ page }: Params) => {
  await page.goto('http://localhost/admin');

  const adminButtons = [
    'Delete all data',
    'Seed database with sample user',
    'Seed database with sample session',
    'Seed database with TORGO'
  ];
  for (const name of adminButtons) {
    await page.getByRole('button', { name }).click();
  }

  await page.close();
};

View file

@ -10,7 +10,8 @@
"sourceMap": true,
"target": "ES2022",
"strict": true,
"moduleResolution": "bundler"
"moduleResolution": "bundler",
"allowImportingTsExtensions": true
}
// Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
// except $lib which is handled by https://kit.svelte.dev/docs/configuration#files

View file

@ -1,8 +1,18 @@
import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vitest/config';
import istanbul from 'vite-plugin-istanbul';
export default defineConfig({
plugins: [sveltekit()],
plugins: [
sveltekit(),
istanbul({
include: 'src/*',
exclude: ['node_modules', 'tests/'],
extension: ['.ts', '.svelte'],
requireEnv: false,
forceBuildInstrument: true
})
],
test: {
include: ['src/**/*{test,spec}.{js,ts}']
},

View file

@ -22,17 +22,18 @@ services:
env_file:
- '.variables.env'
volumes:
- ./data:/data
- ./data:/var/lib/postgresql/data
app:
restart: unless-stopped
stop_grace_period: 5s
environment:
PUBLIC_KERNEL_ORIGIN: http://kernel
PUBLIC_KERNEL_ORIGIN: http://kernel:8000
PG_CONNECTION_STRING: postgres://user:password@postgres:5432/spectral_db
build:
context: ./app
dockerfile: Dockerfile.prod
image: knyazer/spectral-app
expose:
- "5173"
depends_on:
@ -48,6 +49,7 @@ services:
- "443:443"
volumes:
- ./nginx.prod.conf:/etc/nginx/nginx.conf:ro
- /certs:/certs:ro
depends_on:
- app
@ -57,6 +59,7 @@ services:
build:
context: ./kernel
dockerfile: Dockerfile.prod
image: knyazer/spectral-kernel
expose:
- "8000"
depends_on:

View file

@ -28,7 +28,7 @@ services:
restart: unless-stopped
stop_grace_period: 500ms
environment:
PUBLIC_KERNEL_ORIGIN: http://kernel
PUBLIC_KERNEL_ORIGIN: http://kernel:8000
PG_CONNECTION_STRING: postgres://user:password@postgres:5432/spectral_db
build:
context: ./app

View file

@ -55,6 +55,6 @@ WORKDIR /kernel
COPY . .
# Command to run the application
RUN poetry export -f requirements.txt --without-hashes --without dev | poetry run pip3 install -r /dev/stdin
RUN poetry export -f requirements.txt --without-hashes --without=dev | poetry run pip3 install -r /dev/stdin
CMD poetry run uvicorn --workers=5 spectral.main:app --host 0.0.0.0 --proxy-headers --timeout-graceful-shutdown 10

230
kernel/poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "aiofiles"
@ -664,13 +664,13 @@ files = [
[[package]]
name = "email-validator"
version = "2.1.1"
version = "2.1.2"
description = "A robust email address syntax and deliverability validation library."
optional = false
python-versions = ">=3.8"
files = [
{file = "email_validator-2.1.1-py3-none-any.whl", hash = "sha256:97d882d174e2a65732fb43bfce81a3a834cbc1bde8bf419e30ef5ea976370a05"},
{file = "email_validator-2.1.1.tar.gz", hash = "sha256:200a70680ba08904be6d1eef729205cc0d687634399a5924d842533efb824b84"},
{file = "email_validator-2.1.2-py3-none-any.whl", hash = "sha256:d89f6324e13b1e39889eab7f9ca2f91dc9aebb6fa50a6d8bd4329ab50f251115"},
{file = "email_validator-2.1.2.tar.gz", hash = "sha256:14c0f3d343c4beda37400421b39fa411bbe33a75df20825df73ad53e06a9f04c"},
]
[package.dependencies]
@ -1839,6 +1839,148 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
]
[[package]]
name = "nvidia-cublas-cu12"
version = "12.1.3.1"
description = "CUBLAS native runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
{file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"},
]
[[package]]
name = "nvidia-cuda-cupti-cu12"
version = "12.1.105"
description = "CUDA profiling tools runtime libs."
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
{file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"},
]
[[package]]
name = "nvidia-cuda-nvrtc-cu12"
version = "12.1.105"
description = "NVRTC native runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
{file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"},
]
[[package]]
name = "nvidia-cuda-runtime-cu12"
version = "12.1.105"
description = "CUDA Runtime native Libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
{file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"},
]
[[package]]
name = "nvidia-cudnn-cu12"
version = "8.9.2.26"
description = "cuDNN runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
]
[package.dependencies]
nvidia-cublas-cu12 = "*"
[[package]]
name = "nvidia-cufft-cu12"
version = "11.0.2.54"
description = "CUFFT native runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
{file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"},
]
[[package]]
name = "nvidia-curand-cu12"
version = "10.3.2.106"
description = "CURAND native runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
{file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"},
]
[[package]]
name = "nvidia-cusolver-cu12"
version = "11.4.5.107"
description = "CUDA solver native runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
{file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"},
]
[package.dependencies]
nvidia-cublas-cu12 = "*"
nvidia-cusparse-cu12 = "*"
nvidia-nvjitlink-cu12 = "*"
[[package]]
name = "nvidia-cusparse-cu12"
version = "12.1.0.106"
description = "CUSPARSE native runtime libraries"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
{file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"},
]
[package.dependencies]
nvidia-nvjitlink-cu12 = "*"
[[package]]
name = "nvidia-nccl-cu12"
version = "2.20.5"
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"},
{file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"},
]
[[package]]
name = "nvidia-nvjitlink-cu12"
version = "12.5.40"
description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"},
{file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"},
]
[[package]]
name = "nvidia-nvtx-cu12"
version = "12.1.105"
description = "NVIDIA Tools Extension"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
{file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"},
]
[[package]]
name = "openai"
version = "1.34.0"
@ -3352,21 +3494,31 @@ files = [
[[package]]
name = "torch"
version = "2.3.1+cpu"
version = "2.3.1"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = false
python-versions = ">=3.8.0"
files = [
{file = "torch-2.3.1+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:d679e21d871982b9234444331a26350902cfd2d5ca44ce6f49896af8b3a3087d"},
{file = "torch-2.3.1+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:500bf790afc2fd374a15d06213242e517afccc50a46ea5955d321a9a68003335"},
{file = "torch-2.3.1+cpu-cp311-cp311-linux_x86_64.whl", hash = "sha256:a272defe305dbd944aa28a91cc3db0f0149495b3ebec2e39723a7224fa05dc57"},
{file = "torch-2.3.1+cpu-cp311-cp311-win_amd64.whl", hash = "sha256:d2965eb54d3c8818e2280a54bd53e8246a6bb34e4b10bd19c59f35b611dd9f05"},
{file = "torch-2.3.1+cpu-cp312-cp312-linux_x86_64.whl", hash = "sha256:2141a6cb7021adf2f92a0fd372cfeac524ba460bd39ce3a641d30a561e41f69a"},
{file = "torch-2.3.1+cpu-cp312-cp312-win_amd64.whl", hash = "sha256:6acdca2530462611095c44fd95af75ecd5b9646eac813452fe0adf31a9bc310a"},
{file = "torch-2.3.1+cpu-cp38-cp38-linux_x86_64.whl", hash = "sha256:cab92d5101e6db686c5525e04d87cedbcf3a556073d71d07fbe7d1ce09630ffb"},
{file = "torch-2.3.1+cpu-cp38-cp38-win_amd64.whl", hash = "sha256:dbc784569a367fd425158cf4ae82057dd3011185ba5fc68440432ba0562cb5b2"},
{file = "torch-2.3.1+cpu-cp39-cp39-linux_x86_64.whl", hash = "sha256:a3cb8e61ba311cee1bb7463cbdcf3ebdfd071e2091e74c5785e3687eb02819f9"},
{file = "torch-2.3.1+cpu-cp39-cp39-win_amd64.whl", hash = "sha256:df68668056e62c0332e03f43d9da5d4278b39df1ba58d30ec20d34242070955d"},
{file = "torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3"},
{file = "torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308"},
{file = "torch-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:32b05fe0d1ada7f69c9f86c14ff69b0ef1957a5a54199bacba63d22d8fab720b"},
{file = "torch-2.3.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7c09a94362778428484bcf995f6004b04952106aee0ef45ff0b4bab484f5498d"},
{file = "torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b2ec81b61bb094ea4a9dee1cd3f7b76a44555375719ad29f05c0ca8ef596ad39"},
{file = "torch-2.3.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:490cc3d917d1fe0bd027057dfe9941dc1d6d8e3cae76140f5dd9a7e5bc7130ab"},
{file = "torch-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5802530783bd465fe66c2df99123c9a54be06da118fbd785a25ab0a88123758a"},
{file = "torch-2.3.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:a7dd4ed388ad1f3d502bf09453d5fe596c7b121de7e0cfaca1e2017782e9bbac"},
{file = "torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:a486c0b1976a118805fc7c9641d02df7afbb0c21e6b555d3bb985c9f9601b61a"},
{file = "torch-2.3.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:224259821fe3e4c6f7edf1528e4fe4ac779c77addaa74215eb0b63a5c474d66c"},
{file = "torch-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5fdccbf6f1334b2203a61a0e03821d5845f1421defe311dabeae2fc8fbeac2d"},
{file = "torch-2.3.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:3c333dc2ebc189561514eda06e81df22bf8fb64e2384746b2cb9f04f96d1d4c8"},
{file = "torch-2.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:07e9ba746832b8d069cacb45f312cadd8ad02b81ea527ec9766c0e7404bb3feb"},
{file = "torch-2.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:462d1c07dbf6bb5d9d2f3316fee73a24f3d12cd8dacf681ad46ef6418f7f6626"},
{file = "torch-2.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff60bf7ce3de1d43ad3f6969983f321a31f0a45df3690921720bcad6a8596cc4"},
{file = "torch-2.3.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:bee0bd33dc58aa8fc8a7527876e9b9a0e812ad08122054a5bff2ce5abf005b10"},
{file = "torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aaa872abde9a3d4f91580f6396d54888620f4a0b92e3976a6034759df4b961ad"},
{file = "torch-2.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:3d7a7f7ef21a7520510553dc3938b0c57c116a7daee20736a9e25cbc0e832bdc"},
{file = "torch-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:4777f6cefa0c2b5fa87223c213e7b6f417cf254a45e5829be4ccd1b2a4ee1011"},
{file = "torch-2.3.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:2bb5af780c55be68fe100feb0528d2edebace1d55cb2e351de735809ba7391eb"},
]
[package.dependencies]
@ -3375,18 +3527,25 @@ fsspec = "*"
jinja2 = "*"
mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""}
networkx = "*"
nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
sympy = "*"
triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
typing-extensions = ">=4.8.0"
[package.extras]
opt-einsum = ["opt-einsum (>=3.3)"]
optree = ["optree (>=0.9.1)"]
[package.source]
type = "legacy"
url = "https://download.pytorch.org/whl/cpu"
reference = "pytorch_cpu"
[[package]]
name = "tqdm"
version = "4.66.4"
@ -3473,6 +3632,29 @@ torchhub = ["filelock", "huggingface-hub (>=0.23.0,<1.0)", "importlib-metadata",
video = ["av (==9.2.0)", "decord (==0.6.0)"]
vision = ["Pillow (>=10.0.1,<=15.0)"]
[[package]]
name = "triton"
version = "2.3.1"
description = "A language and compiler for custom Deep Learning operations"
optional = false
python-versions = "*"
files = [
{file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"},
{file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"},
{file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"},
{file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"},
{file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"},
{file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"},
]
[package.dependencies]
filelock = "*"
[package.extras]
build = ["cmake (>=3.20)", "lit"]
tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
[[package]]
name = "typer"
version = "0.12.3"
@ -3626,13 +3808,13 @@ files = [
[[package]]
name = "urllib3"
version = "2.2.1"
version = "2.2.2"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.8"
files = [
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
{file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"},
{file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"},
]
[package.extras]
@ -3995,4 +4177,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "1fb4e257666189bc539017443f6588efa3debbd5d68676f7a5989be9fd722c42"
content-hash = "4f6949dbcbfe3eb478efa663bcbad1e594a39cf16a4c01ea9fe092bb6744d0ca"

View file

@ -11,8 +11,8 @@ authors = [
]
readme = "README.md"
homepage = "https://spectral.ewi.tudelft.nl"
#repository = ""
#documentation = ""
repository = "https://github.com/TU-Delft-SALT-Group/spectral"
documentation = "https://spectral.ewi.tudelft.nl/docs"
keywords = ["spectral", "atypical speech analysis", "speech recognition"]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
@ -42,9 +42,9 @@ allosaurus = "^1.0.2"
python-magic = "^0.4.27"
openai = "^1.33.0"
mytextgrid = "^0.8.0"
torch = {version = "^2.3.1+cpu", source = "pytorch_cpu"}
bert-score = "^0.3.13"
jarowinkler = "^2.0.1"
torch = "^2.3.1"
[tool.poetry.group.dev.dependencies]
pytest = "^8.2.0"
@ -57,12 +57,10 @@ pytest-testmon = "^2.1.1"
pytest-mock = "^3.14.0"
beartype = "^0.18.5"
[[tool.poetry.source]]
name = "pytorch_cpu"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"
priority = "supplemental"
[tool.mutmut]
paths_to_mutate="spectral/"
@ -84,4 +82,4 @@ select = ["F", "E", "W", "C90", "I", "N", "D", "UP", "YTT", "ASYNC",
"TID", "TCH", "INT", "ARG", "PTH", "TD", "FIX", "ERA", "PD",
"PL", "C", "R", "TRY", "FLY", "NPY", "AIR", "PERF", "RUF"]
ignore = ["D417", "D205", "D203", "D212", "EM101", "B008", "RET503", "PYI041", "BLE001", "TRY003", "TCH001", "TCH003", "ISC001"]
ignore = ["D417", "D205", "D203", "D212", "EM101", "B008", "RET503", "PYI041", "BLE001", "TRY003", "TCH001", "TCH003", "ISC001", "COM812"]

View file

@ -50,6 +50,21 @@ class SimpleInfoResponse(BaseModel):
frame: FrameAnalysisResponse | None
class WaveformResponse(BaseModel):
"""
WaveformResponse model representing pitches and formants found in a signal.
Both fields are declared without a default value.
Attributes
----------
pitch (list[float]): List of pitch frequencies for multiple frames (in Hz).
formants (list[list[float]]): List of f1 and f2's for multiple frames (in Hz);
presumably one inner [f1, f2] list per frame -- TODO confirm against the producer.
"""
pitch: list[float]
formants: list[list[float]]
class VowelSpaceResponse(BaseModel):
"""
VowelSpaceResponse model representing formant location in the vowel space.
@ -213,3 +228,10 @@ class TranscriptionsTextgridModel(BaseModel):
"""Textgrid model transcription representation."""
transcriptions: list[TranscriptionTextgridModel]
class GeneratedTranscriptionsModel(BaseModel):
"""
Transcriptions that have been generated automatically with one of the possible models.
Attributes
----------
language (str | None): Language of the transcription, or None
(presumably when the model does not report one -- TODO confirm).
transcription (list[TranscriptionSegment]): The generated transcription segments.
"""
language: str | None
transcription: list[TranscriptionSegment]

View file

@ -9,6 +9,8 @@ from bert_score import BERTScorer
from jarowinkler import jarowinkler_similarity
from jiwer import process_characters, process_words
scorer = BERTScorer(model_type="bert-base-uncased")
def calculate_error_rates(
reference_annotations: list[dict],
@ -92,7 +94,6 @@ def calculate_bert_score(reference: str, hypothesis: str) -> float:
if hypothesis == "":
return 0.0
scorer = BERTScorer(model_type="bert-base-uncased")
(p, r, f) = scorer.score([hypothesis], [reference])
# Return the F1 score

View file

@ -12,11 +12,13 @@ from fastapi.responses import JSONResponse
from .data_objects import (
ErrorRateResponse,
FileStateBody,
GeneratedTranscriptionsModel,
SimpleInfoResponse,
SpectrogramResponse,
TranscriptionSegment,
TranscriptionsTextgridModel,
VowelSpaceResponse,
WaveformResponse,
)
from .database import Database
from .mode_handler import (
@ -75,6 +77,7 @@ app: FastAPI = FastAPI(default_response_class=ORJSONResponse, root_path="/api")
VowelSpaceResponse,
list[list[TranscriptionSegment]],
ErrorRateResponse,
WaveformResponse,
SpectrogramResponse,
],
responses=signal_modes_response_examples,
@ -133,11 +136,14 @@ async def analyze_signal_mode(
@app.get(
"/transcription/{model}/{file_id}",
response_model=dict[str, str | list[TranscriptionSegment]],
response_model=GeneratedTranscriptionsModel,
responses=transcription_response_examples,
)
async def transcribe_file(
model: Annotated[str, Path(title="The transcription model")],
model: Annotated[
Literal["whisper", "deepgram", "allosaurus", "whisper-torgo-1-epoch"],
Path(title="The transcription model"),
],
file_id: Annotated[str, Path(title="The ID of the file")],
database=Depends(get_db),
) -> Any:

View file

@ -15,7 +15,9 @@ from .frame_analysis import (
validate_frame_index,
)
from .signal_analysis import (
calculate_sound_f1_f2,
calculate_sound_formants_for_spectrogram,
calculate_sound_pitch,
get_audio,
signal_to_sound,
simple_signal_info,
@ -91,9 +93,34 @@ def spectrogram_mode(database: DatabaseType, file_state: FileStateType) -> Any:
return calculate_sound_formants_for_spectrogram(sound)
def waveform_mode(database: DatabaseType, file_state: FileStateType) -> Any: # noqa: ARG001
"""TBD."""
return None
def waveform_mode(database: DatabaseType, file_state: FileStateType) -> dict[str, Any]:
    """
    Collect the per-frame pitch and f1/f2 formant data shown in waveform mode.

    Parameters
    ----------
    - database: The database object used to fetch the file.
    - file_state: A dictionary describing the file to analyse.

    Returns
    -------
    - dict: ``{"pitch": [...], "formants": [...]}``; a list is empty when the
      corresponding calculation returned ``None``.
    """
    audio = get_audio(get_file(database, file_state))
    sound = signal_to_sound(audio.get_array_of_samples(), audio.frame_rate)

    # Each calculation may return None; fall back to an empty list in that case.
    pitch_result = calculate_sound_pitch(sound)
    pitch_values = [] if pitch_result is None else pitch_result["data"]

    formants_result = calculate_sound_f1_f2(sound)
    formant_values = [] if formants_result is None else formants_result["data"]

    return {"pitch": pitch_values, "formants": formant_values}
def vowel_space_mode(

View file

@ -7,12 +7,12 @@ import tempfile
from allosaurus.app import read_recognizer # type: ignore
from spectral.transcription.transcription_utils import fill_gaps
from spectral.types import FileStateType
from spectral.types import FileStateType, TranscriptionType
from .deepgram import deepgram_transcription
def allosaurus_transcription(file: FileStateType) -> dict[str, str | list[dict]]:
def allosaurus_transcription(file: FileStateType) -> TranscriptionType:
"""
Calculate the transcription on phoneme level using the allosaurus model.
@ -113,7 +113,7 @@ def get_phoneme_word_splits(
def get_phoneme_transcriptions(
language: str,
phoneme_word_splits: list[dict],
) -> dict[str, str | list[dict]]:
) -> TranscriptionType:
"""
Convert the phoneme word groups to 1 list of phoneme transcriptions with adjusted start
and end times.

View file

@ -6,8 +6,10 @@ import os
from deepgram import DeepgramClient, PrerecordedOptions
from spectral.types import TranscriptionType
def deepgram_transcription(data: bytes) -> dict[str, str | list[dict]]:
def deepgram_transcription(data: bytes) -> TranscriptionType:
"""
Transcribe audio data using Deepgram API.

View file

@ -0,0 +1,103 @@
"""
Interface to load arbitrary* HF models.
Some notes:
Most of the models from HF we tried require patching by hand to be run. Don't
be the person who is not capable of checking the correctness of export of their model!
If the model is exported very well, you can use just a single line (with transformers.pipeline)
to run it, which is very convenient. But, the current model we use requires patching of its
tokenizer by hand, which is *very* annoying. And this is the reason for such an awkward loading
of the model, where we don't load it from the huggingface, but load it from the local folder.
Creating such a folder is actually not that hard:
clone the whisper tiny: git clone https://huggingface.co/openai/whisper-tiny
clone the torgo-whatever model: git clone https://huggingface.co/jindaznb/torgo_tiny_finetune_F01_frozen_encoder
Make sure you run ```git lfs pull``` in each repo, this will pull the actual weights. You might need
to install the git-lfs as a system package, with something like ```apt install git-lfs```.
Afterwards, you need to copy these files from the "whisper" folder into the "torgo-whatever" folder:
* merges.txt
* vocab.json
* tokenizer.json
* tokenizer_config.json
And then rename the folder torgo-whatever into just torgo.
Now the support for the torgo transcription mode should be present.
"""
from collections.abc import Callable
from functools import lru_cache
from pathlib import Path
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
from spectral import signal_analysis
from spectral.types import TranscriptionType
@lru_cache
def _get_model_by_name(model_name: str) -> tuple:
    """
    Load a Whisper model and its processor from a folder next to this module.

    Only the hand-patched "torgo" folder is supported. Because of ``lru_cache``,
    repeated calls with the same name reuse the already loaded objects.

    Args:
    ----
        model_name (str): name of the local folder (relative to this file) holding the model

    Returns:
    -------
        tuple: ``(model, processor)`` as returned by the two ``from_pretrained`` calls

    Raises:
    ------
        RuntimeError: if ``model_name`` is anything other than "torgo"

    """
    if model_name == "torgo":
        # Files are read from the local folder only (local_files_only=True).
        model_dir = Path(__file__).parent / model_name
        whisper_model = WhisperForConditionalGeneration.from_pretrained(
            model_dir, local_files_only=True
        )
        whisper_processor = WhisperProcessor.from_pretrained(model_dir, local_files_only=True)
        return whisper_model, whisper_processor

    raise RuntimeError("""We don't support any other model besides
'torgo' one for now: some models on huggingface
require manual patching, and this model in particular
is the jindaznb/torgo_tiny_finetune_F01_frozen_encoder
patched with tiny whisper tokenizer...""")
@lru_cache
def get_transcribe_fn(model_name: str) -> Callable[[bytes], str]:
    """
    Return a complete prediction function, that adheres to the usual API.

    The model and processor are loaded once (via ``_get_model_by_name``) and
    captured in the returned closure; ``lru_cache`` makes repeated calls with
    the same ``model_name`` return the same closure.

    Args:
    ----
        model_name (str): name of the locally stored model to use

    Returns:
    -------
        Callable[[bytes], str]: function mapping encoded audio bytes to the decoded text

    Raises:
    ------
        RuntimeError: propagated from ``_get_model_by_name`` for unsupported names

    """
    model, processor = _get_model_by_name(model_name)
    required_sr = 16000  # sampling rate passed to the processor, requirement of the model
    required_sample_width = 2  # bytes per sample, must match the np.int16 view below

    def transcribe_fn(data: bytes) -> str:
        audio = signal_analysis.get_audio({"data": data})
        # The samples are interpreted below as 16-bit mono at `required_sr`.
        # Convert the decoded audio to exactly that format first, otherwise a
        # 44.1 kHz or stereo recording would be fed to the model as if it were
        # 16 kHz mono and produce garbage.
        # NOTE(review): assumes get_audio returns a pydub AudioSegment -- confirm.
        audio = (
            audio.set_frame_rate(required_sr)
            .set_channels(1)
            .set_sample_width(required_sample_width)
        )
        # trick from https://github.com/openai/whisper/discussions/983
        # (scale int16 samples into float32 values in [-1, 1))
        samples = np.frombuffer(audio.raw_data, np.int16).flatten().astype(np.float32) / 32768.0  # type: ignore
        input_features = processor(
            samples, sampling_rate=required_sr, return_tensors="pt"
        ).input_features  # type: ignore

        # Generate transcription; no gradients are needed for inference.
        with torch.no_grad():
            predicted_ids = model.generate(input_features)  # type: ignore

        # batch_decode yields one string per sequence; only one was generated.
        return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]  # type: ignore

    return transcribe_fn
def hf_transcription(data: bytes, model_name: str) -> TranscriptionType:
    """
    Transcribe encoded audio bytes with a locally stored Hugging Face model.

    The whole recording is returned as a single segment that starts at 0 and
    ends at the duration of the signal; the language is reported as "unk".

    NOTE(review): only RuntimeError is swallowed here; errors of other types
    raised while loading or running the model presumably propagate -- confirm
    that this is intended.

    Args:
    ----
        data (bytes): bytes representing a WAV audio signal
        model_name (str): name of the local model folder to use

    Returns:
    -------
        TranscriptionType: dictionary with the keys "language" and "transcription",
        or an empty dictionary when a RuntimeError occurred (e.g. unsupported model name)

    """
    try:
        transcribe = get_transcribe_fn(model_name)
        text = transcribe(data)
        audio = signal_analysis.get_audio({"data": data})
        duration = signal_analysis.calculate_signal_duration(audio)
    except RuntimeError:
        return {}

    segment = {"value": text, "start": 0, "end": duration}
    return {"language": "unk", "transcription": [segment]}

View file

@ -9,8 +9,10 @@ from typing import Any
from openai import OpenAI
from spectral.types import TranscriptionType
def whisper_transcription(data: bytes) -> dict[str, str | list[dict]]:
def whisper_transcription(data: bytes) -> TranscriptionType:
"""
Get transcription from whisper from a list of WAV bytes.

View file

@ -2,15 +2,16 @@
from fastapi import HTTPException
from spectral.types import FileStateType
from spectral.types import FileStateType, TranscriptionType
from .models.allosaurus import allosaurus_transcription
from .models.deepgram import deepgram_transcription
from .models.huggingface_adapter import hf_transcription
from .models.whisper import whisper_transcription
from .transcription_utils import fill_gaps
def get_transcription(model: str, file: FileStateType):
def get_transcription(model: str, file: FileStateType) -> TranscriptionType:
"""
Get transcription of an audio file using the specified model.
@ -34,6 +35,14 @@ def get_transcription(model: str, file: FileStateType):
return fill_gaps(deepgram_transcription(file["data"]), file)
if model == "whisper":
return fill_gaps(whisper_transcription(file["data"]), file)
if model == "whisper-torgo-1-epoch":
# name torgo here does not correspond to an actual hf model, it is the path to the local
# folder containing the model. We do this because most hf models require patching.
# If you want to change this, look into hf_transcription implementation, it has more info
return fill_gaps(hf_transcription(file["data"], model_name="torgo"), file)
if model == "allosaurus":
return fill_gaps(allosaurus_transcription(file), file)
# When adding a new mode make sure you also change it in the main.py, where the fastapi request
# is validated with pydantic. Otherwise you will just get 422 as a response.
# And add it in /app/src/lib/analysis/modes/transcription/TranscriptionPlugin.svelte
raise HTTPException(status_code=404, detail="Model was not found")

View file

@ -3,12 +3,13 @@
from __future__ import annotations
from spectral.signal_analysis import calculate_signal_duration, get_audio
from spectral.types import TranscriptionType
def fill_gaps(
transcriptions_and_language: dict[str, str | list[dict]],
transcriptions_and_language: TranscriptionType,
file: dict,
) -> dict[str, str | list[dict]]:
) -> TranscriptionType:
"""
Fill the gaps between consecutive transcription dictionaries such that all
time is accounted for.

View file

@ -12,3 +12,4 @@ AudioType = AudioSegment
SoundType = parselmouth.Sound
FileStateType = dict
DatabaseType = Database | Iterator[Database]
TranscriptionType = dict[str, str | list[dict] | None]

View file

@ -92,8 +92,9 @@ def test_signal_correct_waveform(db_mock, file_state):
response = client.post("/signals/modes/waveform", json={"fileState": file_state})
assert response.status_code == 200, "Expected status code 200 for waveform mode"
result = response.json()
assert result is None, "Expected response to be None"
assert db_mock.fetch_file.call_count == 0, "Expected fetch_file not to be called"
assert len(result["pitch"]) == 453, "Expected pitch array length to be 453"
assert len(result["formants"]) == 723, "Expected formants length to be 723"
assert db_mock.fetch_file.call_count == 1, "Expected fetch_file to be called once"
def test_signal_correct_vowel_space(db_mock, file_state):
@ -245,9 +246,8 @@ def test_transcription_model_found(db_mock):
def test_transcription_model_not_found(db_mock):
response = client.get("/transcription/non_existant_model/1")
assert response.status_code == 404, "Expected status code 404 when model is not found"
assert response.json()["detail"] == "Model was not found", "Expected detail message 'Model was not found'"
assert db_mock.fetch_file.call_count == 1, "Expected fetch_file to be called once"
assert response.status_code == 422, "Expected status code 422 when model is not part of the allowed list"
assert db_mock.fetch_file.call_count == 0, "Expected fetch_file to be called never"
def test_analyze_signal_mode_invalid_id(db_mock, file_state):
@ -261,9 +261,8 @@ def test_analyze_signal_mode_invalid_id(db_mock, file_state):
def test_transcribe_file_invalid_model(db_mock):
response = client.get("/transcription/invalid_model/1")
assert response.status_code == 404, "Expected status code 404 when transcription model is invalid"
assert response.json()["detail"] == "Model was not found", "Expected detail message 'Model was not found'"
assert db_mock.fetch_file.call_count == 1, "Expected fetch_file to be called once"
assert response.status_code == 422, "Expected status code 404 when transcription model is invalid"
assert db_mock.fetch_file.call_count == 0, "Expected fetch_file to be called never"
@pytest.mark.skip(reason="Not implemented")

View file

@ -1,6 +1,8 @@
import pytest
import torch
from unittest.mock import Mock, patch
from fastapi import HTTPException
from spectral.transcription.models.huggingface_adapter import hf_transcription
from spectral.transcription.transcription import (
get_transcription,
deepgram_transcription,
@ -183,3 +185,35 @@ def test_whisper_transcription(mock_whisper_client):
assert result == expected_result, f"Expected {expected_result}, but got {result}"
(mock_whisper_client.assert_called_once_with(api_key="test_key"))
(mock_client_instance.audio.transcriptions.create.assert_called_once())
def test_hf_transcription_no_model():
    """An unsupported model name yields an empty result instead of raising."""
    result = hf_transcription(b"audio data", "arst")
    assert result == {}
@patch("spectral.signal_analysis.get_audio")
@patch("spectral.signal_analysis.calculate_signal_duration")
@patch("spectral.transcription.models.huggingface_adapter._get_model_by_name")
def test_hf_transcription_basic(mock_model_getter, sig_duration, get_audio):
    """The adapter wraps the decoded text in a single segment spanning the whole signal."""
    # Imported locally: only needed to reset the lru_cache around this test.
    from spectral.transcription.models.huggingface_adapter import get_transcribe_fn

    # Make sure a closure cached by another test is not reused here.
    get_transcribe_fn.cache_clear()

    fake_audio = Mock()
    fake_audio.raw_data = b""
    # pydub-style conversion methods hand back the same fake, so the test does
    # not depend on whether the adapter normalises the audio format.
    fake_audio.set_frame_rate.return_value = fake_audio
    fake_audio.set_channels.return_value = fake_audio
    fake_audio.set_sample_width.return_value = fake_audio
    get_audio.return_value = fake_audio
    sig_duration.return_value = 1.5

    mock_model = Mock()
    mock_processor = Mock()
    mock_model_getter.return_value = (mock_model, mock_processor)
    mock_model.generate.return_value = torch.tensor([1, 2, 3])

    def fake_decode(predicted_ids, *args, **kws):
        assert torch.allclose(predicted_ids, torch.tensor([1, 2, 3]))
        # The real batch_decode returns one string per sequence, i.e. a list.
        return ["i love apples"]

    mock_processor.batch_decode = fake_decode

    try:
        assert hf_transcription(b"data", "torgo") == {
            "language": "unk",
            "transcription": [{"end": 1.5, "start": 0, "value": "i love apples"}],
        }
        # The features produced by the processor must be what the model receives.
        mock_model.generate.assert_called_once_with(mock_processor.return_value.input_features)
    finally:
        # Do not leak the closure holding the mocks into later tests.
        get_transcribe_fn.cache_clear()

Some files were not shown because too many files have changed in this diff Show more