cse2000-software-project/kernel/spectral/transcription/models/whisper.py
2024-06-10 16:15:15 +02:00

34 lines
992 B
Python

from openai import OpenAI
import os
import tempfile
def _word_field(word, key: str):
    """Read ``key`` from a word entry given as either a mapping or an object."""
    # Depending on the SDK version the entries may be plain dicts or objects
    # exposing the same fields as attributes; accept both.
    if isinstance(word, dict):
        return word[key]
    return getattr(word, key)


def whisper_transcription(data: bytes) -> list[dict]:
    """Transcribe audio bytes with the ``whisper-1`` model, word by word.

    The bytes are spooled to a temporary ``.wav`` file, uploaded through the
    OpenAI client (API key read from the ``WHISPER_KEY`` environment
    variable) and the word-level timestamps of the response are collected.

    Args:
        data: Raw contents of the audio file to transcribe.

    Returns:
        One ``{"value": ..., "start": ..., "end": ...}`` dict per word
        reported by the API, in the order returned. An empty list is returned
        when the response carries no words or when any step fails; this
        function is best-effort and does not raise.
    """
    temp_wav_filename = None
    try:
        # delete=False so the file can be reopened by name for the upload;
        # it is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            # Record the name before writing so a failed write is still cleaned up.
            temp_wav_filename = temp_wav.name
            temp_wav.write(data)

        client = OpenAI(api_key=os.getenv("WHISPER_KEY"))
        # Context manager guarantees the upload handle is closed.
        with open(temp_wav_filename, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="verbose_json",
                timestamp_granularities=["word"],
            )

        # A missing or empty ``words`` attribute yields an empty result.
        words = getattr(transcription, "words", None) or []
        return [
            {
                "value": _word_field(word, "word"),
                "start": _word_field(word, "start"),
                "end": _word_field(word, "end"),
            }
            for word in words
        ]
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall back
        # to "no transcription" instead of propagating to the caller.
        print(f"Exception: {e}")
        return []
    finally:
        if temp_wav_filename is not None:
            try:
                os.remove(temp_wav_filename)
            except OSError:
                # Nothing useful to do if the file is already gone or locked.
                pass