feat: implement core D&D helpers logic and system architecture

This commit is contained in:
2026-05-25 22:14:58 -07:00
parent 5bb483431f
commit 685586318f
36 changed files with 1137 additions and 0 deletions
+69
View File
@@ -0,0 +1,69 @@
import logging
from faster_whisper import WhisperModel
# Configure the root logger at INFO level as an import-time side effect.
# NOTE(review): basicConfig() is a no-op when the root logger already has
# handlers, so an application that configures logging first keeps its setup.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by Transcriber below.
logger = logging.getLogger(__name__)
class Transcriber:
    """
    Converts audio chunks (numpy arrays) into text using faster-whisper.

    The model is loaded once in ``__init__`` and reused by every
    ``transcribe`` call.
    """

    def __init__(
        self,
        model_size: str = "base",
        device: str = "cpu",
        compute_type: str = "int8",
    ):
        """
        Initializes the faster-whisper model.

        Args:
            model_size (str): The size of the model to use (e.g., "tiny", "base", "small").
            device (str): The device to run the model on ("cpu" or "cuda").
            compute_type (str): The compute type to use (e.g., "int8", "float16").

        Raises:
            Exception: Whatever ``WhisperModel`` raises while loading; the
                error is logged and then re-raised unchanged.
        """
        logger.info(
            "Loading faster-whisper model: %s on %s (%s)...",
            model_size,
            device,
            compute_type,
        )
        try:
            self.model = WhisperModel(
                model_size, device=device, compute_type=compute_type
            )
        except Exception as exc:
            # Log for operators, then let the caller decide how to recover.
            logger.error("Failed to load faster-whisper model: %s", exc)
            raise
        logger.info("Model loaded successfully.")

    def transcribe(self, audio_chunk) -> str:
        """
        Transcribes a single audio chunk.

        This is best-effort: any failure is logged (with traceback) and an
        empty string is returned instead of raising.

        Args:
            audio_chunk (np.ndarray): The audio data as a numpy array.
                NOTE(review): the chunk is only cast to float32, not rescaled;
                presumably callers already supply samples in the range the
                model expects -- confirm for integer PCM input.

        Returns:
            str: The transcribed text, or "" when the chunk is None, empty,
            or transcription fails.
        """
        if audio_chunk is None:
            return ""
        try:
            # Nothing to decode; skip the model call instead of letting it
            # fail (and log an error) on a zero-length array.
            if audio_chunk.size == 0:
                return ""
            # faster-whisper expects audio in float32; copy=False avoids a
            # redundant copy when the chunk already has that dtype.
            audio_data = audio_chunk.astype("float32", copy=False)
            segments, _info = self.model.transcribe(audio_data, beam_size=5)
            # `segments` is consumed here, inside the try, so errors raised
            # while iterating it are handled by the except below.
            pieces = (segment.text.strip() for segment in segments)
            # Skip blank segments so the result never contains doubled spaces.
            return " ".join(piece for piece in pieces if piece)
        except Exception as exc:
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception("Transcription error: %s", exc)
            return ""

    def close(self) -> None:
        """
        Explicitly release model resources if necessary.

        Currently a no-op: it exists so callers can treat this class like
        other closeable components.
        """
        # faster-whisper's WhisperModel doesn't have a standard close(),
        # but we'll provide this for consistency.
        pass