feat: implement core D&D helpers logic and system architecture

2026-05-25 22:14:58 -07:00
parent 5bb483431f
commit 685586318f
36 changed files with 1137 additions and 0 deletions
@@ -0,0 +1,69 @@
+import logging
+
+from faster_whisper import WhisperModel
+
+logging.basicConfig(level=logging.INFO)  # NOTE(review): runs at import time and sets up root logging
+logger = logging.getLogger(__name__)  # module-level logger used by Transcriber below
+
+
+class Transcriber:
+    """Converts audio chunks (numpy arrays) into text using faster-whisper."""
+
+    def __init__(self, model_size="base", device="cpu", compute_type="int8"):
+        """
+        Initializes the faster-whisper model.
+
+        Args:
+            model_size (str): The size of the model to use (e.g., "tiny", "base", "small").
+            device (str): The device to run the model on ("cpu" or "cuda").
+            compute_type (str): The compute type to use (e.g., "int8", "float16").
+
+        Raises:
+            Exception: Any model-loading error is logged with its traceback and re-raised.
+        """
+        logger.info(
+            f"Loading faster-whisper model: {model_size} on {device} ({compute_type})..."
+        )
+        try:
+            self.model = WhisperModel(
+                model_size, device=device, compute_type=compute_type
+            )
+        except Exception as e:
+            logger.exception("Failed to load faster-whisper model: %s", e)
+            raise
+        else:
+            logger.info("Model loaded successfully.")
+
+    def transcribe(self, audio_chunk):
+        """
+        Transcribes a single audio chunk.
+
+        Args:
+            audio_chunk (np.ndarray): The audio data as a numpy array.
+
+        Returns:
+            str: The transcribed text; "" for a None/empty chunk or on error
+                (errors are logged with a traceback, never raised).
+        """
+        # Missing or zero-length audio has nothing to decode; skip the model.
+        if audio_chunk is None or getattr(audio_chunk, "size", 1) == 0:
+            return ""
+
+        try:
+            # faster-whisper expects audio in float32
+            audio_data = audio_chunk.astype("float32")
+            segments, _ = self.model.transcribe(audio_data, beam_size=5)
+            # Drop blank segments so the joined text has no doubled spaces.
+            pieces = (segment.text.strip() for segment in segments)
+            return " ".join(piece for piece in pieces if piece)
+        except Exception as e:
+            # Best-effort: a failed chunk is logged and yields "" for the caller.
+            logger.exception("Transcription error: %s", e)
+            return ""
+
+    def close(self):
+        """
+        Explicitly release model resources if necessary.
+
+        Currently a no-op, provided only for interface consistency.
+        """