Refactor LLM processor and improve async handling

Move contextual information handling from noise filtering to extraction and centralize LLM call logic. Wrap blocking transcription and state update calls in asyncio.to_thread to prevent event loop blocking. Update transcriber model size to base.
2026-05-28 18:54:09 -07:00
parent afa8d17f10
commit 2363cde160
5 changed files with 26 additions and 42 deletions
@@ -61,6 +61,18 @@ class LLMProcessor:

        self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")

+    def _strip_markdown_code_blocks(self, content: str) -> str:
+        """Strip markdown fence markers (e.g., ```json ... ```) from content.
+
+        Unfenced content is only trimmed; None or empty input returns "".
+        """
+        import re
+
+        text = (content or "").strip()
+        if not text.startswith("```"):
+            return text
+        return re.sub(r"^```(?:json)?\n?|```$", "", text, flags=re.MULTILINE).strip()
+
    def _call_llm(
        self,
        system_prompt: str,
@@ -93,15 +105,7 @@ class LLMProcessor:
            )
            content = response.choices[0].message.content

-            # Strip markdown code blocks if present
-            if content.startswith("```"):
-                import re
-
-                content = re.sub(
-                    r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
-                ).strip()
-
-            return content
+            return self._strip_markdown_code_blocks(content)
        except Exception as e:
            logger.error(f"LLM Error: {e}")
            return ""
@@ -147,34 +151,19 @@ class LLMProcessor:
        """
        logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
        try:
-            # Using standard chat.completions.create with JSON mode for better compatibility with vLLM
-            logger.info("LLM Processor (Extract): Sending request to backend...")
-
            system_prompt = EXTRACTION_SYSTEM_PROMPT
            if context:
                system_prompt += f"\n{context}"

-            messages = [
-                {"role": "system", "content": system_prompt},
-            ]
-            messages.append({"role": "user", "content": filtered_text})
-
-            for message in messages:
-                logger.info(f"LLM Processor (Extract): Message: {message}")
-
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
+            result = self._call_llm(
+                system_prompt=system_prompt,
+                user_prompt=filtered_text,
                response_format={"type": "json_object"},
-                extra_body={"enable_thinking": False},
            )
-            logger.info("LLM Processor (Extract): Response received from backend.")

            import json

-            content = response.choices[0].message.content
-            logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
-            data = json.loads(content)
+            data = json.loads(result)

            # Map the JSON data to the Pydantic model
            return ExtractionResult(**data)