Refactor LLM processor and improve async handling

Move contextual information handling from noise filtering to extraction
and centralize LLM call logic. Wrap blocking transcription and state
update calls in asyncio.to_thread to prevent event loop blocking.
Update transcriber model size to base.
This commit is contained in:
2026-05-28 18:54:09 -07:00
parent afa8d17f10
commit 2363cde160
5 changed files with 26 additions and 42 deletions
+17 -28
View File
@@ -61,6 +61,18 @@ class LLMProcessor:
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
def _strip_markdown_code_blocks(self, content: str) -> str:
"""
Strips markdown code blocks (e.g., ```json ... ```) from the content.
"""
import re
# Remove opening and closing code blocks
content = re.sub(
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
).strip()
return content
def _call_llm(
self,
system_prompt: str,
@@ -93,15 +105,7 @@ class LLMProcessor:
)
content = response.choices[0].message.content
# Strip markdown code blocks if present
if content.startswith("```"):
import re
content = re.sub(
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
).strip()
return content
return self._strip_markdown_code_blocks(content)
except Exception as e:
logger.error(f"LLM Error: {e}")
return ""
@@ -147,34 +151,19 @@ class LLMProcessor:
"""
logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
try:
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
logger.info("LLM Processor (Extract): Sending request to backend...")
system_prompt = EXTRACTION_SYSTEM_PROMPT
if context:
system_prompt += f"\n{context}"
messages = [
{"role": "system", "content": system_prompt},
]
messages.append({"role": "user", "content": filtered_text})
for message in messages:
logger.info(f"LLM Processor (Extract): Message: {message}")
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
result = self._call_llm(
system_prompt=system_prompt,
user_prompt=filtered_text,
response_format={"type": "json_object"},
extra_body={"enable_thinking": False},
)
logger.info("LLM Processor (Extract): Response received from backend.")
import json
content = response.choices[0].message.content
logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
data = json.loads(content)
data = json.loads(result)
# Map the JSON data to the Pydantic model
return ExtractionResult(**data)