Refactor LLM processor and improve async handling
Move contextual information handling from noise filtering to extraction and centralize LLM call logic. Wrap blocking transcription and state update calls in asyncio.to_thread to prevent event loop blocking. Update transcriber model size to base.
This commit is contained in:
+17
-28
@@ -61,6 +61,18 @@ class LLMProcessor:
|
||||
|
||||
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
|
||||
|
||||
def _strip_markdown_code_blocks(self, content: str) -> str:
|
||||
"""
|
||||
Strips markdown code blocks (e.g., ```json ... ```) from the content.
|
||||
"""
|
||||
import re
|
||||
|
||||
# Remove opening and closing code blocks
|
||||
content = re.sub(
|
||||
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
|
||||
).strip()
|
||||
return content
|
||||
|
||||
def _call_llm(
|
||||
self,
|
||||
system_prompt: str,
|
||||
@@ -93,15 +105,7 @@ class LLMProcessor:
|
||||
)
|
||||
content = response.choices[0].message.content
|
||||
|
||||
# Strip markdown code blocks if present
|
||||
if content.startswith("```"):
|
||||
import re
|
||||
|
||||
content = re.sub(
|
||||
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
|
||||
).strip()
|
||||
|
||||
return content
|
||||
return self._strip_markdown_code_blocks(content)
|
||||
except Exception as e:
|
||||
logger.error(f"LLM Error: {e}")
|
||||
return ""
|
||||
@@ -147,34 +151,19 @@ class LLMProcessor:
|
||||
"""
|
||||
logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
|
||||
try:
|
||||
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
|
||||
logger.info("LLM Processor (Extract): Sending request to backend...")
|
||||
|
||||
system_prompt = EXTRACTION_SYSTEM_PROMPT
|
||||
if context:
|
||||
system_prompt += f"\n{context}"
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
]
|
||||
messages.append({"role": "user", "content": filtered_text})
|
||||
|
||||
for message in messages:
|
||||
logger.info(f"LLM Processor (Extract): Message: {message}")
|
||||
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
result = self._call_llm(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=filtered_text,
|
||||
response_format={"type": "json_object"},
|
||||
extra_body={"enable_thinking": False},
|
||||
)
|
||||
logger.info("LLM Processor (Extract): Response received from backend.")
|
||||
|
||||
import json
|
||||
|
||||
content = response.choices[0].message.content
|
||||
logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
|
||||
data = json.loads(content)
|
||||
data = json.loads(result)
|
||||
|
||||
# Map the JSON data to the Pydantic model
|
||||
return ExtractionResult(**data)
|
||||
|
||||
Reference in New Issue
Block a user