"""Speaker-name identification (§4.5 enhancement). In a 1-on-1 interview the host introduces the guest by name at the top. Reading the transcript head with the LLM, we attach a real NAME to each diarized speaker → voiceprints.person_label. This gives the independence graph a SECOND, orthogonal overlap signal: the same NAMED guest across two shows is a shared_guest edge even when the voiceprints don't cluster (different mic/codec/room). It complements voiceprint cosine matching and is robust to fingerprint drift — exactly the case the operator flagged. """ from __future__ import annotations import json import logging log = logging.getLogger(__name__) _SYS = ( 'You identify the speakers in a podcast/interview transcript. Each line is "LABEL: text". ' "Using the introduction and context, determine each LABEL's real full name and role. In an " "interview the host normally introduces themselves and the guest within the first minute. Only " "assert a name you can actually support from the text — if you cannot tell, use null. " 'Return ONLY JSON: {"speakers": {"": {"name": "Full Name" or null, ' '"role": "host"|"guest"|"panelist"|"unknown", "confidence": "low"|"med"|"high"}}}.' ) def identify_speakers(backend, transcript_head: str, *, source_name: str, host_hint: str | None = None) -> dict: """Returns {label: {name, role, confidence}}. `backend` is any extract.backends backend.""" ctx = f"Show: {source_name}." if host_hint: ctx += f" The show's usual host is {host_hint}." ctx += "\n\nTRANSCRIPT (beginning):\n" + transcript_head messages = [{"role": "system", "content": _SYS}, {"role": "user", "content": ctx}] raw = backend.complete_json(messages, max_tokens=600) try: obj = json.loads(raw) except Exception: i, j = raw.find("{"), raw.rfind("}") if i < 0 or j < 0: return {} try: obj = json.loads(raw[i:j + 1]) except Exception: return {} spk = obj.get("speakers", {}) if isinstance(obj, dict) else {} return spk if isinstance(spk, dict) else {}