v0.10.0:1 - hotfix: merge function now joins words with proper spacing
Smoke testing v0.10.0:0 against a real anarlog audio.mp3 showed the
output running words together: "I'mrecordingrightnow", "don'tyoutry".
Root cause: _merge_words_with_speakers was doing "".join(cur_words),
assuming Parakeet returns words with leading whitespace (which the
hyprnote local Parakeet does, but the Spark-hosted Parakeet does not).
Rewrote the join with a small helper that:
- Strips each token (handles both leading-space and no-leading-space
word formats)
- Joins with a single space
- Keeps punctuation tight — no space before period/comma/colon/etc.
Verified post-install with the same test audio:
[00:06] Speaker_0: I'm I'm recording right now.
[00:18] Speaker_1: you're you're on your computer and your phone, right?
No other changes — Parakeet container patches and the endpoint shape
stay identical.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -344,6 +344,24 @@ def _merge_words_with_speakers(words: list[dict], diar_turns: list[dict]) -> lis
|
||||
return []
|
||||
SILENCE_BREAK_S = 1.5
|
||||
|
||||
def _join_words(parts: list[str]) -> str:
|
||||
"""Join word tokens with proper spacing. Different STT outputs vary —
|
||||
some include leading spaces in the word text (' morning'), some don't
|
||||
('morning'). Normalize by stripping each token then joining with one
|
||||
space; collapse multiple spaces. Keeps punctuation tight (no space
|
||||
before period/comma/etc.)."""
|
||||
cleaned = [p.strip() for p in parts if p and p.strip()]
|
||||
if not cleaned:
|
||||
return ""
|
||||
out = cleaned[0]
|
||||
for token in cleaned[1:]:
|
||||
# No leading space before pure-punctuation tokens
|
||||
if token and token[0] in ".,;:!?)]}'\"":
|
||||
out += token
|
||||
else:
|
||||
out += " " + token
|
||||
return out
|
||||
|
||||
blocks: list[dict] = []
|
||||
cur_words: list[str] = []
|
||||
cur_speaker: Optional[str] = None
|
||||
@@ -367,7 +385,7 @@ def _merge_words_with_speakers(words: list[dict], diar_turns: list[dict]) -> lis
|
||||
"start_ms": int(cur_start_s * 1000),
|
||||
"end_ms": int(cur_end_s * 1000),
|
||||
"speaker": cur_speaker,
|
||||
"text": "".join(cur_words).strip(),
|
||||
"text": _join_words(cur_words),
|
||||
})
|
||||
cur_words = [wt]
|
||||
cur_speaker = spk
|
||||
@@ -382,7 +400,7 @@ def _merge_words_with_speakers(words: list[dict], diar_turns: list[dict]) -> lis
|
||||
"start_ms": int(cur_start_s * 1000),
|
||||
"end_ms": int(cur_end_s * 1000),
|
||||
"speaker": cur_speaker,
|
||||
"text": "".join(cur_words).strip(),
|
||||
"text": _join_words(cur_words),
|
||||
})
|
||||
|
||||
return blocks
|
||||
|
||||
Reference in New Issue
Block a user