a95f27ecd1
Fragments reabsorbed by smoothFragments (e.g. "I" then "need to switch it back") were left as separate transcript lines. Add SpeakerReconciler.mergeAdjacent to join consecutive same-speaker segments within 2s, concatenating their text. Wire it into SessionController.finishBackend AFTER reconcile/LLM naming. The collapse needs no LLM, so finishBackend no longer early-returns when the gateway has no chat model — it runs the collapse and re-persists speakers.json unconditionally, gating only the reconcile and recap passes on the model.
116 lines
6.8 KiB
Swift
116 lines
6.8 KiB
Swift
import XCTest
|
|
@testable import Ten31Transcripts
|
|
|
|
/// Unit tests for `SpeakerReconciler`: cosine similarity, fingerprint-based merging,
/// fragment smoothing, adjacent-segment merging, and parsing of the naming response.
final class SpeakerReconcilerTests: XCTestCase {

    // MARK: - Fixtures

    /// Builds a segment for `speaker` covering `start`...`end`.
    /// `text` defaults to the placeholder "x" for tests that do not inspect the text.
    private func seg(_ start: Double, _ end: Double, _ speaker: String, _ text: String = "x") -> SpeakersFile.Segment {
        SpeakersFile.Segment(start: start, end: end, speaker: speaker, text: text)
    }

    /// Wraps speakers and segments in a `SpeakersFile` with fixed session metadata.
    private func file(_ speakers: [SpeakersFile.Speaker], _ segs: [SpeakersFile.Segment]) -> SpeakersFile {
        SpeakersFile(sessionId: "s", app: "signal", durationSec: 60, speakers: speakers, segments: segs, models: [:])
    }

    /// Builds a speaker entry; `overlap` is the optional overlap confidence, match similarity is always nil.
    private func sp(_ name: String, _ source: String, _ overlap: Double? = nil) -> SpeakersFile.Speaker {
        SpeakersFile.Speaker(name: name, source: source, overlapConfidence: overlap, matchSimilarity: nil)
    }

    // MARK: - cosine

    /// Checks cosine similarity for identical, orthogonal and scaled vectors.
    func testCosine() {
        let identical = SpeakerReconciler.cosine([1, 0, 0], [1, 0, 0])
        let orthogonal = SpeakerReconciler.cosine([1, 0, 0], [0, 1, 0])
        let scaled = SpeakerReconciler.cosine([1, 0], [3, 0]) // scale-invariant

        XCTAssertEqual(identical, 1.0, accuracy: 0.0001)
        XCTAssertEqual(orthogonal, 0.0, accuracy: 0.0001)
        XCTAssertEqual(scaled, 1.0, accuracy: 0.0001)
    }

    // MARK: - mergeByFingerprint

    /// A near-identical fingerprint folds "Unknown_0" into the named speaker "MH".
    func testMergeCollapsesSplitPersonIntoNamedSurvivor() {
        // The 1-on-1 failure: a remote person split into "MH" (named) + "Unknown_0".
        let roster = [sp("Grant", "mic_channel"), sp("MH", "visual", 0.17), sp("Unknown_0", "unmatched")]
        let timeline = [seg(0, 2, "Grant"), seg(2, 4, "MH"), seg(4, 6, "Unknown_0"), seg(6, 8, "MH")]
        let input = file(roster, timeline)
        let fingerprints: [String: [Float]] = [
            "MH": [1, 0, 0],
            "Unknown_0": [0.99, 0.02, 0],
            "Grant": [0, 1, 0],
        ]
        let protected = SpeakerReconciler.protectedNames(input, selfName: "Grant")

        let out = SpeakerReconciler.mergeByFingerprint(input, fingerprints: fingerprints,
                                                       protected: protected, threshold: 0.9)

        // Unknown_0 is absorbed: gone from both the roster and the timeline.
        XCTAssertEqual(Set(out.speakers.map(\.name)), ["Grant", "MH"])
        XCTAssertFalse(out.segments.contains(where: { $0.speaker == "Unknown_0" }))

        let mhSegments = out.segments.filter { $0.speaker == "MH" }
        let grantSegments = out.segments.filter { $0.speaker == "Grant" }
        XCTAssertEqual(mhSegments.count, 3) // 2 MH + 1 ex-Unknown
        XCTAssertEqual(grantSegments.count, 1) // self untouched
    }

    /// The self speaker stays separate even when its fingerprint equals another speaker's.
    func testNeverMergesSelf() {
        // Self (mic_channel) must not merge even with an identical fingerprint.
        let roster = [sp("Grant", "mic_channel"), sp("MH", "visual"), sp("JK", "visual")]
        let timeline = [seg(0, 2, "Grant"), seg(2, 4, "MH"), seg(4, 6, "JK")]
        let input = file(roster, timeline)
        let fingerprints: [String: [Float]] = [
            "Grant": [1, 0, 0],
            "MH": [1, 0, 0],
            "JK": [0, 1, 0],
        ]
        let protected = SpeakerReconciler.protectedNames(input, selfName: "Grant")

        let out = SpeakerReconciler.mergeByFingerprint(input, fingerprints: fingerprints,
                                                       protected: protected, threshold: 0.9)

        XCTAssertTrue(out.speakers.contains(where: { $0.name == "Grant" }))
        XCTAssertEqual(out.speakers.count, 3) // nothing merged (MH/JK distinct, Grant protected)
    }

    /// Orthogonal fingerprints stay as two speakers.
    func testKeepsDistinctVoices() {
        let input = file([sp("MH", "visual"), sp("JK", "visual")], [seg(0, 2, "MH"), seg(2, 4, "JK")])
        let fingerprints: [String: [Float]] = ["MH": [1, 0, 0], "JK": [0, 1, 0]]

        let out = SpeakerReconciler.mergeByFingerprint(input, fingerprints: fingerprints, protected: [], threshold: 0.8)

        XCTAssertEqual(out.speakers.count, 2)
    }

    // MARK: - smoothFragments

    /// A cluster made mostly of micro-segments is dissolved into the surrounding speaker.
    func testSmoothDissolvesFragmentCluster() {
        // "Frag" is mostly micro-segments (the Marty pattern: median ≤ 1s) even though
        // it has one longer stray → still absorbed into the surrounding real speaker.
        let timeline = [
            seg(0, 4, "Grant"), seg(4.0, 4.3, "Frag"), seg(4.4, 8, "Grant"),
            seg(20, 24, "Grant"), seg(24.0, 24.2, "Frag"), seg(24.3, 28, "Grant"),
            seg(30, 30.3, "Frag"), seg(31, 33, "Frag"), // 4 Frag: 3 micro + 1 stray 2s
        ]
        let input = file([sp("Grant", "content"), sp("Frag", "content")], timeline)

        let out = SpeakerReconciler.smoothFragments(input, protected: [])

        XCTAssertEqual(Set(out.speakers.map(\.name)), ["Grant"]) // median(Frag)=0.3 ≤1 → dissolved
        XCTAssertFalse(out.segments.contains(where: { $0.speaker == "Frag" }))
    }

    /// A speaker whose segments are mostly long survives smoothing.
    func testSmoothKeepsRealSpeakerWithMostlyLongSegs() {
        // Segment lengths 3, 3, 0.2 → median 3 → real speaker.
        let input = file([sp("A", "content")], [seg(0, 3, "A"), seg(3, 6, "A"), seg(6, 6.2, "A")])

        let out = SpeakerReconciler.smoothFragments(input, protected: [])

        XCTAssertEqual(out.speakers.map(\.name), ["A"])
    }

    /// A protected speaker is kept even when every one of its segments is short.
    func testSmoothProtectsSelfEvenIfAllShort() {
        let roster = [sp("Me", "mic_channel"), sp("A", "content")]
        let timeline = [seg(0, 0.3, "Me"), seg(1, 4, "A"), seg(4, 4.2, "Me")]
        let input = file(roster, timeline)

        let out = SpeakerReconciler.smoothFragments(input, protected: ["Me"])

        XCTAssertTrue(out.speakers.contains(where: { $0.name == "Me" })) // self never dissolved
    }

    // MARK: - mergeAdjacent

    /// Two consecutive "A" segments 0.5s apart become one segment with the joined text.
    func testMergeAdjacentCollapsesSameSpeakerAndJoinsText() {
        let input = file([sp("A", "content"), sp("B", "content")], [
            seg(0, 1, "A", "I"),
            seg(1.5, 4, "A", "need to switch it back"),
            seg(4.2, 6, "B", "Sure"),
        ])

        let out = SpeakerReconciler.mergeAdjacent(input, maxGap: 2.0)

        XCTAssertEqual(out.segments.count, 2) // two A's collapsed
        let merged = out.segments[0]
        XCTAssertEqual(merged.speaker, "A")
        XCTAssertEqual(merged.start, 0, accuracy: 0.001)
        XCTAssertEqual(merged.end, 4, accuracy: 0.001)
        XCTAssertEqual(merged.text, "I need to switch it back")
        XCTAssertEqual(out.segments[1].speaker, "B") // different speaker untouched
    }

    /// Segments are not merged across a gap larger than `maxGap` or across another speaker.
    func testMergeAdjacentRespectsMaxGapAndSpeakerBoundaries() {
        let farApart = file([sp("A", "content")], [
            seg(0, 1, "A", "one"),
            seg(5, 6, "A", "two"), // gap 4s > maxGap
        ])
        let farApartOut = SpeakerReconciler.mergeAdjacent(farApart, maxGap: 2.0)
        XCTAssertEqual(farApartOut.segments.count, 2) // large gap → not merged

        // A B A must stay three segments (intervening speaker breaks the run).
        let interleaved = file([sp("A", "content"), sp("B", "content")], [
            seg(0, 1, "A", "a1"),
            seg(1.2, 2, "B", "b"),
            seg(2.2, 3, "A", "a2"),
        ])
        let interleavedOut = SpeakerReconciler.mergeAdjacent(interleaved, maxGap: 2.0)
        XCTAssertEqual(interleavedOut.segments.count, 3)
    }

    // MARK: - parseNaming

    /// Entries with a null name are dropped; named entries keep their name and confidence.
    func testParseNamingDropsNullAndKeepsConfidence() {
        let json = #"{"speakers":[{"current":"MH","name":"Jonathan Kirkwood","confidence":"high"},{"current":"Unknown_0","name":null,"confidence":"low"}]}"#

        let renames = SpeakerReconciler.parseNaming(json)

        let mh = renames["MH"]
        XCTAssertEqual(mh?.name, "Jonathan Kirkwood")
        XCTAssertEqual(mh?.confidence, "high")
        XCTAssertNil(renames["Unknown_0"]) // null name → not a rename
    }
}
|