from dataclasses import dataclass
from datetime import datetime


@dataclass
class Chunk:
    """One contiguous piece of transcript text attributed to a single speaker.

    Instances are produced by ``semantic_chunk_by_speaker_turns`` in this
    module.
    """

    # Identifier of the chunk; the chunker in this module formats it as
    # '<call_id>-chunk-<idx>'.
    chunk_id: str
    # Speaker label the text is attributed to.
    speaker: str
    # Chunk text; the chunker joins the stripped turn texts with single spaces.
    text: str
    # Timestamp associated with the chunk, or None when none is available.
    timestamp: datetime | None
    # Zero-based position of the chunk in the chunker's output.
    idx: int


def semantic_chunk_by_speaker_turns(call_id: str, turns: list[dict], max_chars: int = 900) -> list[Chunk]:
    """Group consecutive same-speaker turns of a call into text chunks.

    Turns are read in order. Turns whose ``text`` is missing, ``None`` or
    whitespace-only are ignored. The remaining turns are appended to the
    current chunk until either the speaker changes or adding the next turn
    (joined with a single space) would make the chunk longer than
    ``max_chars``; at that point the chunk is emitted and a new one starts.

    A turn is never split: a single turn longer than ``max_chars`` becomes a
    chunk of its own that exceeds the limit.

    Args:
        call_id: Prefix used to build each ``chunk_id`` as
            ``'{call_id}-chunk-{idx}'``.
        turns: Turn mappings read via the keys ``'speaker'``, ``'text'`` and
            ``'timestamp'``. A missing or ``None`` speaker is recorded as
            ``'unknown'``.
        max_chars: Maximum length of a chunk's joined text (default 900).

    Returns:
        Chunks in transcript order, with ``idx`` counting up from 0. Each
        chunk's ``timestamp`` is that of the first non-blank turn it contains.
        An empty (or ``None``) ``turns`` yields an empty list.
    """
    chunks: list[Chunk] = []
    if not turns:
        return chunks

    # State of the chunk currently being assembled. Speaker and timestamp are
    # only meaningful while ``pending`` is non-empty; they are set by the
    # first non-blank turn of each chunk, never by a skipped blank turn.
    current_speaker: str = 'unknown'
    current_ts = None
    pending: list[str] = []
    pending_len = 0  # len(' '.join(pending)), maintained incrementally

    def flush() -> None:
        """Emit the pending chunk (if any) and reset the accumulator."""
        nonlocal pending, pending_len
        if pending:
            # Chunks are numbered in emission order, so the next index is
            # simply the number of chunks emitted so far.
            idx = len(chunks)
            chunks.append(
                Chunk(
                    chunk_id=f'{call_id}-chunk-{idx}',
                    speaker=current_speaker,
                    text=' '.join(pending),
                    timestamp=current_ts,
                    idx=idx,
                )
            )
        pending = []
        pending_len = 0

    for turn in turns:
        text = (turn.get('text') or '').strip()
        if not text:
            continue

        speaker = turn.get('speaker')
        if speaker is None:
            # Covers both a missing key and an explicit None value.
            speaker = 'unknown'

        # Length the chunk would have if this turn were appended; the +1 is
        # the joining space, needed only when something is already pending.
        joined_len = pending_len + len(text) + (1 if pending else 0)

        if not pending or speaker != current_speaker or joined_len > max_chars:
            flush()
            current_speaker = speaker
            current_ts = turn.get('timestamp')
            pending_len = len(text)
        else:
            pending_len = joined_len

        pending.append(text)

    flush()
    return chunks
