import json
import re
from typing import List, Dict


# -----------------------------
# CONFIG
# -----------------------------
# Zero-based speaker index treated as the agent. map_speaker() subtracts 1
# from the number found in a speaker label before comparing it to this value.
AGENT_SPEAKER_ID = 0   # Deepgram speaker index for agent
# NOTE(review): not referenced anywhere in the visible code; map_speaker()
# labels every non-agent speaker as "Customer" regardless of this value.
CUSTOMER_SPEAKER_ID = 1


# -----------------------------
# TEXT CLEANING
# -----------------------------
# Standalone filler words, together with a comma attached directly to them,
# so that dropping "um," does not leave an orphaned comma behind.
_FILLER_RE = re.compile(r"\b(?:yeah|uh|um)\b,?", flags=re.I)
_WHITESPACE_RE = re.compile(r"\s+")
# Whitespace left in front of punctuation once a filler has been removed.
_SPACE_BEFORE_PUNCT_RE = re.compile(r"\s+([,.;:!?])")

# Common mis-hears and their corrections. Phrases are matched literally,
# case-insensitively, and only on word boundaries so they never fire inside
# a longer word (e.g. "repay backwards").
_MISHEARD_REPLACEMENTS = tuple(
    (re.compile(rf"\b{re.escape(phrase)}\b", flags=re.I), correction)
    for phrase, correction in (
        ("pay back", "refund"),
        ("card good only", "The payment was made by card"),
        ("once i will update", "I will update this"),
    )
)


def clean_text(text: str) -> str:
    """Normalize one transcript segment.

    Steps, in order:
      1. Trim surrounding whitespace.
      2. Remove the filler words "yeah", "uh" and "um" (any case), along
         with a comma attached directly to them.
      3. Collapse whitespace runs to a single space and drop whitespace
         that ended up in front of punctuation.
      4. Apply the known mis-hear corrections.
      5. Strip punctuation left dangling at the start and upper-case the
         first character only.

    Args:
        text: Raw segment text.

    Returns:
        The cleaned text. This is an empty string when nothing but fillers,
        whitespace and punctuation was present.
    """
    text = text.strip()

    # Remove fillers, then tidy up the gaps they leave behind.
    text = _FILLER_RE.sub("", text)
    text = _WHITESPACE_RE.sub(" ", text)
    text = _SPACE_BEFORE_PUNCT_RE.sub(r"\1", text)

    # Fix common mis-hears.
    for pattern, correction in _MISHEARD_REPLACEMENTS:
        text = pattern.sub(correction, text)

    # A removed leading filler can leave punctuation at the front
    # ("Yeah. Sure" -> ". Sure"); drop it so the text starts with a word.
    text = text.strip().lstrip(",.;:!? ")

    # str.capitalize() would lower-case the rest of the text and destroy
    # "I", names and acronyms, so only the first character is upper-cased.
    return text[:1].upper() + text[1:]


# -----------------------------
# ROLE MAPPING
# -----------------------------
def map_speaker(speaker_label: str) -> str:
    """Convert a speaker label such as "Speaker 1" into "Agent" or "Customer".

    The first run of digits in the label is read as a 1-based speaker number
    and converted to a 0-based index. The index equal to AGENT_SPEAKER_ID
    maps to "Agent"; every other index maps to "Customer".

    Args:
        speaker_label: Label containing the speaker number.

    Returns:
        "Agent" or "Customer".

    Raises:
        ValueError: If the label contains no digits.
    """
    number_match = re.search(r"\d+", speaker_label)
    if number_match is None:
        # re.search returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on None.
        raise ValueError(
            f"No speaker number found in label: {speaker_label!r}"
        )

    speaker_num = int(number_match.group()) - 1
    return "Agent" if speaker_num == AGENT_SPEAKER_ID else "Customer"


# -----------------------------
# MERGE SEGMENTS
# -----------------------------
def merge_segments(segments: List[Dict]) -> List[Dict]:
    """Collapse runs of segments spoken by the same role into one entry.

    Each input segment is read through its "speaker" and "text" keys. The
    speaker label is mapped with map_speaker() and the text is cleaned with
    clean_text(). Segments whose cleaned text is empty are skipped and do
    not interrupt a run.

    Returns:
        A list of {"speaker": role, "text": text} dicts. The texts of
        consecutive same-role segments are joined with a single space.
    """
    combined: List[Dict] = []

    for segment in segments:
        speaker_role = map_speaker(segment["speaker"])
        cleaned = clean_text(segment["text"])

        if cleaned:
            previous = combined[-1] if combined else None
            if previous is not None and previous["speaker"] == speaker_role:
                # Same role as the last kept entry: extend it in place.
                previous["text"] = previous["text"] + " " + cleaned
            else:
                combined.append({"speaker": speaker_role, "text": cleaned})

    return combined


# -----------------------------
# BUILD CLEAN TRANSCRIPT
# -----------------------------
def build_clean_transcript(segments: List[Dict]) -> str:
    """Render segments as "<speaker>: <text>" paragraphs.

    Each segment is read through its "speaker" and "text" keys. Paragraphs
    are separated by one blank line. An empty input gives an empty string.
    """
    return "\n\n".join(
        f"{entry['speaker']}: {entry['text']}" for entry in segments
    )


# -----------------------------
# MAIN FORMATTER
# -----------------------------
def format_output(raw_transcript: str, speaker_segments_json: str) -> Dict:
    """Build the cleaned transcript and diarization from JSON segments.

    Args:
        raw_transcript: Accepted as part of the signature but not read by
            this function.
        speaker_segments_json: JSON text that is decoded and handed to
            merge_segments().

    Returns:
        A dict with two keys: "clean_transcript" holds the text produced by
        build_clean_transcript(), and "clean_speaker_diarization" holds the
        merged segment list.
    """
    diarization = merge_segments(json.loads(speaker_segments_json))

    return {
        "clean_transcript": build_clean_transcript(diarization),
        "clean_speaker_diarization": diarization,
    }
