"""Lead-level insights: objections, BANT, and dynamic path-to-conversion actions."""

from __future__ import annotations

import json
import logging
import re
from datetime import datetime, timezone
from typing import Any, Callable, Dict, List, Optional

logger = logging.getLogger(__name__)


def split_insight_items(value: Any) -> List[str]:
    """Split an insight field into a flat list of trimmed, non-empty items.

    Strings are split on newlines, semicolons, commas and pipes; surrounding
    spaces, hyphens (bullet markers) and tabs are trimmed from every item.
    Lists and tuples are flattened element by element, so a field that was
    already decoded from JSON into a list is not mangled by ``str()``.

    Args:
        value: Free text, a list/tuple of such texts, or any falsy value.

    Returns:
        The cleaned items in their original order; ``[]`` for empty input.
    """
    if isinstance(value, (list, tuple)):
        flattened: List[str] = []
        for element in value:
            flattened.extend(split_insight_items(element))
        return flattened

    text = str(value or "").strip()
    if not text:
        return []
    # Strip each fragment once, then drop the ones that end up empty.
    trimmed = (part.strip(" -\t") for part in re.split(r"[\n;,|]+", text))
    return [part for part in trimmed if part]


def find_snippet(text: str, keywords: List[str]) -> str:
    """Return the first sentence of *text* that contains any keyword.

    Sentences are delimited by whitespace after ``.``, ``!`` or ``?`` and by
    newlines. Matching is a substring test against the lower-cased sentence,
    so keywords are expected in lower case. The hit is trimmed and capped at
    240 characters; ``"Not mentioned"`` is returned when nothing matches.
    """
    fallback = "Not mentioned"
    if not text:
        return fallback
    for candidate in re.split(r"(?<=[.!?])\s+|\n+", text):
        folded = candidate.lower()
        for keyword in keywords:
            if keyword in folded:
                return candidate.strip()[:240]
    return fallback


def _parse_json_field(value: Any) -> Any:
    if value is None:
        return None
    if isinstance(value, (dict, list)):
        return value
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return None
        try:
            return json.loads(text)
        except Exception:
            return None
    return None


# Canonical BANT dimensions, in the order they are iterated when building
# per-call and per-lead profiles.
BANT_KEYS = ("budget", "authority", "need", "timeline")

# Data-capture fields used by _build_data_capture when the caller supplies none.
DEFAULT_DATA_CAPTURE_FIELDS = [
    {"field_key": "competitors_mentioned", "display_name": "Competitors Mentioned", "field_type": "text"},
    {"field_key": "products_discussed", "display_name": "Products / Services Discussed", "field_type": "text"},
    {"field_key": "customer_sentiment_trend", "display_name": "Customer Sentiment Trend", "field_type": "text"},
    {"field_key": "key_concerns", "display_name": "Key Concerns", "field_type": "textarea"},
]

# Hand-picked transcript search keywords for specific field keys; consulted by
# _keywords_for_field before it falls back to deriving keywords from the label.
FIELD_KEY_KEYWORDS = {
    "competitors_mentioned": ["competitor", "compared with", "compared to", "versus", "vs", "other option", "alternative"],
    "products_discussed": ["product", "service", "plan", "package", "property", "project", "discussed", "looking for"],
}

# Words dropped when keywords are derived from a field's label or key.
_STOP_WORDS = {
    "a", "an", "the", "and", "or", "of", "for", "to", "in", "on", "at", "by", "with", "from",
}

# Template for a BANT dimension with no information; callers copy it with
# dict(...) rather than sharing this instance.
EMPTY_BANT_ENTRY = {
    "value": "Not mentioned",
    "reasoning": "",
    "evidence": "",
    "confidence": "low",
}


def _normalize_bant_entry(entry: Any) -> Dict[str, str]:
    if isinstance(entry, dict):
        value = str(entry.get("value") or "Not mentioned").strip() or "Not mentioned"
        return {
            "value": value,
            "reasoning": str(entry.get("reasoning") or "").strip(),
            "evidence": str(entry.get("evidence") or "").strip(),
            "confidence": str(entry.get("confidence") or "low").strip().lower() or "low",
        }
    text = str(entry or "").strip()
    if not text:
        return dict(EMPTY_BANT_ENTRY)
    return {
        "value": text,
        "reasoning": "",
        "evidence": "",
        "confidence": "low",
    }


def _is_bant_mentioned(entry: Dict[str, str]) -> bool:
    value = str(entry.get("value") or "").strip().lower()
    return bool(value) and value not in {"not mentioned", "unknown", "n/a", "-"}


def _resolve_call_bant_profile(call: Dict[str, Any]) -> Dict[str, Any]:
    profile = call.get("bant_profile")
    if isinstance(profile, dict) and profile:
        return profile

    raw = _parse_json_field(call.get("raw_response")) or {}
    customer_profile = raw.get("customer_profile")
    if isinstance(customer_profile, dict) and customer_profile:
        return customer_profile
    return {}


def _aggregate_lead_bant(signals: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Merge per-call BANT profiles into one lead-level profile.

    Signals are scanned in the order given. For each BANT key the first entry
    that is actually "mentioned" is kept; later calls never overwrite it. The
    summary is the first non-empty ``customer_profile_summary`` (or
    ``bant_summary``) encountered.

    Returns:
        ``{"summary": str, "profile": {key: normalized entry}}`` with every
        key of ``BANT_KEYS`` present.
    """
    merged = {key: dict(EMPTY_BANT_ENTRY) for key in BANT_KEYS}
    headline = ""

    for signal in signals or []:
        if not headline:
            headline = str(
                signal.get("customer_profile_summary") or signal.get("bant_summary") or ""
            ).strip()

        call_profile = signal.get("bant_profile") or {}
        if not isinstance(call_profile, dict):
            continue

        for key in BANT_KEYS:
            # Some payloads use the plural "needs" instead of "need".
            lookup = key
            if key == "need" and "need" not in call_profile and "needs" in call_profile:
                lookup = "needs"
            if lookup not in call_profile:
                continue
            candidate = _normalize_bant_entry(call_profile.get(lookup))
            if not _is_bant_mentioned(merged[key]) and _is_bant_mentioned(candidate):
                merged[key] = candidate

    return {"summary": headline, "profile": merged}


def _first_sentence(text: str, max_len: int = 160) -> str:
    cleaned = str(text or "").strip()
    if not cleaned:
        return ""
    parts = re.split(r"(?<=[.!?])\s+", cleaned, maxsplit=1)
    sentence = parts[0].strip()
    if len(sentence) > max_len:
        return sentence[: max_len - 3].rstrip() + "..."
    return sentence


def _truncate(text: str, max_len: int = 80) -> str:
    cleaned = str(text or "").strip()
    if len(cleaned) <= max_len:
        return cleaned
    return cleaned[: max_len - 3].rstrip() + "..."


def _collect_call_signals(calls: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    signals: List[Dict[str, Any]] = []
    for call in calls or []:
        raw = _parse_json_field(call.get("raw_response")) or {}
        bant_profile = _resolve_call_bant_profile(call)
        bant = {}
        for key in BANT_KEYS:
            source_key = "needs" if key == "need" and "need" not in bant_profile and "needs" in bant_profile else key
            if source_key in bant_profile:
                bant[key] = _normalize_bant_entry(bant_profile.get(source_key))["value"]
        signals.append(
            {
                "callid": call.get("callid"),
                "call_starttime": call.get("call_starttime"),
                "direction": call.get("direction"),
                "agentname": call.get("agentname"),
                "summary": str(call.get("summary") or raw.get("overall_summary") or "").strip(),
                "intent": str(call.get("call_purpose") or raw.get("call_purpose") or "").strip(),
                "objections": split_insight_items(call.get("objections_concerns") or raw.get("objections_concerns")),
                "objection_type": str(call.get("objection_type") or raw.get("objection_type") or "").strip(),
                "sentiment": str(call.get("sentiment") or raw.get("sentiment") or "neutral").strip().lower(),
                "quality_score": call.get("quality_score"),
                "bant": bant,
                "bant_profile": bant_profile,
                "bant_summary": str(call.get("bant_summary") or raw.get("customer_profile_summary") or "").strip(),
                "customer_profile_summary": str(
                    call.get("bant_summary")
                    or raw.get("customer_profile_summary")
                    or call.get("customer_profile_summary")
                    or ""
                ).strip(),
                "next_steps": raw.get("next_steps") if isinstance(raw.get("next_steps"), list) else [],
            }
        )
    return signals


def _infer_sales_stage(details: Dict[str, Any], signals: List[Dict[str, Any]]) -> str:
    crm_status = str(details.get("lead_status") or "").strip()
    if crm_status and crm_status not in {"-", "Unknown", "unknown"}:
        return crm_status

    latest = signals[0] if signals else {}
    intent = (latest.get("intent") or "").lower()
    quality = latest.get("quality_score")
    objections = latest.get("objections") or []

    if any(word in intent for word in ("purchase", "buy", "close", "payment", "booking", "order")):
        return "Closing"
    if objections and quality is not None and float(quality) < 55:
        return "Objection handling"
    if any(word in intent for word in ("demo", "proposal", "quote", "pricing")):
        return "Evaluation"
    if any(word in intent for word in ("follow", "callback", "information", "inquiry")):
        return "Discovery"
    if len(signals) >= 2:
        return "Nurture"
    return "Initial contact"


def _estimate_conversion_probability(details: Dict[str, Any], signals: List[Dict[str, Any]]) -> int:
    """Score the lead's likelihood to convert as an integer percentage in [5, 95].

    The base is 55% of the average call quality (or the lead-level
    ``avg_quality_score`` when no call has a score). It is then adjusted for
    sentiment counts, objections and intent keywords on the first signal, the
    inferred sales stage, and sustained engagement (three or more
    conversations with average quality of at least 60).
    """
    call_scores = [
        float(signal["quality_score"])
        for signal in signals
        if signal.get("quality_score") is not None
    ]
    if call_scores:
        avg_quality = sum(call_scores) / len(call_scores)
    else:
        avg_quality = float(details.get("avg_quality_score") or 0)

    moods = [str(signal.get("sentiment") or "neutral").lower() for signal in signals]
    positive = sum(1 for mood in moods if "positive" in mood)
    negative = sum(1 for mood in moods if "negative" in mood)
    neutral = len(moods) - positive - negative

    score = avg_quality * 0.55
    score += positive * 8
    score -= negative * 12
    if neutral and not positive and not negative:
        score += 5

    latest = signals[0] if signals else {}
    if latest.get("objections"):
        score -= 10
    latest_intent = latest.get("intent")
    if latest_intent:
        lowered_intent = latest["intent"].lower()
        if any(word in lowered_intent for word in ("demo", "visit", "quote", "proposal", "booking")):
            score += 12

    stage = _infer_sales_stage(details, signals).lower()
    if any(word in stage for word in ("hot", "qualified", "closing", "won")):
        score += 10
    elif any(word in stage for word in ("lost", "unqualified", "junk")):
        score -= 25

    conversation_count = int(details.get("total_conversations") or len(signals) or 0)
    if conversation_count >= 3 and avg_quality >= 60:
        score += 5

    # Clamp so the UI never shows certainty in either direction.
    return int(max(5, min(95, round(score))))


def _bant_gaps(signals: List[Dict[str, Any]]) -> List[str]:
    gaps: List[str] = []
    if not signals:
        return ["qualification details from the customer"]
    latest_bant = signals[0].get("bant") or {}
    mapping = {
        "budget": "budget or pricing expectations",
        "authority": "decision-maker and approval process",
        "need": "specific needs and use case",
        "timeline": "timeline and urgency",
    }
    for key, label in mapping.items():
        value = str(latest_bant.get(key) or "Not mentioned").strip()
        if not value or value.lower() in {"not mentioned", "unknown", "n/a", "-"}:
            gaps.append(label)
    return gaps


def _normalize_action_steps(raw_steps: Any) -> List[Dict[str, str]]:
    if not isinstance(raw_steps, list):
        return []
    normalized: List[Dict[str, str]] = []
    for step in raw_steps:
        if not isinstance(step, dict):
            continue
        title = str(step.get("title") or step.get("action") or "").strip()
        say = str(step.get("say") or step.get("script") or step.get("message") or "").strip()
        rationale = str(step.get("rationale") or step.get("works_because") or step.get("reason") or "").strip()
        if title and say:
            normalized.append({"title": title, "say": say, "rationale": rationale})
        if len(normalized) >= 3:
            break
    return normalized


def _generate_action_steps_heuristic(
    details: Dict[str, Any],
    signals: List[Dict[str, Any]],
    objections: List[Dict[str, Any]],
) -> List[Dict[str, str]]:
    """Build up to three rule-based follow-up steps for a lead.

    Each step is a dict with ``title``, ``say`` (suggested wording) and
    ``rationale``. The steps are assembled in a fixed order:

    1. A contextual "reconnect" opener, always present.
    2. An objection step: address the first unresolved objection, or, if there
       is none but the first signal lists objections, confirm they are resolved.
    3. Exactly one advancement step, chosen from: push for commitment, fill
       the first BANT gap, align on the scheduled follow-up, or propose a
       generic next step.

    Args:
        details: Lead record; ``next_task_due_date`` is read here and the
            whole dict is passed on to the stage/probability helpers.
        signals: Call signals; ``signals[0]`` is used as the latest call
            (presumably newest-first; verify against the caller).
        objections: Objection dicts with ``objection``, ``type`` and
            ``customer_satisfied`` keys.

    Returns:
        A list of at most three steps, or ``[]`` when there are no signals.
    """
    if not signals:
        return []

    latest = signals[0]
    stage = _infer_sales_stage(details, signals)
    probability = _estimate_conversion_probability(details, signals)
    steps: List[Dict[str, str]] = []

    # Step 1: opener. Prefer the call summary; fall back to the call intent.
    summary_hook = _first_sentence(latest.get("summary") or "")
    intent = latest.get("intent") or "your recent inquiry"
    if summary_hook:
        opener = (
            f"I reviewed our last conversation where we discussed {summary_hook.rstrip('.')}. "
            f"I wanted to pick up from there and help you with the next step."
        )
    else:
        opener = (
            f"I am following up on our recent call about {intent.lower()}. "
            "I have a few specific points based on what you shared."
        )
    steps.append(
        {
            "title": f"Reconnect on { _truncate(intent, 60) or 'the last conversation'}",
            "say": opener,
            "rationale": (
                f"Latest call intent is \"{intent}\" with {latest.get('sentiment', 'neutral')} sentiment; "
                "a contextual opener shows you listened and reduces repetition."
            ),
        }
    )

    # Step 2: objections. An objection is unresolved when its
    # customer_satisfied flag is falsy or missing.
    unresolved = [obj for obj in objections if not obj.get("customer_satisfied")]
    if unresolved:
        top = unresolved[0]
        objection_text = top.get("objection") or "the concern you raised"
        steps.append(
            {
                "title": f"Address: {_truncate(objection_text, 55)}",
                "say": (
                    f"You had mentioned {objection_text.lower().rstrip('.')}. "
                    "I want to make sure we resolve that clearly before moving forward — "
                    "can I walk you through how we handle this?"
                ),
                "rationale": (
                    f"Unresolved objection detected ({top.get('type') or 'concern'}). "
                    "Removing this blocker is the highest-impact move before advancing the deal."
                ),
            }
        )
    elif latest.get("objections"):
        steps.append(
            {
                "title": "Confirm prior concerns are resolved",
                "say": (
                    f"Last time you raised {latest['objections'][0].lower()}. "
                    "Does that still feel addressed, or is anything still unclear?"
                ),
                "rationale": "Even handled objections can resurface; confirming closure protects conversion momentum.",
            }
        )

    gaps = _bant_gaps(signals)
    next_due = str(details.get("next_task_due_date") or "").strip()

    # Step 3: exactly one advancement step, in priority order.
    if probability >= 70 and not unresolved:
        steps.append(
            {
                "title": "Advance toward commitment",
                "say": (
                    f"Based on our conversations, you seem close to a decision (stage: {stage}, "
                    f"~{probability}% conversion likelihood). "
                    "Would it help to schedule a demo or finalize the next commercial step this week?"
                ),
                "rationale": (
                    f"Strong buying signals (quality/sentiment) and stage \"{stage}\" suggest pushing for a concrete next milestone."
                ),
            }
        )
    elif gaps:
        gap_text = gaps[0]
        steps.append(
            {
                # The first word of the gap label doubles as a short title.
                "title": f"Qualify {gap_text.split(' ')[0]}",
                "say": (
                    f"To recommend the best option, I still need clarity on {gap_text}. "
                    "Could you share a bit more so I can tailor the next step for you?"
                ),
                "rationale": (
                    f"Stage \"{stage}\" with ~{probability}% conversion likelihood — "
                    f"filling the {gap_text} gap will sharpen fit and next-step timing."
                ),
            }
        )
    elif next_due and next_due != "-":
        steps.append(
            {
                "title": "Align on scheduled follow-up",
                "say": (
                    f"I have us marked for follow-up around {next_due}. "
                    "Does that timing still work, or should we adjust based on where things stand now?"
                ),
                "rationale": "Keeps CRM task timing aligned with the customer's current intent and availability.",
            }
        )
    else:
        sentiment = latest.get("sentiment") or "neutral"
        steps.append(
            {
                "title": "Set a concrete next step",
                "say": (
                    f"Given your {sentiment} engagement so far, I'd like to propose a clear next step — "
                    "a short follow-up call or sharing specific material. What would be most useful for you?"
                ),
                "rationale": (
                    f"Stage \"{stage}\" at ~{probability}% conversion likelihood — "
                    "locking a specific next action prevents the lead from going cold."
                ),
            }
        )

    return steps[:3]


def _generate_action_steps_llm(
    invoke_chat: Callable[..., Optional[str]],
    details: Dict[str, Any],
    signals: List[Dict[str, Any]],
    objections: List[Dict[str, Any]],
) -> List[Dict[str, str]]:
    if not signals:
        return []

    stage = _infer_sales_stage(details, signals)
    probability = _estimate_conversion_probability(details, signals)
    context = {
        "lead_name": details.get("lead_name"),
        "lead_status": details.get("lead_status"),
        "owner_name": details.get("owner_name"),
        "next_task_due_date": details.get("next_task_due_date"),
        "total_conversations": details.get("total_conversations"),
        "avg_quality_score": details.get("avg_quality_score"),
        "inferred_stage": stage,
        "conversion_probability_pct": probability,
        "calls": signals[:5],
        "objections": objections[:6],
        "bant_gaps": _bant_gaps(signals),
    }
    prompt = (
        "You are a sales coach generating lead follow-up guidance.\n"
        "Use ONLY the evidence in LEAD CONTEXT. Do not invent facts, pricing, or commitments.\n"
        "Return STRICT JSON with key action_steps: an array of 2 or 3 objects.\n"
        "Each object must have: title (short action name), say (exact words the agent should use on the next call), "
        "rationale (one sentence explaining why this works based on summary/intent/objections/sentiment/stage/probability).\n"
        "Prioritize unresolved objections, then stage-appropriate advancement, then qualification gaps.\n"
        "Keep each say under 280 characters.\n\n"
        f"LEAD CONTEXT:\n{json.dumps(context, ensure_ascii=True, default=str)}"
    )
    try:
        answer = invoke_chat(prompt)
    except Exception as exc:
        logger.warning("Lead action-step LLM failed: %s", exc)
        return []

    if not answer:
        return []

    start = answer.find("{")
    end = answer.rfind("}")
    if start == -1 or end <= start:
        return []

    try:
        parsed = json.loads(answer[start : end + 1])
    except Exception:
        return []

    steps = _normalize_action_steps(parsed.get("action_steps"))
    return steps if len(steps) >= 2 else []


# Placeholder emitted for data-capture fields with no extractable value.
# Note the capitalisation differs from the "Not mentioned" used for BANT entries.
NOT_MENTIONED = "Not Mentioned"

# Substrings that mark a field (by key or display name) as a yes/no question;
# checked by _is_yes_no_field.
_YES_NO_FIELD_HINTS = (
    "requested",
    "required",
    "confirmed",
    "scheduled",
    "interested",
    "approved",
    "available",
    "eligible",
    "agreed",
    "accepted",
    "declined",
    "cancelled",
    "canceled",
)


def _keywords_for_field(field: Dict[str, Any]) -> List[str]:
    """Return the lower-case transcript search keywords for a data-capture field.

    Resolution order: the curated list in ``FIELD_KEY_KEYWORDS``; then words
    of the display name (or key) longer than two characters that are not stop
    words; then the underscore-separated tokens of the field key.
    """
    key = str(field.get("field_key") or "").strip().lower()
    curated = FIELD_KEY_KEYWORDS.get(key)
    if curated is not None:
        # Copy so callers can extend the result without touching the table.
        return list(curated)

    label = str(field.get("display_name") or key or "").strip().lower()
    from_label: List[str] = []
    for token in re.split(r"[^a-z0-9]+", label):
        if len(token) > 2 and token not in _STOP_WORDS:
            from_label.append(token)
    if from_label:
        return from_label

    if not key:
        return []
    return [piece for piece in key.split("_") if piece and piece not in _STOP_WORDS]


def _is_not_mentioned(value: Any) -> bool:
    text = str(value or "").strip().lower()
    return not text or text in {"not mentioned", "not mention", "n/a", "na", "unknown", "-", "none", "nil"}


def _is_yes_no_field(field: Dict[str, Any]) -> bool:
    """Return True when the field's key or display name contains a yes/no hint word."""
    key_part = str(field.get("field_key") or "").strip().lower()
    label_part = str(field.get("display_name") or "").strip().lower()
    combined = f"{key_part} {label_part}"
    for hint in _YES_NO_FIELD_HINTS:
        if hint in combined:
            return True
    return False


def _infer_yes_no_from_text(text: str) -> str:
    lowered = str(text or "").strip().lower()
    if not lowered:
        return NOT_MENTIONED
    if lowered in {"yes", "no"}:
        return lowered.title()
    negative_markers = (
        " not ",
        "n't ",
        " never ",
        " cannot ",
        " can't ",
        " won't ",
        " dont ",
        " don't ",
        " no ",
        " declined ",
        " refuse ",
        " unavailable ",
    )
    padded = f" {lowered} "
    if any(marker in padded for marker in negative_markers):
        return "No"
    positive_markers = (
        " yes",
        "yeah",
        "sure",
        "confirmed",
        "agreed",
        "accepted",
        "scheduled",
        "will ",
        "can ",
        "okay",
        "ok ",
    )
    if any(marker in lowered for marker in positive_markers):
        return "Yes"
    return NOT_MENTIONED


def _find_matching_sentence(text: str, keywords: List[str]) -> str:
    if not text or not keywords:
        return ""
    sentences = re.split(r"(?<=[.!?])\s+|\n+", text)
    for sentence in sentences:
        lowered = sentence.lower()
        if any(keyword in lowered for keyword in keywords):
            return sentence.strip()
    return ""


def _smart_compact_from_sentence(sentence: str, field: Dict[str, Any]) -> str:
    cleaned = str(sentence or "").strip()
    if not cleaned:
        return NOT_MENTIONED

    lowered = cleaned.lower()
    field_key = str(field.get("field_key") or "").strip().lower()

    visit_match = re.search(
        r"visit(?:ing)?\s+(?:your\s+|the\s+|our\s+)?([a-z][a-z\s]{0,20}?)(?:\s+(?:tomorrow|today|next|on\b)|[.?!,]|$)",
        lowered,
    )
    if visit_match and any(token in field_key for token in ("next", "step", "visit", "action", "follow")):
        place = visit_match.group(1).strip()
        if place:
            return f"{place.title()} Visit"

    demo_match = re.search(r"\b(?:demo|demonstration)\b", lowered)
    if demo_match and "demo" in field_key:
        return "Yes"

    competitor_match = re.search(
        r"(?:competitor|compared with|compared to|versus|vs\.?)\s+([A-Za-z0-9][A-Za-z0-9\s&.-]{0,30})",
        cleaned,
        flags=re.IGNORECASE,
    )
    if competitor_match and "competitor" in field_key:
        return competitor_match.group(1).strip(" .,!?")


def _compact_phrase_from_sentence(
    sentence: str,
    keywords: List[str],
    field: Optional[Dict[str, Any]] = None,
    max_words: int = 5,
) -> str:
    cleaned = str(sentence or "").strip()
    if not cleaned:
        return NOT_MENTIONED

    if field:
        smart = _smart_compact_from_sentence(cleaned, field)
        if smart and smart != NOT_MENTIONED:
            return smart

    lowered = cleaned.lower()
    for keyword in keywords:
        idx = lowered.find(keyword.lower())
        if idx < 0:
            continue
        tail = cleaned[idx + len(keyword) :].strip(" :,-–—.")
        if tail:
            words = re.split(r"\s+", tail)
            return " ".join(words[:max_words]).strip(".,!?")

    words = re.split(r"\s+", cleaned)
    return " ".join(words[:max_words]).strip(".,!?")


def _extract_typed_value(sentence: str, field_type: str) -> str:
    field_type = str(field_type or "text").strip().lower()
    if field_type == "phone":
        match = re.search(r"\+?\d[\d\s\-()]{8,}\d", sentence)
        if match:
            return re.sub(r"\s+", "", match.group(0))
    if field_type == "email":
        match = re.search(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", sentence)
        if match:
            return match.group(0)
    if field_type == "number":
        match = re.search(r"\b\d+(?:[.,]\d+)?\b", sentence)
        if match:
            return match.group(0)
    if field_type == "date":
        match = re.search(
            r"\b(?:today|tomorrow|yesterday|\d{1,2}[/-]\d{1,2}(?:[/-]\d{2,4})?|"
            r"(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\s+\d{1,2})\b",
            sentence,
            flags=re.IGNORECASE,
        )
        if match:
            return match.group(0).title()
    return ""


def _normalize_capture_value(value: Any, field: Optional[Dict[str, Any]] = None) -> str:
    """Stringify a captured value and compact it; placeholders become ``NOT_MENTIONED``."""
    candidate = str(value or "").strip()
    if not _is_not_mentioned(candidate):
        return _enforce_compact_value(candidate, field or {})
    return NOT_MENTIONED


def _enforce_compact_value(text: str, field: Dict[str, Any]) -> str:
    """Force a captured value into a short, display-ready form.

    Order of precedence: placeholder detection, yes/no inference for yes/no
    fields, typed extraction by ``field_type``, then truncation to six words
    and at most 60 characters.
    """
    candidate = str(text or "").strip()
    if _is_not_mentioned(candidate):
        return NOT_MENTIONED

    if _is_yes_no_field(field):
        verdict = _infer_yes_no_from_text(candidate)
        if verdict != NOT_MENTIONED:
            return verdict

    declared_type = str(field.get("field_type") or "text").strip().lower()
    extracted = _extract_typed_value(candidate, declared_type)
    if extracted:
        return extracted

    tokens = re.split(r"\s+", candidate)
    shortened = " ".join(tokens[:6]) if len(tokens) > 6 else candidate
    shortened = shortened.strip(".,!? ")
    if len(shortened) > 60:
        shortened = shortened[:57].rstrip() + "..."
    return shortened or NOT_MENTIONED


def _heuristic_data_capture_value(
    field: Dict[str, Any],
    *,
    transcript_text: str,
    sentiment_trend: str,
    objections: List[Dict[str, Any]],
) -> str:
    field_key = str(field.get("field_key") or "").strip().lower()
    if field_key == "customer_sentiment_trend":
        return sentiment_trend or NOT_MENTIONED
    if field_key == "key_concerns":
        phrases = []
        for item in objections[:4]:
            objection = str(item.get("objection") or "").strip()
            if not objection:
                continue
            phrases.append(_compact_phrase_from_sentence(objection, _keywords_for_field(field), field=field, max_words=4))
        joined = ", ".join(phrase for phrase in phrases if phrase and phrase != NOT_MENTIONED)
        return joined or NOT_MENTIONED

    keywords = _keywords_for_field(field)
    field_key = str(field.get("field_key") or "").strip().lower()
    if any(token in field_key for token in ("next", "step", "visit", "action", "follow")):
        keywords = list(dict.fromkeys(keywords + ["visit", "visiting", "tomorrow", "callback", "meeting"]))
    if not keywords:
        return NOT_MENTIONED

    sentence = _find_matching_sentence(transcript_text, keywords)
    if not sentence:
        return NOT_MENTIONED

    if _is_yes_no_field(field):
        return _infer_yes_no_from_text(sentence)

    typed = _extract_typed_value(sentence, field.get("field_type"))
    if typed:
        return typed

    return _enforce_compact_value(_compact_phrase_from_sentence(sentence, keywords, field=field), field)


def _generate_data_capture_llm(
    invoke_chat: Callable[..., Optional[str]],
    fields: List[Dict[str, Any]],
    transcript_text: str,
) -> Dict[str, str]:
    if not fields or not transcript_text.strip():
        return {}

    field_specs = [
        {
            "field_key": field.get("field_key"),
            "label": field.get("display_name") or field.get("field_key"),
            "type": field.get("field_type") or "text",
        }
        for field in fields
        if field.get("field_key")
    ]
    if not field_specs:
        return {}

    clipped_transcript = transcript_text[-12000:]
    prompt = (
        "Extract only the specific value for each field from the conversation transcript.\n"
        "Use ONLY facts stated in the transcript. Do not invent details.\n"
        "Return STRICT JSON with key data_capture: an object mapping each field_key to a string value.\n\n"
        "Rules:\n"
        "- Do not copy complete sentences from the transcript.\n"
        "- Return concise normalized values (1-5 words where possible).\n"
        "- Use short labels or entities, not narration.\n"
        "- If information is unavailable, return exactly \"Not Mentioned\".\n"
        "- If a yes/no answer can be inferred, return only \"Yes\" or \"No\".\n\n"
        "Examples:\n"
        "Transcript: \"I will visit your office tomorrow.\"\n"
        "Field next_steps -> \"Office Visit\"\n\n"
        "Transcript: \"Can you arrange a demo next week?\"\n"
        "Field demo_requested -> \"Yes\"\n\n"
        f"FIELDS:\n{json.dumps(field_specs, ensure_ascii=True)}\n\n"
        f"TRANSCRIPT:\n{clipped_transcript}"
    )
    try:
        answer = invoke_chat(prompt)
    except Exception as exc:
        logger.warning("Lead data-capture LLM failed: %s", exc)
        return {}

    if not answer:
        return {}

    start = answer.find("{")
    end = answer.rfind("}")
    if start == -1 or end <= start:
        return {}

    try:
        parsed = json.loads(answer[start : end + 1])
    except Exception:
        return {}

    raw_capture = parsed.get("data_capture")
    if not isinstance(raw_capture, dict):
        return {}

    output: Dict[str, str] = {}
    for field in fields:
        field_key = str(field.get("field_key") or "").strip()
        if not field_key:
            continue
        output[field_key] = _normalize_capture_value(raw_capture.get(field_key), field)
    return output


def _build_data_capture(
    fields: Optional[List[Dict[str, Any]]],
    *,
    transcript_text: str,
    sentiment_trend: str,
    objections: List[Dict[str, Any]],
    invoke_chat: Optional[Callable[..., Optional[str]]] = None,
    use_llm: bool = False,
) -> Dict[str, str]:
    active_fields = fields or DEFAULT_DATA_CAPTURE_FIELDS
    if not active_fields:
        return {}

    capture: Dict[str, str] = {}
    if use_llm and invoke_chat and transcript_text.strip():
        capture = _generate_data_capture_llm(invoke_chat, active_fields, transcript_text)

    for field in active_fields:
        field_key = str(field.get("field_key") or "").strip()
        if not field_key:
            continue
        existing = capture.get(field_key)
        if field_key not in capture or _is_not_mentioned(existing):
            capture[field_key] = _heuristic_data_capture_value(
                field,
                transcript_text=transcript_text,
                sentiment_trend=sentiment_trend,
                objections=objections,
            )
        else:
            capture[field_key] = _normalize_capture_value(existing, field)

    return capture


def build_lead_insights(
    details: Dict[str, Any],
    invoke_chat: Optional[Callable[..., Optional[str]]] = None,
    use_llm: bool = False,
    data_capture_fields: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Assemble the full insight payload for one lead from its call history.

    Args:
        details: Lead record. ``calls`` (list of call dicts) drives most of
            the output; other keys are read by the stage, probability and
            action-step helpers.
        invoke_chat: Optional chat-model callable taking a prompt and
            returning the reply text.
        use_llm: When true and ``invoke_chat`` is given, the model is tried
            first for action steps and data capture; heuristics fill in
            whatever it does not provide.
        data_capture_fields: Field definitions to capture; the module
            defaults are used when omitted.

    Returns:
        A dict with ``generated_at`` (UTC ISO timestamp ending in "Z"),
        ``summary``, ``objections``, ``bant``, ``data_capture`` and
        ``path_to_conversion`` (``previous_summary``, ``stage``,
        ``conversion_probability_pct``, ``action_steps``).
    """
    calls = details.get("calls") or []
    signals = _collect_call_signals(calls)
    transcript_text = "\n".join(str(call.get("transcript") or "") for call in calls)
    summaries = [signal["summary"] for signal in signals if signal.get("summary")]

    # De-duplicate objections case-insensitively, keeping the first occurrence.
    objections: List[Dict[str, Any]] = []
    seen_objections = set()
    for call in calls:
        # A missing or non-numeric score counts as 0, i.e. "not satisfied".
        try:
            quality_num = float(call.get("quality_score"))
        except (TypeError, ValueError):
            quality_num = 0.0
        for objection in split_insight_items(call.get("objections_concerns")):
            key = objection.lower()
            if key in seen_objections:
                continue
            seen_objections.add(key)
            objections.append(
                {
                    "objection": objection,
                    "type": call.get("call_purpose") or "",
                    "how_handled": call.get("summary") or "",
                    "customer_satisfied": quality_num >= 60,
                }
            )

    previous_summary = summaries[:5]
    if not previous_summary and calls:
        # No call summaries available: describe the calls themselves. The
        # direction key may be present with a None value, hence the `or`.
        previous_summary = [
            (
                f"{str(call.get('direction') or 'Call').title()} call with {call.get('agentname') or 'agent'} "
                f"on {call.get('call_starttime') or 'unknown date'}."
            )
            for call in calls[:5]
        ]

    stage = _infer_sales_stage(details, signals)
    conversion_probability = _estimate_conversion_probability(details, signals)

    action_steps: List[Dict[str, str]] = []
    if use_llm and invoke_chat:
        action_steps = _generate_action_steps_llm(invoke_chat, details, signals, objections)
    if len(action_steps) < 2:
        action_steps = _generate_action_steps_heuristic(details, signals, objections)

    sentiment_trend = "Positive"
    if signals:
        latest_sentiment = signals[0].get("sentiment") or "neutral"
        if "negative" in latest_sentiment:
            sentiment_trend = "Needs attention"
        elif "neutral" in latest_sentiment:
            sentiment_trend = "Mixed / neutral"
    elif float(details.get("avg_quality_score") or 0) < 60:
        sentiment_trend = "Needs follow-up"

    aggregated_bant = _aggregate_lead_bant(signals)
    bant_profile = aggregated_bant["profile"]
    # Back-fill every dimension the structured profiles did not cover, not
    # only when budget happens to be the missing one.
    missing_keys = [key for key in BANT_KEYS if not _is_bant_mentioned(bant_profile[key])]
    if missing_keys and transcript_text.strip():
        fallback_keywords = {
            "budget": ["budget", "price", "pricing", "cost", "amount", "expensive"],
            "authority": ["decision", "approval", "approve", "manager", "owner", "authority"],
            "need": ["need", "requirement", "looking for", "interested", "require"],
            "timeline": ["timeline", "when", "date", "month", "week", "soon", "urgent"],
        }
        for key in missing_keys:
            snippet = find_snippet(transcript_text, fallback_keywords[key])
            bant_profile[key] = _normalize_bant_entry(snippet)

    data_capture = _build_data_capture(
        data_capture_fields,
        transcript_text=transcript_text,
        sentiment_trend=sentiment_trend,
        objections=objections,
        invoke_chat=invoke_chat,
        use_llm=use_llm,
    )

    # Timezone-aware "now" rendered in the same naive-ISO + "Z" format as before.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "generated_at": generated_at,
        "summary": " ".join(previous_summary[:2]) or "Insights generated from available lead conversations.",
        "objections": objections,
        "bant": aggregated_bant,
        "data_capture": data_capture,
        "path_to_conversion": {
            "previous_summary": previous_summary,
            "stage": stage,
            "conversion_probability_pct": conversion_probability,
            "action_steps": action_steps[:3],
        },
    }
