"""Manual lead recording uploads and CSV bulk import into {bid}_raw_calls."""

from __future__ import annotations

import base64
import csv
import json
import logging
import os
import re
import secrets
from datetime import datetime
from io import StringIO
from typing import Any, Dict, List, Optional, Set
from urllib.parse import quote

logger = logging.getLogger(__name__)

RECORDING_UPLOAD_ROOT = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "recording_uploads",
)

CSV_HEADER_ALIASES = {
    "callid": "callid",
    "call_id": "callid",
    "number": "customer_callinfo",
    "phone": "customer_callinfo",
    "mobile": "customer_callinfo",
    "customer_number": "customer_callinfo",
    "customer_callinfo": "customer_callinfo",
    "start_date": "call_starttime",
    "start_time": "call_starttime",
    "call_starttime": "call_starttime",
    "call_start_time": "call_starttime",
    "end_date": "call_endtime",
    "end_time": "call_endtime",
    "call_endtime": "call_endtime",
    "call_end_time": "call_endtime",
    "call_url": "fileurl",
    "fileurl": "fileurl",
    "file_url": "fileurl",
    "filename": "fileurl",
    "recording_url": "fileurl",
    "url": "fileurl",
    "agent": "agentname",
    "agentname": "agentname",
    "agent_name": "agentname",
    "callername": "agentname",
    "caller_name": "agentname",
    "groupname": "groupname",
    "group_name": "groupname",
    "location": "groupname",
    "direction": "direction",
    "call_status": "call_status",
    "dialstatus": "call_status",
    "status": "call_status",
}


def _normalize_header(value: Any) -> str:
    if value is None:
        return ""
    header = str(value).strip().lower()
    header = header.replace(" ", "_").replace("-", "_")
    return re.sub(r"[^a-z0-9_]", "", header)


def _parse_datetime(value: Any) -> Optional[datetime]:
    if value is None:
        return None
    text = str(value).strip()
    if not text:
        return None
    for fmt in (
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d %H:%M",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%d-%m-%Y %H:%M:%S",
        "%d-%m-%Y %H:%M",
        "%d-%m-%Y %I:%M:%S %p",
        "%d-%m-%Y %I:%M %p",
        "%Y/%m/%d %H:%M:%S",
        "%Y/%m/%d %H:%M",
        "%Y/%m/%d %I:%M:%S %p",
        "%Y/%m/%d %I:%M %p",
        "%d/%m/%Y %H:%M:%S",
        "%d/%m/%Y %H:%M",
        "%d/%m/%Y %I:%M:%S %p",
        "%d/%m/%Y %I:%M %p",
        "%m/%d/%Y %H:%M:%S",
        "%m/%d/%Y %H:%M",
        "%m/%d/%Y %I:%M:%S %p",
        "%m/%d/%Y %I:%M %p",
    ):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    try:
        return datetime.fromisoformat(text.replace("Z", "+00:00").split("+")[0])
    except ValueError:
        return None


def _normalize_callid(value: Any) -> Optional[str]:
    if value is None:
        return None
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    text = str(value).strip()
    return text or None


def _table_columns(cursor, table_name: str) -> Set[str]:
    cursor.execute(f"SHOW COLUMNS FROM `{table_name}`")
    return {str(row.get("Field") or row[0]) for row in (cursor.fetchall() or [])}


def _public_base_url(config: Dict[str, Any]) -> str:
    return str(
        os.getenv("PUBLIC_BASE_URL")
        or config.get("PUBLIC_BASE_URL")
        or ""
    ).rstrip("/")


def save_recording_bytes(
    bid: str,
    file_name: str,
    raw_bytes: bytes,
    config: Dict[str, Any],
) -> str:
    """Persist audio and return a URL stored on the raw_calls row."""
    safe_bid = re.sub(r"[^A-Za-z0-9_-]", "_", str(bid).strip()) or "unknown"
    safe_name = re.sub(r"[^A-Za-z0-9._-]", "_", os.path.basename(file_name or "recording")) or "recording"
    rel_dir = safe_bid
    dest_dir = os.path.join(RECORDING_UPLOAD_ROOT, rel_dir)
    os.makedirs(dest_dir, exist_ok=True)
    token = secrets.token_hex(4)
    dest_name = f"{token}_{safe_name}"
    dest_path = os.path.join(dest_dir, dest_name)
    with open(dest_path, "wb") as fh:
        fh.write(raw_bytes)

    rel_path = f"{rel_dir}/{dest_name}"
    base = _public_base_url(config)
    if base:
        return f"{base}/recording-uploads/{quote(rel_path)}"
    return f"/recording-uploads/{rel_path}"


def parse_csv_bulk_rows(csv_text: str) -> List[Dict[str, Any]]:
    """Parse bulk upload CSV into normalized raw_calls field dicts."""
    if not csv_text or not str(csv_text).strip():
        raise ValueError("CSV file is empty")

    reader = csv.DictReader(StringIO(csv_text))
    if not reader.fieldnames:
        raise ValueError("CSV header row is missing")

    header_map: Dict[str, str] = {}
    for raw in reader.fieldnames:
        norm = _normalize_header(raw)
        canonical = CSV_HEADER_ALIASES.get(norm)
        if canonical:
            header_map[str(raw)] = canonical

    if "callid" not in header_map.values():
        raise ValueError("CSV must include a callid column")

    records: List[Dict[str, Any]] = []
    for row in reader:
        if not row or all(not str(v or "").strip() for v in row.values()):
            continue
        record: Dict[str, Any] = {}
        for src_key, canonical in header_map.items():
            val = row.get(src_key)
            if isinstance(val, str):
                val = val.strip() or None
            record[canonical] = val

        callid = _normalize_callid(record.get("callid"))
        if not callid:
            continue
        record["callid"] = callid

        if record.get("call_starttime"):
            dt = _parse_datetime(record["call_starttime"])
            record["call_starttime"] = dt
        if record.get("call_endtime"):
            dt = _parse_datetime(record["call_endtime"])
            record["call_endtime"] = dt
        if not record.get("call_starttime"):
            record["call_starttime"] = datetime.now()
        if not record.get("call_endtime"):
            record["call_endtime"] = record["call_starttime"]

        fileurl = record.get("fileurl")
        if not fileurl:
            continue
        record["fileurl"] = str(fileurl).strip()
        records.append(record)

    if not records:
        raise ValueError("No valid rows found in CSV (need callid and call_url per row)")
    return records


def import_raw_calls_from_csv(
    db_handler,
    bid: str,
    csv_text: str,
    groupname: Optional[str] = None,
) -> Dict[str, Any]:
    """Insert or update rows from a leads bulk-upload CSV."""
    rows = parse_csv_bulk_rows(csv_text)
    table_name = f"{bid}_raw_calls"

    with db_handler.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute("SHOW TABLES LIKE %s", (table_name,))
        if not cursor.fetchone():
            raise ValueError(f"Table {table_name} does not exist")

        columns = _table_columns(cursor, table_name)
        skipped = 0

        for record in rows:
            record["bid"] = str(bid)
            record.setdefault("status", 0)
            record.setdefault("transcription_status", 0)
            record.setdefault("transcription_requested", 1)
            record.setdefault("selected_for_processing", 1)
            record.setdefault("call_status", "ANSWER")
            record.setdefault("direction", "inbound")
            if groupname and "groupname" in columns:
                record["groupname"] = groupname
            if "extra_fields" in columns:
                record["extra_fields"] = json.dumps(
                    {"source": "manual_upload", "bulk_csv": True}
                )

        col_order = [
            "bid",
            "callid",
            "fileurl",
            "customer_callinfo",
            "call_starttime",
            "call_endtime",
            "status",
            "call_status",
            "direction",
            "groupname",
            "agentname",
            "transcription_status",
            "transcription_requested",
            "selected_for_processing",
            "extra_fields",
        ]
        present = [c for c in col_order if c in columns]

        values = []
        for record in rows:
            if not record.get("fileurl") or not record.get("callid"):
                skipped += 1
                continue
            values.append([record.get(col) for col in present])

        if not values:
            raise ValueError("No valid rows to import")

        columns_sql = ", ".join(f"`{c}`" for c in present)
        placeholders = ", ".join(["%s"] * len(present))
        update_cols = [c for c in present if c not in ("callid",)]
        query = f"INSERT INTO `{table_name}` ({columns_sql}) VALUES ({placeholders})"
        if update_cols:
            update_sql = ", ".join(f"`{c}` = VALUES(`{c}`)" for c in update_cols)
            query += f" ON DUPLICATE KEY UPDATE {update_sql}"

        chunk_size = 500
        for i in range(0, len(values), chunk_size):
            cursor.executemany(query, values[i : i + chunk_size])

    return {
        "processed": len(values),
        "skipped": skipped,
        "message": f"Imported {len(values)} call(s) from CSV. Pipeline will process new rows (status 0).",
    }


def insert_lead_recording(
    db_handler,
    config: Dict[str, Any],
    bid: str,
    lead_phone: str,
    payload: Dict[str, Any],
) -> Dict[str, Any]:
    """Create or update one manual-upload call for a lead."""
    table_name = f"{bid}_raw_calls"
    lead_phone = str(lead_phone or "").strip()
    if not lead_phone:
        raise ValueError("Lead phone is required")

    recording_url = str(payload.get("recording_url") or "").strip()
    file_data_b64 = payload.get("file_data_base64")
    file_name = str(payload.get("file_name") or "recording").strip()

    if file_data_b64:
        try:
            raw = base64.b64decode(str(file_data_b64), validate=False)
        except Exception as exc:
            raise ValueError("Invalid file_data_base64") from exc
        if not raw:
            raise ValueError("Uploaded file is empty")
        if len(raw) > 200 * 1024 * 1024:
            raise ValueError("File size must be 200 MB or less")
        recording_url = save_recording_bytes(bid, file_name, raw, config)

    if not recording_url:
        raise ValueError("recording_url or file_data_base64 is required")

    callid = _normalize_callid(payload.get("callid")) or f"manual_{secrets.token_hex(8)}"
    start_dt = _parse_datetime(payload.get("conversation_datetime")) or datetime.utcnow()
    end_dt = _parse_datetime(payload.get("call_endtime"))

    record: Dict[str, Any] = {
        "bid": str(bid),
        "callid": callid,
        "fileurl": recording_url,
        "customer_callinfo": lead_phone,
        "call_starttime": start_dt,
        "call_endtime": end_dt,
        "status": 0,
        "call_status": "ANSWER",
        "direction": str(payload.get("direction") or "inbound").strip().lower() or "inbound",
        "agentname": payload.get("agentname"),
        "groupname": payload.get("groupname"),
    }

    with db_handler.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute("SHOW TABLES LIKE %s", (table_name,))
        if not cursor.fetchone():
            raise ValueError(f"Table {table_name} does not exist")

        columns = _table_columns(cursor, table_name)
        if "extra_fields" in columns:
            record["extra_fields"] = json.dumps({"source": "manual_upload"})

        col_order = [
            "bid",
            "callid",
            "fileurl",
            "customer_callinfo",
            "call_starttime",
            "call_endtime",
            "status",
            "call_status",
            "direction",
            "agentname",
            "groupname",
            "extra_fields",
        ]
        present = [
            c
            for c in col_order
            if c in columns and c in record and record[c] is not None
        ]

        columns_sql = ", ".join(f"`{c}`" for c in present)
        placeholders = ", ".join(["%s"] * len(present))
        update_cols = [c for c in present if c not in ("callid",)]
        query = f"INSERT INTO `{table_name}` ({columns_sql}) VALUES ({placeholders})"
        if update_cols:
            update_sql = ", ".join(f"`{c}` = VALUES(`{c}`)" for c in update_cols)
            query += f" ON DUPLICATE KEY UPDATE {update_sql}"

        cursor.execute(query, [record[c] for c in present])

    return {
        "callid": callid,
        "fileurl": recording_url,
        "message": "Recording uploaded and queued for processing.",
    }