#!/usr/bin/env python3
"""
Import LeadSquared leads from a CSV export into crm_leads_cache.

Usage:
    python3 import_lsq_csv.py leads.csv
    python3 import_lsq_csv.py leads.csv --bid 6004 --dry-run

The CSV must have been exported from LeadSquared (Leads → Export).
Required columns (case-insensitive, flexible naming):
  - Phone / Mobile / PhoneNumber
Optional columns (auto-detected):
  - First Name / FirstName / Name / ProspectName
  - Last Name / LastName
  - Owner / OwnerName / Sales Owner
  - Email / EmailAddress
  - Lead Status / LeadStatus / Status
  - Next Task Due Date / NextTaskDueDate

Any LSQ export CSV with at minimum a phone column will work.
"""
import argparse
import csv
import hashlib
import json
import logging
import os
import sys
from datetime import datetime
from typing import Any, Dict, List, Optional

from dotenv import load_dotenv

load_dotenv()

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
logger = logging.getLogger('import_lsq_csv')

BID_DEFAULT = '6004'


# ── Phone helpers ─────────────────────────────────────────────────────────────

def _normalize_phone(phone: Any) -> List[str]:
    digits = ''.join(ch for ch in str(phone or '') if ch.isdigit())
    if not digits:
        return []
    core10 = digits[-10:] if len(digits) > 10 else digits
    variants = {digits, f'+{digits}', core10}
    if len(core10) == 10:
        variants.update({f'91{core10}', f'+91{core10}', f'0{core10}'})
    return [v for v in variants if v]


# ── Column detection ──────────────────────────────────────────────────────────

def _find_col(headers: List[str], *candidates) -> Optional[str]:
    h_lower = {h.lower().strip(): h for h in headers}
    for c in candidates:
        if c.lower() in h_lower:
            return h_lower[c.lower()]
    return None


def _val(row: Dict, col: Optional[str]) -> Optional[str]:
    if not col:
        return None
    v = (row.get(col) or '').strip()
    return v if v else None


# ── External ID (stable hash for leads without explicit ID) ───────────────────

def _external_id(row: Dict, col_map: Dict) -> str:
    eid = _val(row, col_map.get('prospect_id'))
    if eid:
        return eid
    phone = _val(row, col_map.get('phone')) or ''
    email = _val(row, col_map.get('email')) or ''
    name = (_val(row, col_map.get('first_name')) or '') + ' ' + (_val(row, col_map.get('last_name')) or '')
    name = name.strip()
    return hashlib.sha256(f'{phone}|{email}|{name}'.encode()).hexdigest()


# ── DB upsert ─────────────────────────────────────────────────────────────────

def _upsert(cursor, bid: str, row: Dict, col_map: Dict) -> None:
    phone = _val(row, col_map.get('phone'))
    if not phone:
        return

    first = _val(row, col_map.get('first_name')) or ''
    last = _val(row, col_map.get('last_name')) or ''
    name = (first + ' ' + last).strip() or _val(row, col_map.get('name'))
    owner = _val(row, col_map.get('owner'))
    email = _val(row, col_map.get('email'))
    status = _val(row, col_map.get('status'))
    next_due = _val(row, col_map.get('next_due'))
    external_id = _external_id(row, col_map)
    variants = _normalize_phone(phone)
    now = datetime.utcnow()

    cursor.execute("""
        INSERT INTO crm_leads_cache
            (bid, provider, external_lead_id, lead_name, owner_name, email,
             phone_primary, phone_variants, lead_status, next_task_due_date,
             lead_payload, last_synced_at)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON DUPLICATE KEY UPDATE
            lead_name        = COALESCE(VALUES(lead_name), lead_name),
            owner_name       = COALESCE(VALUES(owner_name), owner_name),
            email            = COALESCE(VALUES(email), email),
            phone_primary    = VALUES(phone_primary),
            phone_variants   = VALUES(phone_variants),
            lead_status      = COALESCE(VALUES(lead_status), lead_status),
            next_task_due_date = COALESCE(VALUES(next_task_due_date), next_task_due_date),
            lead_payload     = VALUES(lead_payload),
            last_synced_at   = VALUES(last_synced_at),
            updated_at       = NOW()
    """, (
        bid, 'leadsquared', external_id, name or None, owner, email,
        phone, json.dumps(variants), status, next_due,
        json.dumps(dict(row)), now,
    ))


# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    parser = argparse.ArgumentParser(description='Import LSQ CSV leads into crm_leads_cache')
    parser.add_argument('csv_file', help='Path to the LSQ export CSV file')
    parser.add_argument('--bid', default=BID_DEFAULT, help=f'Business ID (default: {BID_DEFAULT})')
    parser.add_argument('--dry-run', action='store_true', help='Parse CSV but do not write to DB')
    args = parser.parse_args()

    if not os.path.isfile(args.csv_file):
        logger.error('File not found: %s', args.csv_file)
        sys.exit(1)

    # Read CSV
    with open(args.csv_file, newline='', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        headers = reader.fieldnames or []
        rows = list(reader)

    logger.info('Loaded %d rows from %s', len(rows), args.csv_file)
    logger.info('Columns: %s', headers)

    # Detect columns
    col_map = {
        'prospect_id': _find_col(headers, 'ProspectID', 'Lead Id', 'LeadId', 'Id', 'prospect_id'),
        'phone': _find_col(headers, 'Phone', 'Mobile', 'PhoneNumber', 'phone', 'mobile', 'Phone Number'),
        'first_name': _find_col(headers, 'First Name', 'FirstName', 'first_name'),
        'last_name': _find_col(headers, 'Last Name', 'LastName', 'last_name'),
        'name': _find_col(headers, 'Name', 'ProspectName', 'Contact Name', 'name', 'Full Name'),
        'owner': _find_col(headers, 'Owner', 'OwnerName', 'Sales Owner', 'owner_name', 'Assigned To', 'Agent'),
        'email': _find_col(headers, 'Email', 'EmailAddress', 'Email Address', 'email'),
        'status': _find_col(headers, 'Lead Status', 'LeadStatus', 'Status', 'status'),
        'next_due': _find_col(headers, 'Next Task Due Date', 'NextTaskDueDate', 'Next Due Date'),
    }

    logger.info('Column mapping: %s', {k: v for k, v in col_map.items() if v})

    if not col_map.get('phone'):
        logger.error('No phone column found in CSV. Columns available: %s', headers)
        sys.exit(1)

    if args.dry_run:
        logger.info('--- DRY RUN --- (not writing to DB)')
        no_phone = 0
        for i, row in enumerate(rows[:5], 1):
            phone = _val(row, col_map['phone'])
            name = ((_val(row, col_map.get('first_name')) or '') + ' ' + (_val(row, col_map.get('last_name')) or '')).strip() or _val(row, col_map.get('name'))
            owner = _val(row, col_map.get('owner'))
            logger.info('Row %d: phone=%s name=%s owner=%s', i, phone, name, owner)
            if not phone:
                no_phone += 1
        logger.info('... showing first 5 rows. Total=%d, sample no-phone=%d', len(rows), no_phone)
        return

    # Import to DB
    import pymysql
    from pymysql.cursors import DictCursor

    DEST_DB = {
        'host': os.getenv('DB_HOST', '127.0.0.1'),
        'user': os.getenv('DB_USER', 'admin'),
        'password': os.getenv('DB_PASSWORD', ''),
        'database': os.getenv('DB_NAME', 'voicebot_cluster'),
        'charset': 'utf8mb4',
        'cursorclass': DictCursor
    }

    conn = pymysql.connect(**DEST_DB)
    cursor = conn.cursor()

    inserted = 0
    skipped = 0

    for row in rows:
        phone = _val(row, col_map['phone'])
        if not phone:
            skipped += 1
            continue
        try:
            _upsert(cursor, args.bid, row, col_map)
            inserted += 1
        except Exception as e:
            logger.warning('Failed to upsert row (phone=%s): %s', phone, e)
            skipped += 1

    conn.commit()
    conn.close()

    logger.info('Done. Upserted=%d  skipped(no phone)=%d  total=%d', inserted, skipped, len(rows))


if __name__ == '__main__':
    main()
