#!/usr/bin/env python3
"""One-off cleanup: remove or mark sub-min / unprocessed calls in raw_calls."""

from __future__ import annotations

import argparse
import logging
import os
import sys

# Put the parent of this script's directory at the front of the import path.
# NOTE(review): presumably that is where config, db_handler and
# min_duration_util live, so the imports below resolve when run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from config import Config
from db_handler import DatabaseHandler
from min_duration_util import call_duration_seconds, purge_unprocessed_if_below_min, should_skip_at_ingest

# Root logging: INFO and above, each record as "<time> <level> <message>".
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
# Module-level logger used for all progress and error reporting in this script.
logger = logging.getLogger(__name__)


def main() -> int:
    """Scan one business's raw-calls table and purge calls below the minimum duration.

    Command-line arguments:
        --bid      Business ID (required); names the ``<bid>_raw_calls`` table.
        --dry-run  Log and count matching calls without calling the purge helper.
        --limit    Maximum number of rows to read (default 5000), newest
                   ``call_starttime`` first.

    Returns:
        0 when the scan completes or when no minimum duration is configured,
        1 when the raw-calls table does not exist.

    Raises:
        SystemExit: with status 2 (raised by ``argparse``) when ``--bid`` is
            blank or contains a backtick, or when ``--limit`` is negative.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--bid", required=True, help="Business ID")
    parser.add_argument("--dry-run", action="store_true", help="Report only; do not modify rows")
    parser.add_argument("--limit", type=int, default=5000, help="Max rows to scan")
    args = parser.parse_args()

    bid = str(args.bid).strip()
    # The BID is spliced into a backtick-quoted table name further down, so
    # refuse values that would produce a nameless table or terminate the
    # quoting. Done before any database object is created.
    if not bid:
        parser.error("--bid must not be blank")
    if "`" in bid:
        parser.error("--bid must not contain a backtick")
    # A negative LIMIT is not a valid row count; fail here with a clear message
    # instead of letting the query fail inside the database.
    if args.limit < 0:
        parser.error("--limit must be zero or greater")

    db = DatabaseHandler(Config())
    cfg = db.get_pipeline_config(bid) or {}
    # Missing, falsy or negative settings all collapse to 0 ("not configured").
    min_s = max(0, int(cfg.get("min_call_duration_s") or 0))
    # NOTE(review): this fallback runs before the min_s check and also under
    # --dry-run; confirm ensure_min_duration_effective_at is safe to call in
    # both situations.
    effective_at = cfg.get("min_call_duration_effective_at") or db.ensure_min_duration_effective_at(bid)

    if min_s <= 0:
        logger.info("BID %s has no min_call_duration_s configured; nothing to clean", bid)
        return 0

    raw_table = f"{bid}_raw_calls"

    with db.get_connection() as conn:
        cursor = conn.cursor()
        if not db._table_exists(cursor, raw_table):
            logger.error("Table %s does not exist", raw_table)
            return 1

        # Rows already marked 'skipped_short' are excluded; NULL statuses are
        # treated as '' so they are still scanned.
        cursor.execute(
            f"""
            SELECT callid, call_starttime, call_endtime, duration_seconds,
                   transcription_status, status
            FROM `{raw_table}`
            WHERE COALESCE(transcription_status, '') != 'skipped_short'
            ORDER BY call_starttime DESC
            LIMIT %s
            """,
            (args.limit,),
        )
        rows = cursor.fetchall() or []

    scanned = len(rows)
    acted = 0
    for row in rows:
        call_row = dict(row)
        if not should_skip_at_ingest(call_row, min_s, effective_at):
            continue
        duration = call_duration_seconds(call_row)
        logger.info(
            "BID %s call %s duration=%ss status=%s transcription_status=%s",
            bid,
            call_row.get("callid"),
            duration,
            call_row.get("status"),
            call_row.get("transcription_status"),
        )
        if args.dry_run:
            # In dry-run mode "acted" counts the calls that matched the skip check.
            acted += 1
            continue
        # A fresh connection and a commit per call: every purge is committed
        # on its own before the next call is handled.
        with db.get_connection() as conn:
            cursor = conn.cursor()
            if purge_unprocessed_if_below_min(
                cursor, bid, call_row["callid"], call_row, min_s, effective_at
            ):
                acted += 1
            conn.commit()

    logger.info(
        "BID %s: scanned=%s acted=%s dry_run=%s min_duration_s=%s",
        bid,
        scanned,
        acted,
        args.dry_run,
        min_s,
    )
    return 0


# Script entry point: run the cleanup and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
