from __future__ import annotations

from collections import defaultdict
import json
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

from openpyxl import load_workbook

from _pipeline_utils import _generated_timestamp, m4_m6_abjad_digits

# Interpreters that expose sys.set_int_max_str_digits enforce a default cap on
# the number of digits allowed in int <-> str conversions; passing 0 removes
# that cap. The hasattr guard keeps the script working on interpreters that
# predate the setting.
# NOTE(review): presumably required because metric values/totals pass through
# int(...) and str(...) and can be very long -- confirm against real data.
if hasattr(sys, "set_int_max_str_digits"):
    sys.set_int_max_str_digits(0)

# Short metric keys in canonical order. Worksheets must expose exactly these
# seven metrics (matched on the header prefix before the first underscore),
# and the JSON report lists lock counts in this order.
METRIC_ORDER = ["M1", "M2", "M3", "M4", "M5", "M6", "M7"]

# Exact set of dataset names (workbook file stem with "_7Metrics" removed)
# that must be present in the input directory. _collect_excel_files rejects
# any missing or extra dataset and returns the workbooks in this list's order,
# which therefore also fixes the dataset numbering in the reports.
EXPECTED_MUSHAFS = [
    "Bazzi(Basra)",
    "Bazzi(damascus)",
    "Bazzi(himsi)",
    "Bazzi(Kufa)",
    "Bazzi(Mecca)",
    "Bazzi(Medina I)",
    "Bazzi(Medina II)",
    "Bazzi(VERSE 0)",
    "Doori(Basra)",
    "Doori(damascus)",
    "Doori(himsi)",
    "Doori(Kufa)",
    "Doori(Mecca)",
    "Doori(Medina I)",
    "Doori(Medina II)",
    "Doori(VERSE 0)",
    "Hafs(Basra)",
    "Hafs(damascus)",
    "Hafs(himsi)",
    "Hafs(Kufa)",
    "Hafs(Mecca)",
    "Hafs(Medina I)",
    "Hafs(Medina II)",
    "Hafs(VERSE 0)",
    "Qaloon(Basra)",
    "Qaloon(damascus)",
    "Qaloon(himsi)",
    "Qaloon(Kufa)",
    "Qaloon(Mecca)",
    "Qaloon(Medina I)",
    "Qaloon(Medina II)",
    "Qaloon(VERSE 0)",
    "Qumball(Basra)",
    "Qumball(damascus)",
    "Qumball(himsi)",
    "Qumball(Kufa)",
    "Qumball(Mecca)",
    "Qumball(Medina I)",
    "Qumball(Medina II)",
    "Qumball(VERSE 0)",
    "Shouba(Basra)",
    "Shouba(damascus)",
    "Shouba(himsi)",
    "Shouba(Kufa)",
    "Shouba(Mecca)",
    "Shouba(Medina I)",
    "Shouba(Medina II)",
    "Shouba(VERSE 0)",
    "Soosi(Basra)",
    "Soosi(damascus)",
    "Soosi(himsi)",
    "Soosi(Kufa)",
    "Soosi(Mecca)",
    "Soosi(Medina I)",
    "Soosi(Medina II)",
    "Soosi(VERSE 0)",
    "Submission",
    "The_Criterion",
    "Warsh(Basra)",
    "Warsh(damascus)",
    "Warsh(himsi)",
    "Warsh(Kufa)",
    "Warsh(Mecca)",
    "Warsh(Medina I)",
    "Warsh(Medina II)",
    "Warsh(VERSE 0)",
]


def _to_int(value: object) -> Optional[int]:
    """Convert supported cell values to int; return None when not numeric.

    Accepted inputs:
        * ``int`` (but not ``bool``) -- returned unchanged.
        * ``float`` with an integral value -- truncated to ``int``.
        * ``str`` consisting solely of decimal digits after stripping
          surrounding whitespace -- parsed with ``int()``.

    Everything else (``None``, ``bool``, non-integral floats, empty or
    non-numeric strings, signed strings such as ``"-5"``, other types)
    yields ``None``.
    """
    # bool is a subclass of int, so it must be rejected before the int check.
    if value is None or isinstance(value, bool):
        return None

    if isinstance(value, int):
        return value

    if isinstance(value, float):
        # inf and nan report is_integer() == False, so they fall to None.
        return int(value) if value.is_integer() else None

    if isinstance(value, str):
        stripped = value.strip()
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript digits that int() cannot parse, which would raise
        # ValueError instead of returning None. isdecimal() is False for the
        # empty string, so no separate emptiness check is needed.
        if stripped.isdecimal():
            return int(stripped)

    return None


def _mushaf_name_from_file(file_path: Path) -> str:
    """Return the dataset name for a workbook path like '<Name>_7Metrics.xlsx'.

    The name is the file stem with every occurrence of the '_7Metrics'
    marker removed.
    """
    marker = "_7Metrics"
    stem = file_path.stem
    return stem.replace(marker, "")


def _short_metric_name(metric_name: str) -> str:
    """Return the part of a metric header before its first underscore.

    For example 'M1_LetterCount' becomes 'M1'; a header without an
    underscore is returned unchanged.
    """
    prefix, _separator, _rest = metric_name.partition("_")
    return prefix


def _collect_excel_files(input_dir: Path) -> List[Path]:
    """Locate the expected '*_7Metrics.xlsx' workbooks inside *input_dir*.

    Returns:
        One path per entry of EXPECTED_MUSHAFS, in that list's order.

    Raises:
        FileNotFoundError: no file in *input_dir* matches the pattern.
        ValueError: the dataset names found differ from EXPECTED_MUSHAFS
            (some expected name is absent or an unexpected one is present).
    """
    found = list(input_dir.glob("*_7Metrics.xlsx"))
    if len(found) == 0:
        raise FileNotFoundError("No files matching '*_7Metrics.xlsx' were found.")

    # Index the discovered workbooks by dataset name.
    path_for: Dict[str, Path] = {}
    for candidate in found:
        path_for[_mushaf_name_from_file(candidate)] = candidate

    # Missing names keep the expected order; unexpected names are sorted.
    absent = [expected for expected in EXPECTED_MUSHAFS if expected not in path_for]
    unexpected = sorted(set(path_for).difference(EXPECTED_MUSHAFS))

    if absent or unexpected:
        raise ValueError(
            "Dataset set mismatch; "
            f"missing={absent or 'None'}, extras={unexpected or 'None'}"
        )

    return [path_for[expected] for expected in EXPECTED_MUSHAFS]


def _discover_metrics(headers: List[str]) -> List[Tuple[int, str, str]]:
    """List the metric columns found in a header row.

    Every non-empty header other than 'sura', 'verse' or 'text' (compared
    case-insensitively) is treated as a metric.

    Returns:
        Tuples of ``(column_index, full_header, short_metric_name)`` in
        column order.
    """
    reserved = ("sura", "verse", "text")
    metrics: List[Tuple[int, str, str]] = []
    for position, header in enumerate(headers):
        if not header:
            continue
        if header.lower() in reserved:
            continue
        metrics.append((position, header, _short_metric_name(header)))
    return metrics


def _resolve_worksheet(wb, file_path: Path):
    """Return the single worksheet of *wb* that carries the metric table.

    A sheet qualifies when its first row contains 'sura' and 'verse'
    headers (case-insensitive) plus exactly seven metric headers whose
    short names equal METRIC_ORDER as a set. Sheets without a first row
    are ignored.

    Raises:
        ValueError: zero or more than one worksheet qualifies.
    """
    wanted_metrics = set(METRIC_ORDER)
    matches = []

    for title in wb.sheetnames:
        sheet = wb[title]
        first_row = next(
            sheet.iter_rows(min_row=1, max_row=1, values_only=True), None
        )
        if first_row is None:
            # Sheet has no rows at all.
            continue

        names = ["" if cell is None else str(cell).strip() for cell in first_row]
        lowered = {name.lower() for name in names}
        if not ("sura" in lowered and "verse" in lowered):
            continue

        metrics = _discover_metrics(names)
        if len(metrics) != 7:
            continue

        if {_short_metric_name(full) for _, full, _ in metrics} != wanted_metrics:
            continue

        matches.append(sheet)

    match_count = len(matches)
    if match_count != 1:
        raise ValueError(
            f"Expected exactly one valid worksheet in {file_path.name}; found {match_count}"
        )

    return matches[0]


def _digit_length(value: int) -> int:
    """Count the decimal digits of *value*, ignoring its sign (0 has one digit)."""
    magnitude = abs(value)
    return len(f"{magnitude}")


def analyze_file(
    file_path: Path,
) -> Tuple[int, Dict[str, int], List[str], List[Dict[str, object]]]:
    """Sum each metric per sura for one workbook and flag totals divisible by 19.

    The worksheet is chosen by ``_resolve_worksheet``. For every data row
    (row 2 onward) the seven metric values are added to a running total for
    that row's sura. M4 and M6 are not read from their cells: they are
    recomputed from the row's text column via ``m4_m6_abjad_digits`` and
    converted with ``int()``. All other metrics are read from their cells
    through ``_to_int``.

    Args:
        file_path: Path to a '<Name>_7Metrics.xlsx' workbook.

    Returns:
        A 4-tuple ``(suras_scanned, metric_locks, detail_lines, sura_records)``:

        * ``suras_scanned`` -- number of distinct suras seen.
        * ``metric_locks`` -- for each metric key, how many suras have a
          total that is a multiple of 19 (metrics with no hit are absent).
        * ``detail_lines`` -- one preformatted text line per sura with at
          least one hit.
        * ``sura_records`` -- one dict per sura (ascending) with keys
          ``"sura"`` and ``"metrics"``; each metric entry holds
          ``"number"`` (total as a string), ``"digit_length"``, ``"mod19"``
          and ``"is_multiple_of_19"``.

    Raises:
        ValueError: no unique valid worksheet, a required column or metric
            header is missing, a sura value is not an integer in 1..114, or
            a metric cell (other than M4/M6) is missing or not an integer.
    """
    wb = load_workbook(file_path, read_only=True, data_only=True)
    sura_metric_totals: Dict[int, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
    try:
        ws = _resolve_worksheet(wb, file_path)

        header_cells = next(ws.iter_rows(min_row=1, max_row=1, values_only=True))
        headers = [str(h).strip() if h is not None else "" for h in header_cells]
        header_to_idx = {h.lower(): i for i, h in enumerate(headers)}

        if "sura" not in header_to_idx:
            raise ValueError("Missing required column: sura")

        sura_idx = header_to_idx["sura"]
        if "text" not in header_to_idx:
            raise ValueError("Missing required column: text")
        text_idx = header_to_idx["text"]
        discovered_metrics: List[Tuple[int, str, str]] = _discover_metrics(headers)
        if len(discovered_metrics) != 7:
            raise ValueError(f"Expected 7 metrics, found {len(discovered_metrics)} in {file_path.name}")

        metric_by_short = {short: (idx, full, short) for idx, full, short in discovered_metrics}
        missing = [m for m in METRIC_ORDER if m not in metric_by_short]
        extras = [m for m in metric_by_short if m not in METRIC_ORDER]
        if missing or extras:
            raise ValueError(
                f"Metric headers mismatch in {file_path.name}; missing={missing or 'None'}, extras={extras or 'None'}"
            )

        # Metric columns arranged in canonical METRIC_ORDER.
        metric_columns: List[Tuple[int, str, str]] = [metric_by_short[m] for m in METRIC_ORDER]

        for row_number, row in enumerate(ws.iter_rows(min_row=2, values_only=True), start=2):
            if row is None:
                continue

            # Rows may be shorter than the header row; treat absent cells as None.
            sura_value = row[sura_idx] if sura_idx < len(row) else None
            sura = _to_int(sura_value)
            if sura is None:
                raise ValueError(f"Invalid sura at {file_path.name} row {row_number}; sura must be integer")

            if not 1 <= sura <= 114:
                raise ValueError(f"Invalid sura at {file_path.name} row {row_number}; got {sura}")

            row_text = row[text_idx] if text_idx < len(row) else None
            m4_digits, m6_digits = m4_m6_abjad_digits(row_text)

            for col_idx, _full_header, metric_key in metric_columns:
                if metric_key == "M4":
                    # Recomputed from the text; the M4 cell is not consulted.
                    metric_value = int(m4_digits)
                elif metric_key == "M6":
                    # Recomputed from the text; the M6 cell is not consulted.
                    metric_value = int(m6_digits)
                else:
                    if col_idx >= len(row):
                        raise ValueError(
                            f"Missing metric cell for {metric_key} at {file_path.name} row {row_number}"
                        )
                    metric_value = _to_int(row[col_idx])
                    if metric_value is None:
                        raise ValueError(
                            f"Invalid metric value for {metric_key} at {file_path.name} row {row_number}; must be integer"
                        )

                sura_metric_totals[sura][metric_key] += metric_value
    finally:
        # Always release the workbook handle, even when validation fails.
        wb.close()

    mushaf_name = _mushaf_name_from_file(file_path)
    metric_locks: Dict[str, int] = defaultdict(int)
    detail_lines: List[str] = []
    sura_records: List[Dict[str, object]] = []

    for sura in sorted(sura_metric_totals):
        metric_hits: List[str] = []
        metric_records: Dict[str, Dict[str, object]] = {}
        for metric_key in sorted(sura_metric_totals[sura]):
            total = sura_metric_totals[sura][metric_key]
            mod_19 = total % 19
            is_multiple = mod_19 == 0
            metric_records[metric_key] = {
                # Stored as a string so arbitrarily large totals survive JSON.
                "number": str(total),
                "digit_length": _digit_length(total),
                "mod19": mod_19,
                "is_multiple_of_19": is_multiple,
            }

            if is_multiple:
                metric_locks[metric_key] += 1
                metric_hits.append(f"{metric_key}={total}")

        sura_records.append(
            {
                "sura": sura,
                "metrics": metric_records,
            }
        )

        if metric_hits:
            detail_lines.append(f"  - {mushaf_name} | sura {sura} -> " + ", ".join(metric_hits))

    return len(sura_metric_totals), metric_locks, detail_lines, sura_records


def build_summary(input_dir: Path, output_file: Path) -> None:
    """Analyze every expected workbook in *input_dir* and write the reports.

    Three files are produced:

    * *output_file* -- text report with lock counts.
    * ``<stem>_bool.txt`` next to *output_file* -- the same report with
      counts reduced to booleans.
    * ``<stem>.json`` in a ``json`` directory beside *output_file*'s parent
      directory -- structured data including per-sura records.

    Parent directories are created when missing. Exceptions raised by
    ``_collect_excel_files`` or ``analyze_file`` propagate unchanged.
    """
    workbooks = _collect_excel_files(input_dir)
    dataset_count = len(workbooks)
    stamp = _generated_timestamp()

    count_report: List[str] = [
        "SURA LOCK DATASET SUMMARIES",
        f"Generated: {stamp}",
        "",
        f"Datasets scanned: {dataset_count}",
        "",
    ]
    flag_report: List[str] = [
        "SURA LOCK DATASET SUMMARIES (BOOLEAN)",
        f"Generated: {stamp}",
        "",
        f"Datasets scanned: {dataset_count}",
        "",
    ]

    dataset_entries: List[Dict[str, object]] = []
    payload: Dict[str, object] = {
        "schema_version": "1.2",
        "summary_type": "sura_lock",
        "generated": stamp,
        "modulus": 19,
        "metric_order": METRIC_ORDER,
        "datasets_scanned": dataset_count,
        "datasets": dataset_entries,
    }

    for position, workbook_path in enumerate(workbooks, start=1):
        sura_total, lock_tally, hit_lines, per_sura = analyze_file(workbook_path)
        lock_sum = sum(lock_tally.values())
        any_lock = lock_sum > 0
        label = _mushaf_name_from_file(workbook_path)

        # JSON always lists every metric; metrics without a hit count as 0.
        counts_by_metric = {metric: lock_tally.get(metric, 0) for metric in METRIC_ORDER}
        flags_by_metric = {metric: tally > 0 for metric, tally in counts_by_metric.items()}

        count_report += [
            f"DATASET {position}: {label}",
            f"Suras scanned: {sura_total}",
            f"Total metric locks (multiples of 19): {lock_sum}",
            "Locks by metric:",
        ]
        flag_report += [
            f"DATASET {position}: {label}",
            f"Suras scanned: {sura_total}",
            f"Total metric locks (multiples of 19): {any_lock}",
            "Locks by metric:",
        ]

        # The text reports list only metrics that actually registered a hit.
        if not lock_tally:
            count_report.append("- None")
            flag_report.append("- None")
        else:
            for metric in sorted(lock_tally):
                tally = lock_tally[metric]
                count_report.append(f"- {metric}: {tally}")
                flag_report.append(f"- {metric}: {tally > 0}")

        count_report.append("Sura-level lock details:")
        flag_report.append("Sura-level lock details:")
        if not hit_lines:
            count_report.append("- None")
            flag_report.append("- None")
        else:
            count_report.extend(hit_lines)
            # Boolean variant: swap each '=<digits>' for '=true'.
            flag_report.extend(re.sub(r"=\d+", "=true", line) for line in hit_lines)

        dataset_entries.append(
            {
                "dataset_index": position,
                "mushaf": label,
                "suras_scanned": sura_total,
                "total_metric_locks": lock_sum,
                "total_metric_locks_bool": any_lock,
                "locks_by_metric": counts_by_metric,
                "locks_by_metric_bool": flags_by_metric,
                "sura_records": per_sura,
            }
        )

        # Blank separator between datasets, but not after the last one.
        if position != dataset_count:
            count_report.append("")
            flag_report.append("")

    txt_dir = output_file.parent
    txt_dir.mkdir(parents=True, exist_ok=True)
    output_file.write_text("\n".join(count_report) + "\n", encoding="utf-8")

    flag_path = txt_dir / (output_file.stem + "_bool.txt")
    flag_path.write_text("\n".join(flag_report) + "\n", encoding="utf-8")

    json_dir = txt_dir.parent / "json"
    json_dir.mkdir(parents=True, exist_ok=True)
    json_path = json_dir / (output_file.stem + ".json")
    with json_path.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=True, indent=2)


def main() -> None:
    """Build the sura-lock summary from the workbooks beside this script.

    The script's own directory is the input directory; the text report goes
    to 'summary/txt/grand_sura_lock_summary.txt' beneath it.
    """
    script_dir = Path(__file__).resolve().parent
    destination = script_dir.joinpath("summary", "txt", "grand_sura_lock_summary.txt")
    build_summary(script_dir, destination)
    print(f"Summary written to: {destination}")


# Run the summary build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
