#!/usr/bin/env python3
"""pinlight -- CLI bookmark manager.

Import bookmarks from markdown files and apply diffs from the web interface.

Usage:
    pinlight.py [-f bookmarks.json] import <markdown_file> [--interactive] [--dry-run] [-o output.json]
    pinlight.py [-f bookmarks.json] apply <diff_file> [--dry-run] [-o output.json]
    pinlight.py [-f bookmarks.json] repl
    pinlight.py [-f bookmarks.json] stats
"""

import argparse
import json
import re
import readline  # noqa: F401 — enables line editing in input()
import sys
from datetime import datetime, timezone
from pathlib import Path


# ---------------------------------------------------------------------------
# Data I/O
# ---------------------------------------------------------------------------

def load_bookmarks(path: Path) -> list[dict]:
    """Return the bookmark list stored as JSON at ``path``.

    A path that does not exist yields an empty list instead of an error.
    Malformed JSON propagates as ``json.JSONDecodeError``.
    """
    if path.exists():
        with open(path, encoding="utf-8") as handle:
            return json.loads(handle.read())
    return []


def save_bookmarks(bookmarks: list[dict], path: Path) -> None:
    """Write ``bookmarks`` to ``path`` as indented UTF-8 JSON plus a newline.

    The list is serialized in memory before the file is opened for writing,
    so a serialization error (for example a value JSON cannot represent)
    raises without truncating whatever ``path`` already contains.

    Raises:
        TypeError / ValueError: if ``bookmarks`` cannot be encoded as JSON.
        OSError: if the file cannot be written.
    """
    payload = json.dumps(bookmarks, indent=2, ensure_ascii=False) + "\n"
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)


def normalize_url(url: str) -> str:
    """Normalize a URL for dedup comparison.

    Surrounding whitespace is stripped, the scheme and the host part are
    lowercased, and trailing slashes are removed.  Path, query string and
    fragment keep their original case.  A string without ``://`` is only
    stripped of whitespace and trailing slashes.
    """
    url = url.strip()
    if "://" in url:
        scheme, rest = url.split("://", 1)
        # The host ends at the first "/", "?" or "#".  Cutting only at "/"
        # would lowercase a query or fragment that directly follows the host.
        cuts = [pos for pos in map(rest.find, "/?#") if pos != -1]
        end = min(cuts, default=len(rest))
        url = f"{scheme.lower()}://{rest[:end].lower()}{rest[end:]}"
    return url.rstrip("/")


# ---------------------------------------------------------------------------
# Markdown parser
# ---------------------------------------------------------------------------

def parse_markdown(path: Path) -> list[dict]:
    """Read the UTF-8 markdown file at ``path`` and return its parsed bookmarks."""
    return parse_markdown_text(path.read_text(encoding="utf-8"))


def parse_markdown_text(content: str, default_date: str | None = None) -> list[dict]:
    """Turn markdown text into a list of bookmark dicts.

    Recognized date delimiters (each on a line of its own):
        #### YYYY-MM-DD        (heading with one to four ``#``)
        - YYYY-MM-DD           (bare list item, ``-`` or ``*``)

    Recognized entries:
        - [title](url) note. #tag1 #tag2
        - [title](url) via [label](via_url). #tag

    Entries that appear before the first date delimiter are dated with
    ``default_date``; when that is ``None`` the current local date is used.
    Entries without a markdown link are dropped.
    """
    fallback_date = default_date
    if fallback_date is None:
        fallback_date = datetime.now().strftime("%Y-%m-%d")

    date_line = re.compile(
        r"^(?:#{1,4}\s+|[-*]\s+)(\d{4}-\d{2}-\d{2})\s*$", re.MULTILINE
    )

    # Splitting on a pattern with one capture group yields
    # [preamble, date, block, date, block, ...] -- always an odd length.
    pieces = date_line.split(content)
    sections = [(fallback_date, pieces[0])]
    sections.extend(zip(pieces[1::2], pieces[2::2]))

    found = []
    for section_date, body in sections:
        for raw_entry in _collect_entries(body):
            parsed = _parse_entry(raw_entry, section_date)
            if parsed is not None:
                found.append(parsed)
    return found


def _collect_entries(block: str) -> list[str]:
    """Collect list-item entries from a block, joining continuation lines."""
    lines = block.split("\n")
    entries: list[str] = []
    current: str | None = None

    for line in lines:
        # New top-level list item
        if re.match(r"^[-*]\s+", line):
            if current is not None:
                entries.append(current)
            current = line
        # Sub-list item (indented)
        elif re.match(r"^\s+[-*]\s+", line):
            if current is not None:
                entries.append(current)
            current = line.strip()
        # Continuation line
        elif current is not None and line.strip():
            current += " " + line.strip()
        # Blank line or heading — flush
        else:
            if current is not None:
                entries.append(current)
                current = None

    if current is not None:
        entries.append(current)

    return entries


def _parse_entry(entry: str, date: str) -> dict | None:
    """Parse a single list-item entry into a bookmark dict.

    Args:
        entry: Raw entry text, e.g. ``- [title](url) note. #tag``.
        date: Date string stored unchanged in the bookmark's ``date`` key.

    Returns:
        A dict with ``url``, ``title``, ``date``, ``favorite`` (always
        ``False``), ``note`` and ``tags`` keys, plus ``via`` when the entry
        contains ``via [label](url)``; ``None`` when the entry holds no
        markdown link.  The first link in the entry supplies title and URL.
    """
    # Must contain at least one markdown link
    link_match = re.search(r"\[([^\]]+)\]\(([^)]+)\)", entry)
    if link_match is None:
        return None

    title = link_match.group(1).strip()
    url = link_match.group(2).strip()

    # Tags: #word-word patterns.  Link targets are removed first so that a
    # URL fragment such as (https://host/page#section) is not read as a tag;
    # dict.fromkeys drops repeated tags while keeping first-seen order.
    without_targets = re.sub(r"\]\([^)]*\)", "]", entry)
    tags = list(dict.fromkeys(re.findall(r"#([\w-]+)", without_targets)))

    # Via link: via [label](url)
    via = None
    via_match = re.search(r"via\s+\[([^\]]+)\]\(([^)]+)\)", entry)
    if via_match:
        via = {"label": via_match.group(1), "url": via_match.group(2)}

    # Build the note from whatever descriptive text remains
    note = _extract_note(entry, title)

    # Add paper tag for arxiv URLs
    if "arxiv.org" in url and "paper" not in tags:
        tags.append("paper")

    bookmark: dict = {
        "url": url,
        "title": title,
        "date": date,
        "favorite": False,
        "note": note,
        "tags": tags,
    }
    if via:
        bookmark["via"] = via

    return bookmark


def _extract_note(entry: str, title: str) -> str:
    """Extract descriptive note text from a raw entry string."""
    note = entry

    # Strip list marker
    note = re.sub(r"^[-*]\s+", "", note).strip()

    # Replace markdown links with their text
    note = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"\1", note)

    # Remove tags
    note = re.sub(r"#[\w-]+", "", note)

    # Remove via references (linked form)
    note = re.sub(r"\bvia\s+\w[\w.]*\s*", "", note)

    # Strip markdown bold/italic
    note = re.sub(r"\*{1,2}([^*]+)\*{1,2}", r"\1", note)
    note = re.sub(r"\\([*])", r"\1", note)  # unescape \*

    # Collapse whitespace
    note = re.sub(r"\s+", " ", note).strip(" .,;:")

    # Remove the title text itself
    note = note.replace(title, "", 1).strip(" .,;:")

    # Final cleanup
    note = re.sub(r"\s+", " ", note).strip(" .,;:")

    return note


# ---------------------------------------------------------------------------
# Import / merge
# ---------------------------------------------------------------------------

def merge_bookmarks(
    existing: list[dict],
    incoming: list[dict],
    interactive: bool = False,
) -> tuple[list[dict], list[dict]]:
    """Combine ``incoming`` with ``existing``, dropping URL duplicates.

    URLs are compared after normalization.  With ``interactive`` set, each
    non-duplicate candidate must also be confirmed at the prompt.

    Returns:
        ``(merged, accepted)`` where ``merged`` is a new list holding the
        existing and accepted bookmarks ordered by ``date`` descending, and
        ``accepted`` holds only the bookmarks that were added.
    """
    known_urls = {normalize_url(item["url"]) for item in existing}
    accepted: list[dict] = []

    for candidate in incoming:
        key = normalize_url(candidate["url"])
        if key in known_urls:
            continue
        if interactive and not _confirm_entry(candidate):
            continue
        known_urls.add(key)
        accepted.append(candidate)

    # Newest first; bookmarks without a date sort last.
    merged = sorted(
        existing + accepted,
        key=lambda item: item.get("date", ""),
        reverse=True,
    )
    return merged, accepted


def _print_bookmark(bookmark: dict, prefix: str = "  ") -> None:
    """Pretty-print a bookmark to stdout."""
    print(f"{prefix}\033[1m{bookmark['title']}\033[0m")
    print(f"{prefix}{bookmark['url']}")
    if bookmark.get("note"):
        print(f"{prefix}\033[2m{bookmark['note']}\033[0m")
    if bookmark.get("tags"):
        print(f"{prefix}tags: {', '.join(bookmark['tags'])}")
    if bookmark.get("via"):
        via = bookmark["via"]
        label = via.get("label", "")
        url = via.get("url", "")
        print(f"{prefix}via: {label}" + (f" ({url})" if url else ""))
    print(f"{prefix}date: {bookmark['date']}")


def _confirm_entry(bookmark: dict) -> bool:
    """Show ``bookmark`` and ask whether it should be imported.

    Returns ``True`` for ``y``/``yes``/empty input and ``False`` for
    ``n``/``no`` or when input ends (EOF or Ctrl-C at the prompt).  Any other
    answer repeats the prompt.

    Raises:
        KeyboardInterrupt: when the user answers ``q``/``quit``, signalling
            the caller to abandon the remaining entries.
    """
    verdicts = {"y": True, "yes": True, "": True, "n": False, "no": False}

    print()
    _print_bookmark(bookmark)
    print()

    while True:
        try:
            reply = input("  Import? [y/n/q] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            print()
            return False

        if reply in verdicts:
            return verdicts[reply]
        if reply in ("q", "quit"):
            print("  Aborting remaining imports.")
            raise KeyboardInterrupt


# ---------------------------------------------------------------------------
# Diff format
# ---------------------------------------------------------------------------
#
# {
#   "version": 1,
#   "generated": "2026-04-15T12:00:00Z",
#   "changes": [
#     {"url": "...", "action": "update", "fields": {"favorite": true, "note": "..."}},
#     {"url": "...", "action": "delete"}
#   ]
# }

def load_diff(path: Path) -> dict:
    """Load a diff file and validate its structure.

    The file must hold a JSON object with a ``changes`` list and, optionally,
    ``version`` equal to 1.  Every change must be an object with a string
    ``url`` and an ``action`` of ``"update"`` or ``"delete"``; an update
    additionally needs a ``fields`` object.

    Args:
        path: Location of the diff JSON file.

    Returns:
        The parsed diff dict, unchanged.

    Raises:
        ValueError: if the structure is invalid (``json.JSONDecodeError``,
            raised for malformed JSON, is a subclass of it).
    """
    with open(path, encoding="utf-8") as f:
        diff = json.load(f)

    if not isinstance(diff, dict) or "changes" not in diff:
        raise ValueError("Invalid diff file: missing 'changes' key")

    version = diff.get("version", 1)
    if version != 1:
        raise ValueError(f"Unsupported diff version: {version}")

    changes = diff["changes"]
    if not isinstance(changes, list):
        raise ValueError("Invalid diff file: 'changes' must be a list")

    for i, change in enumerate(changes):
        # Type checks come first so malformed entries surface as ValueError
        # rather than as TypeError/AttributeError here or in later processing.
        if not isinstance(change, dict):
            raise ValueError(f"Change #{i}: expected an object")
        if "url" not in change:
            raise ValueError(f"Change #{i}: missing 'url'")
        if not isinstance(change["url"], str):
            raise ValueError(f"Change #{i}: 'url' must be a string")
        if "action" not in change:
            raise ValueError(f"Change #{i}: missing 'action'")
        action = change["action"]
        if action not in ("update", "delete"):
            raise ValueError(f"Change #{i}: unknown action '{action}'")
        if action == "update":
            if "fields" not in change:
                raise ValueError(f"Change #{i}: 'update' action requires 'fields'")
            if not isinstance(change["fields"], dict):
                raise ValueError(f"Change #{i}: 'fields' must be an object")

    return diff


# Bookmark keys that an "update" change is allowed to overwrite; apply_diff
# warns about and ignores any other key found in a change's "fields".
UPDATABLE_FIELDS = {"favorite", "note", "tags"}


def apply_diff(
    bookmarks: list[dict], diff: dict
) -> tuple[list[dict], dict]:
    """Apply a diff to bookmarks. Returns (updated_bookmarks, summary).

    Bookmarks are matched to changes by normalized URL.  The input list and
    its dicts are not mutated: updated bookmarks are replaced by new dicts in
    a copy of the list.

    Args:
        bookmarks: Current bookmark dicts, each with a ``url`` key.
        diff: Validated diff with a ``changes`` list.

    Returns:
        ``(result, summary)`` where ``summary`` maps ``"updated"``,
        ``"deleted"`` and ``"not_found"`` to the URLs from the diff that fell
        into each category.
    """
    index = {normalize_url(b["url"]): i for i, b in enumerate(bookmarks)}
    result = list(bookmarks)
    to_delete: set[int] = set()

    summary: dict[str, list] = {"updated": [], "deleted": [], "not_found": []}

    for change in diff["changes"]:
        url = change["url"]
        idx = index.get(normalize_url(url))

        if idx is None:
            summary["not_found"].append(url)
            continue

        action = change["action"]

        if action == "delete":
            to_delete.add(idx)
            summary["deleted"].append(url)

        elif action == "update":
            fields = change["fields"]
            # Sorted so the warning text does not depend on set ordering.
            unknown = sorted(set(fields) - UPDATABLE_FIELDS)
            if unknown:
                listing = ", ".join(repr(name) for name in unknown)
                print(
                    f"  warning: ignoring unknown fields {{{listing}}} for {url}",
                    file=sys.stderr,
                )
            # Walk the diff's own key order (not a set) so keys that are new
            # to the bookmark are added in a reproducible order.
            accepted = {k: v for k, v in fields.items() if k in UPDATABLE_FIELDS}
            if accepted:
                result[idx] = {**result[idx], **accepted}
            summary["updated"].append(url)

    # Drop deleted entries in one pass; indices refer to the original order.
    result = [b for i, b in enumerate(result) if i not in to_delete]

    return result, summary


# ---------------------------------------------------------------------------
# Stats
# ---------------------------------------------------------------------------

def print_stats(bookmarks: list[dict]) -> None:
    """Print summary statistics for ``bookmarks`` to stdout.

    Shows the total count, the number of favorites, the smallest and largest
    ``date`` strings (when any bookmark has one), the number of distinct
    tags, and a bar chart of up to 15 of the most frequent tags.
    """
    total = len(bookmarks)
    favorite_count = len([entry for entry in bookmarks if entry.get("favorite")])
    known_dates = [entry["date"] for entry in bookmarks if entry.get("date")]

    tag_counts: dict[str, int] = {}
    for entry in bookmarks:
        for tag in entry.get("tags", []):
            tag_counts[tag] = tag_counts.get(tag, 0) + 1

    print(f"Bookmarks: {total}")
    print(f"Favorites: {favorite_count}")
    if known_dates:
        print(f"Date range: {min(known_dates)} — {max(known_dates)}")
    print(f"Tags: {len(tag_counts)} unique")

    if not tag_counts:
        return

    # Stable sort: tags with equal counts keep first-seen order.
    ranked = sorted(tag_counts.items(), key=lambda pair: pair[1], reverse=True)
    leaders = ranked[:15]
    width = max(len(name) for name, _ in leaders)
    for name, count in leaders:
        bar = "█" * min(count, 40)  # bar length capped at 40 cells
        print(f"  {name:<{width}}  {count:>3}  {bar}")


# ---------------------------------------------------------------------------
# CLI commands
# ---------------------------------------------------------------------------

def cmd_import(args: argparse.Namespace) -> None:
    """Handle the ``import`` subcommand.

    Parses ``args.markdown``, merges the entries that are not yet in the
    bookmark file ``args.file`` and writes the result to ``args.output``
    (falling back to ``args.file``).  With ``args.dry_run`` the new entries
    are only listed.  Exits with status 1 when the markdown file is missing,
    when ``--interactive`` is used without a TTY, or when the user aborts.
    """
    source = args.markdown
    if not source.exists():
        print(f"error: file not found: {source}", file=sys.stderr)
        sys.exit(1)

    wants_prompts = args.interactive
    if wants_prompts and not sys.stdin.isatty():
        print("error: --interactive requires a TTY", file=sys.stderr)
        sys.exit(1)

    store = Path(args.file)
    destination = args.output or store

    current = load_bookmarks(store)
    candidates = parse_markdown(source)

    print(f"Parsed {len(candidates)} entries from {source}")
    print(f"Existing bookmarks: {len(current)}")

    try:
        merged, fresh = merge_bookmarks(current, candidates, wants_prompts)
    except KeyboardInterrupt:
        # Raised by the confirmation prompt when the user chooses to quit.
        print("\nAborted.")
        sys.exit(1)

    if not fresh:
        print("No new bookmarks to add.")
        return

    print(f"New bookmarks: {len(fresh)}")

    if not args.dry_run:
        save_bookmarks(merged, destination)
        print(f"Saved {len(merged)} bookmarks to {destination}")
        return

    print("\n[dry run] Would add:")
    for item in fresh:
        hashtags = " ".join(f"#{t}" for t in item.get("tags", []))
        print(f"  + {item['title']}")
        print(f"    {item['url']}  {hashtags}")


def cmd_apply(args: argparse.Namespace) -> None:
    """Handle the ``apply`` subcommand.

    Loads the bookmark file ``args.file`` and the diff ``args.diff``, applies
    the diff and writes the result to ``args.output`` (falling back to
    ``args.file``).  With ``args.dry_run`` the changes are only listed and
    nothing is written.  Exits with status 1 when the diff file is missing
    or invalid.  URLs from the diff that match no bookmark are reported on
    stderr.
    """
    diff_path = args.diff
    if not diff_path.exists():
        print(f"error: file not found: {diff_path}", file=sys.stderr)
        sys.exit(1)

    bk_path = Path(args.file)
    out_path = args.output or bk_path

    bookmarks = load_bookmarks(bk_path)

    try:
        diff = load_diff(diff_path)
    except (json.JSONDecodeError, ValueError) as e:
        print(f"error: {e}", file=sys.stderr)
        sys.exit(1)

    n_changes = len(diff["changes"])
    print(f"Diff: {n_changes} change{'s' if n_changes != 1 else ''}")

    if args.dry_run:
        print("\n[dry run] Would apply:")
        for change in diff["changes"]:
            action = change["action"]
            url = change["url"]
            if action == "delete":
                print(f"  - DELETE {url}")
            elif action == "update":
                fields = ", ".join(
                    f"{k}={v!r}" for k, v in change["fields"].items()
                )
                print(f"  ~ UPDATE {url}  ({fields})")
        return

    result, summary = apply_diff(bookmarks, diff)

    if summary["updated"]:
        print(f"Updated: {len(summary['updated'])}")
    if summary["deleted"]:
        print(f"Deleted: {len(summary['deleted'])}")
    if summary["not_found"]:
        # Header and entries go to the same stream so the report stays
        # together when stdout and stderr are redirected separately.
        print(f"Not found ({len(summary['not_found'])}):", file=sys.stderr)
        for url in summary["not_found"]:
            print(f"  ? {url}", file=sys.stderr)

    save_bookmarks(result, out_path)
    print(f"Saved {len(result)} bookmarks to {out_path}")


def cmd_repl(args: argparse.Namespace) -> None:
    """Handle the ``repl`` subcommand: paste, review and add bookmarks.

    Loads the bookmark file ``args.file`` and then loops, reading either a
    command (``save``, ``stats``, ``undo``, ``help``, ``quit``/``exit``/``q``)
    or a block of pasted text that ends at the first blank line.  Each parsed
    bookmark whose normalized URL is not already known is shown for
    confirmation; accepted bookmarks are kept in memory as pending until they
    are saved.  Exits with status 1 when stdin is not a TTY.
    """
    if not sys.stdin.isatty():
        print("error: repl requires a TTY", file=sys.stderr)
        sys.exit(1)

    bk_path = Path(args.file)
    bookmarks = load_bookmarks(bk_path)
    # Normalized URLs of everything loaded or accepted so far (duplicate check)
    seen = {normalize_url(b["url"]) for b in bookmarks}
    # Accepted but not yet written to disk
    added_this_session: list[dict] = []

    print(f"pinlight repl — {len(bookmarks)} bookmarks loaded")
    print("Paste markdown content (entries, with or without date headers).")
    print("Submit with a blank line. Type 'save' to write, 'quit' to exit.\n")

    while True:
        # Read a block of input until blank line or command
        try:
            # NOTE(review): \001 and \002 presumably mark the ANSI codes as
            # zero-width for readline's prompt-length calculation -- confirm.
            first_line = input("\001\033[1m\002>\001\033[0m\002 ")
        except (EOFError, KeyboardInterrupt):
            print()
            break

        # The first line of a block doubles as the command line
        cmd = first_line.strip().lower()
        if cmd in ("quit", "exit", "q"):
            break
        if cmd == "save":
            if not added_this_session:
                print("Nothing to save.")
                continue
            all_bookmarks = bookmarks + added_this_session
            all_bookmarks.sort(key=lambda b: b.get("date", ""), reverse=True)
            save_bookmarks(all_bookmarks, bk_path)
            print(f"Saved {len(all_bookmarks)} bookmarks to {bk_path}")
            # Saved entries become part of the base list, so a later "undo"
            # can no longer remove them.
            bookmarks = all_bookmarks
            added_this_session = []
            continue
        if cmd == "stats":
            # Statistics include pending (unsaved) entries
            all_bookmarks = bookmarks + added_this_session
            print_stats(all_bookmarks)
            continue
        if cmd == "help":
            print("  Paste markdown bookmark entries, end with a blank line.")
            print("  Commands: save, stats, undo, help, quit")
            continue
        if cmd == "undo":
            if not added_this_session:
                print("  Nothing to undo.")
                continue
            # Drop the most recently accepted entry and forget its URL so it
            # can be added again later.
            removed = added_this_session.pop()
            seen.discard(normalize_url(removed["url"]))
            print(f"  Removed: {removed['title']}")
            continue

        # Accumulate lines until blank line
        lines = [first_line]
        while True:
            try:
                line = input("  ")
            except (EOFError, KeyboardInterrupt):
                # End of input: the lines gathered so far are still parsed
                print()
                break
            if line.strip() == "":
                break
            lines.append(line)

        text = "\n".join(lines)
        if not text.strip():
            continue

        # Try to parse
        parsed = parse_markdown_text(text)

        if not parsed:
            # Normalize: add "- " prefix to lines that start with [ or >
            # so the parser can handle them (e.g. "> [title](url)" or "[title](url)")
            normalized = re.sub(
                r"^(?:>\s*)?(\[)", r"- \1", text, flags=re.MULTILINE
            )
            if normalized != text:
                parsed = parse_markdown_text(normalized)

        if not parsed:
            # Maybe it's just a bare URL?
            url_match = re.match(r"^\s*(https?://\S+)\s*$", text.strip())
            if url_match:
                url = url_match.group(1)
                # A bare URL becomes a bookmark titled with the URL itself,
                # dated with the current local date
                parsed = [{
                    "url": url,
                    "title": url,
                    "date": datetime.now().strftime("%Y-%m-%d"),
                    "favorite": False,
                    "note": "",
                    "tags": [],
                }]
            else:
                print("  \033[33mCouldn't parse any bookmarks from that input.\033[0m")
                print("  Expected: [title](url) description. #tag1 #tag2")
                continue

        # Process each parsed entry
        for bookmark in parsed:
            norm = normalize_url(bookmark["url"])
            if norm in seen:
                print(f"  \033[33mskip (duplicate):\033[0m {bookmark['title']}")
                continue

            print()
            _print_bookmark(bookmark)
            print()

            # Confirmation loop: repeats after an edit or an unrecognized answer
            while True:
                try:
                    answer = input(
                        "  [\001\033[1m\002y\001\033[0m\002]es / [n]o / [e]dit tags / [q]uit > "
                    ).strip().lower()
                except (EOFError, KeyboardInterrupt):
                    # End of input at the prompt is treated as "quit"
                    print()
                    answer = "q"

                if answer in ("y", "yes", ""):
                    seen.add(norm)
                    added_this_session.append(bookmark)
                    print(f"  \033[32m+\033[0m added ({len(added_this_session)} pending)")
                    break
                elif answer in ("n", "no"):
                    print("  skipped")
                    break
                elif answer in ("e", "edit"):
                    current = ", ".join(bookmark["tags"]) if bookmark["tags"] else "(none)"
                    try:
                        new_tags = input(f"  tags [{current}]: ").strip()
                    except (EOFError, KeyboardInterrupt):
                        # Abandons this entry without adding it
                        print()
                        break
                    # Empty input keeps the current tags; otherwise the tags
                    # are replaced (split on commas/whitespace, "#" dropped)
                    if new_tags:
                        bookmark["tags"] = [
                            t.strip().lstrip("#")
                            for t in re.split(r"[,\s]+", new_tags)
                            if t.strip()
                        ]
                    # Also allow editing note
                    current_note = bookmark.get("note", "")
                    try:
                        new_note = input(
                            f"  note [{current_note or '(empty)'}]: "
                        ).strip()
                    except (EOFError, KeyboardInterrupt):
                        # Abandons this entry without adding it (tag edits
                        # made above are discarded with it)
                        print()
                        break
                    # Empty input keeps the current note
                    if new_note:
                        bookmark["note"] = new_note
                    # Re-display and loop back to confirm
                    print()
                    _print_bookmark(bookmark)
                    print()
                    continue
                elif answer in ("q", "quit"):
                    # Offer to save pending entries, then leave the repl.
                    # NOTE(review): an empty answer here does NOT save,
                    # whereas the end-of-session prompt below treats an
                    # empty answer as "yes" -- confirm which is intended.
                    if added_this_session:
                        print(f"\n  {len(added_this_session)} pending entries not saved.")
                        try:
                            save_answer = input("  Save before quitting? [y/n] ").strip().lower()
                        except (EOFError, KeyboardInterrupt):
                            save_answer = "n"
                        if save_answer in ("y", "yes"):
                            all_bk = bookmarks + added_this_session
                            all_bk.sort(key=lambda b: b.get("date", ""), reverse=True)
                            save_bookmarks(all_bk, bk_path)
                            print(f"  Saved {len(all_bk)} bookmarks.")
                    return

    # End of repl — prompt to save if needed
    if added_this_session:
        print(f"\n{len(added_this_session)} pending entries not saved.")
        try:
            answer = input("Save before exiting? [y/n] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            answer = "n"
        if answer in ("y", "yes", ""):
            all_bk = bookmarks + added_this_session
            all_bk.sort(key=lambda b: b.get("date", ""), reverse=True)
            save_bookmarks(all_bk, bk_path)
            print(f"Saved {len(all_bk)} bookmarks to {bk_path}")
        else:
            print("Discarded.")
    else:
        print("No changes.")


def cmd_stats(args: argparse.Namespace) -> None:
    """Handle the ``stats`` subcommand: summarize the bookmark file ``args.file``."""
    loaded = load_bookmarks(Path(args.file))
    if loaded:
        print_stats(loaded)
    else:
        print("No bookmarks found.")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """Parse the command line and run the selected subcommand.

    ``-f/--file`` is accepted both before the subcommand and after it
    (``pinlight -f x.json stats`` and ``pinlight stats -f x.json``); when
    given in both places, the one after the subcommand wins.
    """
    file_help = "Path to bookmarks.json (default: bookmarks.json)"

    parser = argparse.ArgumentParser(
        prog="pinlight",
        description="Bookmark manager CLI",
    )
    parser.add_argument(
        "-f", "--file",
        default="bookmarks.json",
        help=file_help,
    )

    # Options shared by every subcommand.  default=SUPPRESS means a
    # subparser sets ``file`` only when -f is actually given after the
    # subcommand, so it never overwrites a value parsed before it.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument(
        "-f", "--file",
        default=argparse.SUPPRESS,
        help=file_help,
    )

    sub = parser.add_subparsers(dest="command", required=True)

    # import
    p_import = sub.add_parser(
        "import",
        parents=[common],
        help="Import bookmarks from a markdown file",
    )
    p_import.add_argument("markdown", type=Path, help="Markdown file to import")
    p_import.add_argument(
        "--interactive", "-i",
        action="store_true",
        help="Confirm each entry individually",
    )
    p_import.add_argument(
        "--dry-run", "-n",
        action="store_true",
        help="Show what would be imported without writing",
    )
    p_import.add_argument("-o", "--output", type=Path, default=None)

    # apply
    p_apply = sub.add_parser(
        "apply",
        parents=[common],
        help="Apply a diff file (from web UI) to update/delete bookmarks",
    )
    p_apply.add_argument("diff", type=Path, help="Diff JSON file")
    p_apply.add_argument(
        "--dry-run", "-n",
        action="store_true",
        help="Show what would change without writing",
    )
    p_apply.add_argument("-o", "--output", type=Path, default=None)

    # repl
    sub.add_parser(
        "repl",
        parents=[common],
        help="Interactive mode: paste content, review and add bookmarks",
    )

    # stats
    sub.add_parser("stats", parents=[common], help="Show bookmark statistics")

    args = parser.parse_args()

    # required=True guarantees args.command is one of these keys.
    handlers = {
        "import": cmd_import,
        "apply": cmd_apply,
        "repl": cmd_repl,
        "stats": cmd_stats,
    }
    handlers[args.command](args)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()