"""Google Patents arama — public XHR JSON endpoint.

patents.google.com'un kendi web UI'ı `https://patents.google.com/xhr/query?url=...`
public JSON endpoint'ine istek atar. Normal tarayıcı trafiği gibi çalışır.

Önemli:
- Hukuki yarı-gri alan — Google ToS scraping'e izin vermez ama XHR endpoint
  tarayıcı UI'ının kendisinin kullandığı public endpoint'tir. Rate-limit'e
  saygılı ol (1 req/2s), ek "resmi" kaynak olarak EPO'yu önce dene.
- Production'da: optional flag (ENABLE_GOOGLE_PATENTS=false default) ile
  kapatılabilir.
- Fallback için kullanışlı: EPO'da olmayan USPTO/CN başvurularına erişim.

Response schema (reverse engineered):
{
  "results": {
    "cluster": [
      {"result": [
        {"patent": {"publication_number": "US9876543B2",
                    "title": "...",
                    "snippet": "...",
                    "priority_date": "2019-03-15",
                    "publication_date": "2021-05-20",
                    "assignee": "...",
                    "inventor": [...]}},
        ...
      ]}
    ]
  }
}
"""

from __future__ import annotations

import asyncio
from urllib.parse import quote, urlencode

import httpx

from app.services.prior_art.models import PriorArtHit

# Base URL of the XHR query endpoint. The real search parameters are passed
# URL-encoded inside its single ``url`` query parameter.
_SEARCH_URL = "https://patents.google.com/xhr/query"
# Browser-like User-Agent header set on the HTTP client that
# GooglePatentsClient creates when no client is injected.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)


class GooglePatentsError(RuntimeError):
    """Raised when a Google Patents request fails or its response cannot be parsed."""


class GooglePatentsClient:
    """Search client for the Google Patents XHR endpoint.

    Calls to :meth:`search` are throttled so that consecutive requests are
    spaced at least ``min_interval_seconds`` apart, to reduce the chance of
    being blocked.

    The client may be used as an async context manager; :meth:`close` is then
    called automatically on exit.
    """

    def __init__(
        self,
        *,
        http_client: httpx.AsyncClient | None = None,
        min_interval_seconds: float = 2.0,
    ) -> None:
        """Create the client.

        Args:
            http_client: Optional pre-configured HTTP client. When given, it is
                used as-is and is NOT closed by :meth:`close`.
            min_interval_seconds: Minimum spacing between two requests.
        """
        # Only a client created here is owned (and later closed) by this object.
        self._external_client = http_client is not None
        if http_client is None:
            # NOTE: the browser-like User-Agent is only applied to this
            # internally created client; an injected client keeps its own headers.
            http_client = httpx.AsyncClient(
                timeout=15.0,
                headers={"User-Agent": _USER_AGENT},
            )
        self._http = http_client
        self._min_interval = min_interval_seconds
        self._lock = asyncio.Lock()
        # None means "no request sent yet". A numeric sentinel such as 0.0 is
        # unsafe because the loop's monotonic clock has no defined origin.
        self._last_request_at: float | None = None

    async def __aenter__(self) -> GooglePatentsClient:
        """Return the client itself for ``async with`` usage."""
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Close the owned HTTP client when leaving the ``async with`` block."""
        await self.close()

    async def close(self) -> None:
        """Close the underlying HTTP client, unless it was injected by the caller."""
        if not self._external_client:
            await self._http.aclose()

    async def search(self, query: str, *, limit: int = 10) -> list[PriorArtHit]:
        """Run a Google Patents search (hits carry ``source='google'``).

        Args:
            query: Natural language or Google Patents advanced syntax
                (e.g. '(post-quantum) cryptography', 'inventor:Smith').
            limit: Maximum number of hits; values outside 1-50 are clamped.

        Returns:
            The parsed hits, at most ``limit`` (after clamping) entries.

        Raises:
            GooglePatentsError: On transport errors, a non-200 status code, a
                body that is not valid JSON, or a JSON body that is not an object.
        """
        # Clamp once so the request size and the parser cut-off always agree.
        effective_limit = max(1, min(limit, 50))

        await self._respect_rate_limit()

        # The endpoint expects the real query string, itself URL-encoded,
        # as the value of its `url` parameter (hence the double encoding).
        inner_params = urlencode({"q": query, "num": effective_limit})
        outer_url = f"{_SEARCH_URL}?url={quote(inner_params)}"

        try:
            response = await self._http.get(outer_url)
        except httpx.HTTPError as exc:
            raise GooglePatentsError(f"HTTP hatası: {exc}") from exc

        if response.status_code != 200:
            raise GooglePatentsError(
                f"Google Patents HTTP {response.status_code}: {response.text[:200]}"
            )

        try:
            data = response.json()
        except ValueError as exc:
            raise GooglePatentsError(f"JSON parse hatası: {exc}") from exc

        # A valid-JSON but non-object payload (list, null, ...) would otherwise
        # surface as an AttributeError from the parser.
        if not isinstance(data, dict):
            # Message reads: "Unexpected JSON structure".
            raise GooglePatentsError(
                f"Beklenmeyen JSON yapısı: {type(data).__name__}"
            )

        return list(_parse_google_response(data, limit=effective_limit))

    async def _respect_rate_limit(self) -> None:
        """Sleep as needed so requests are at least ``_min_interval`` apart.

        The lock is held while sleeping, so concurrent callers are serialized
        and each one gets its own slot.
        """
        async with self._lock:
            # We are inside a coroutine, so a running loop is guaranteed.
            loop = asyncio.get_running_loop()
            if self._last_request_at is not None:
                wait = (self._last_request_at + self._min_interval) - loop.time()
                if wait > 0:
                    await asyncio.sleep(wait)
            self._last_request_at = loop.time()


# --------------------------- Parser ---------------------------


def _parse_google_response(data: dict, *, limit: int) -> list[PriorArtHit]:
    """Google Patents XHR response → PriorArtHit listesi."""
    clusters = data.get("results", {}).get("cluster", [])

    hits: list[PriorArtHit] = []
    for cluster in clusters:
        for result in cluster.get("result", []):
            patent = result.get("patent")
            if not isinstance(patent, dict):
                continue
            hit = _parse_single_patent(patent)
            if hit:
                hits.append(hit)
            if len(hits) >= limit:
                return hits
    return hits


def _parse_single_patent(patent: dict) -> PriorArtHit | None:
    pub_no = patent.get("publication_number")
    if not pub_no:
        return None

    inventors_raw = patent.get("inventor") or []
    inventors: list[str] | None = None
    if isinstance(inventors_raw, list):
        inventors = [str(i) for i in inventors_raw if i]
    elif isinstance(inventors_raw, str):
        inventors = [inventors_raw]

    assignee_raw = patent.get("assignee")
    applicant: str | None = None
    if isinstance(assignee_raw, list) and assignee_raw:
        applicant = str(assignee_raw[0])
    elif isinstance(assignee_raw, str):
        applicant = assignee_raw

    return PriorArtHit(
        source="google",
        patent_no=pub_no,
        title=patent.get("title"),
        abstract=patent.get("snippet"),
        applicant=applicant,
        inventors=inventors,
        filing_date=patent.get("priority_date"),
        publication_date=patent.get("publication_date"),
        cpc_classes=None,
        url=f"https://patents.google.com/patent/{pub_no}",
    )
