<?php
namespace App\Parser;

/**
 * No-Composer HTML parser for Salla product pages.
 *
 * Structured data from JSON-LD (schema.org Product / BreadcrumbList) is used
 * when the page provides it; plain DOM lookups (h1, canonical link, img tags,
 * the ".product__description" element) fill in the rest.
 */
class ProductParser
{
    /**
     * Parses a product page and returns a flat associative array.
     *
     * @param string $html Raw page markup. Assumed to be UTF-8 unless the
     *                     markup itself declares another charset in a meta tag.
     *
     * @return array{
     *     source_url: ?string,
     *     sku: mixed,
     *     name: mixed,
     *     price: mixed,
     *     currency: mixed,
     *     availability: mixed,
     *     brand: mixed,
     *     categories: array,
     *     description_html: ?string,
     *     description_text: ?string,
     *     images: array,
     *     main_image_url: ?string
     * } Values taken from JSON-LD are passed through as decoded (usually strings).
     */
    public function parse(string $html): array
    {
        $xp = new \DOMXPath($this->loadDocument($html));

        [$productJson, $breadcrumbJson] = $this->extractJsonLd($xp);

        $name = $productJson['name'] ?? $this->firstText($xp, '//h1');
        $sku = $productJson['sku'] ?? null;

        // schema.org allows "brand" to be either a Brand/Organization object or plain text.
        $brandData = $productJson['brand'] ?? null;
        if (is_array($brandData)) {
            $brand = $brandData['name'] ?? null;
        } else {
            $brand = (is_string($brandData) && $brandData !== '') ? $brandData : null;
        }

        // "offers" may be a single Offer, an AggregateOffer, or a list of Offers;
        // for a list the first entry is used.
        $offers = $productJson['offers'] ?? null;
        if (!is_array($offers)) {
            $offers = [];
        } elseif (isset($offers[0]) && is_array($offers[0])) {
            $offers = $offers[0];
        }
        $price = $offers['price'] ?? $offers['lowPrice'] ?? null;
        $currency = $offers['priceCurrency'] ?? 'SAR';
        $availability = $offers['availability'] ?? null;

        $canonical = $this->firstAttr($xp, '//link[@rel="canonical"]', 'href');

        // Description HTML: the rendered element wins, JSON-LD is the fallback.
        $descHtml = null;
        $descNode = $xp->query('//*[contains(concat(" ", normalize-space(@class), " "), " product__description ")]');
        if ($descNode && $descNode->length) {
            $descHtml = $this->innerHTML($descNode->item(0));
        } elseif (isset($productJson['description'])) {
            $descHtml = (string)$productJson['description'];
        }
        // Explicit empty check so a description that is literally "0" is not discarded.
        $descText = ($descHtml !== null && $descHtml !== '') ? trim(strip_tags($descHtml)) : null;

        // Categories from breadcrumb. A ListItem may carry its label either on a
        // nested "item" object or directly as "name" (with "item" being a URL).
        $categories = [];
        $crumbs = $breadcrumbJson['itemListElement'] ?? null;
        if (is_array($crumbs)) {
            foreach ($crumbs as $crumb) {
                if (!is_array($crumb)) {
                    continue;
                }
                $label = null;
                if (isset($crumb['item']) && is_array($crumb['item'])) {
                    $label = $crumb['item']['name'] ?? null;
                }
                $label = $label ?? ($crumb['name'] ?? null);
                if ($label !== null && $label !== '') {
                    $categories[] = $label;
                }
            }
        }

        $images = $this->extractImages($xp, $productJson);

        // If no absolute image URL was collected, fall back to whatever the
        // JSON-LD "image" property names first (no scheme check here).
        $mainImage = $images[0] ?? ($this->imageUrls($productJson['image'] ?? null)[0] ?? null);

        return [
            'source_url' => $canonical,
            'sku' => $sku,
            'name' => $name,
            'price' => $price,
            'currency' => $currency,
            'availability' => $availability,
            'brand' => $brand,
            'categories' => $categories,
            'description_html' => $descHtml,
            'description_text' => $descText,
            'images' => $images,
            'main_image_url' => is_string($mainImage) ? $mainImage : null,
        ];
    }

    /**
     * Builds a DOM from the markup without raising libxml warnings.
     *
     * Empty input yields an empty document (loadHTML rejects an empty string).
     * When the markup declares no charset, a UTF-8 meta tag is prepended so
     * libxml does not fall back to a single-byte encoding and garble
     * non-ASCII text. The caller's libxml error-handling mode is restored.
     */
    private function loadDocument(string $html): \DOMDocument
    {
        $dom = new \DOMDocument();
        if (trim($html) === '') {
            return $dom;
        }

        if (!preg_match('~<meta\b[^>]*\bcharset\s*=~i', $html)) {
            $html = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $html;
        }

        $previous = \libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        \libxml_clear_errors();
        \libxml_use_internal_errors($previous);

        return $dom;
    }

    /**
     * Collects the Product and BreadcrumbList nodes from all JSON-LD scripts.
     *
     * When several nodes of the same type exist, the last one found wins.
     *
     * @return array{0: array, 1: array} [product, breadcrumb]; each is [] when absent.
     */
    private function extractJsonLd(\DOMXPath $xp): array
    {
        $product = null;
        $breadcrumb = null;

        $nodes = $xp->query('//script[@type="application/ld+json"]');
        if ($nodes) {
            foreach ($nodes as $node) {
                $decoded = $this->decodeJsonLd(trim($node->textContent ?? ''));
                if ($decoded === null) {
                    continue;
                }

                foreach ($this->jsonLdCandidates($decoded) as $item) {
                    if ($this->hasType($item, 'Product')) {
                        $product = $item;
                    }
                    if ($this->hasType($item, 'BreadcrumbList')) {
                        $breadcrumb = $item;
                    }
                }
            }
        }

        return [$product ?? [], $breadcrumb ?? []];
    }

    /**
     * Decodes one JSON-LD script body into an array, or null if it is not usable.
     *
     * The text is first entity-decoded as a whole (handles markup-escaped JSON).
     * If that produces invalid JSON - e.g. a value contained "&quot;" - the raw
     * text is decoded instead and entities are resolved per string value.
     */
    private function decodeJsonLd(string $json): ?array
    {
        if ($json === '') {
            return null;
        }

        $decoded = json_decode(html_entity_decode($json, ENT_QUOTES | ENT_HTML5, 'UTF-8'), true);
        if (is_array($decoded)) {
            return $decoded;
        }

        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            return null;
        }

        array_walk_recursive($decoded, static function (&$value): void {
            if (is_string($value)) {
                $value = html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
            }
        });

        return $decoded;
    }

    /**
     * Flattens a decoded JSON-LD payload into the list of nodes worth inspecting.
     *
     * Accepts a single node, a top-level list of nodes, and "@graph" containers.
     *
     * @return array[] Only array-typed nodes are returned.
     */
    private function jsonLdCandidates(array $decoded): array
    {
        $isList = $decoded !== [] && array_keys($decoded) === range(0, count($decoded) - 1);
        $documents = $isList ? $decoded : [$decoded];

        $candidates = [];
        foreach ($documents as $doc) {
            if (!is_array($doc)) {
                continue;
            }
            if (isset($doc['@type'])) {
                $candidates[] = $doc;
            }
            if (isset($doc['@graph']) && is_array($doc['@graph'])) {
                foreach ($doc['@graph'] as $graphNode) {
                    if (is_array($graphNode)) {
                        $candidates[] = $graphNode;
                    }
                }
            }
        }

        return $candidates;
    }

    /**
     * Tells whether a JSON-LD node declares the given "@type" (string or list form).
     */
    private function hasType(array $item, string $type): bool
    {
        $declared = $item['@type'] ?? null;
        if (is_array($declared)) {
            return in_array($type, $declared, true);
        }

        return $declared === $type;
    }

    /**
     * Normalizes a JSON-LD "image" value into a flat list of URL strings.
     *
     * Handles a plain string, an object exposing "url" or "contentUrl", and a
     * list of either. Anything else contributes nothing.
     *
     * @param mixed $image
     *
     * @return string[]
     */
    private function imageUrls($image): array
    {
        if (is_string($image)) {
            return [$image];
        }
        if (!is_array($image)) {
            return [];
        }
        if (isset($image['url']) || isset($image['contentUrl'])) {
            $url = $image['url'] ?? $image['contentUrl'];
            return is_string($url) ? [$url] : [];
        }

        $urls = [];
        foreach ($image as $key => $entry) {
            // Only positional entries are images; string keys are object properties.
            if (!is_int($key)) {
                continue;
            }
            foreach ($this->imageUrls($entry) as $url) {
                $urls[] = $url;
            }
        }

        return $urls;
    }

    /**
     * Returns the distinct absolute (http/https) image URLs, in first-seen order.
     *
     * Every <img> on the page is considered ("data-src" preferred over "src");
     * the JSON-LD "image" property is consulted only when no <img> qualified.
     *
     * @return string[]
     */
    private function extractImages(\DOMXPath $xp, array $product): array
    {
        $urls = [];

        $imgs = $xp->query('//img');
        if ($imgs) {
            foreach ($imgs as $img) {
                if (!$img instanceof \DOMElement) {
                    continue;
                }
                $u = $img->getAttribute('data-src') ?: $img->getAttribute('src');
                if ($u && preg_match('~^https?://~', $u)) {
                    $urls[] = $u;
                }
            }
        }

        if (empty($urls) && isset($product['image'])) {
            foreach ($this->imageUrls($product['image']) as $u) {
                if (preg_match('~^https?://~', $u)) {
                    $urls[] = $u;
                }
            }
        }

        // array_unique keeps the first occurrence, so order is preserved.
        return array_values(array_unique($urls));
    }

    /**
     * Trimmed text content of the first node matching the XPath query, or null.
     */
    private function firstText(\DOMXPath $xp, string $query): ?string
    {
        $n = $xp->query($query);
        if ($n && $n->length) {
            return trim($n->item(0)->textContent);
        }

        return null;
    }

    /**
     * Value of an attribute on the first element matching the XPath query.
     *
     * Returns null when nothing matches or the attribute is missing/empty.
     */
    private function firstAttr(\DOMXPath $xp, string $query, string $attr): ?string
    {
        $n = $xp->query($query);
        if ($n && $n->length && $n->item(0) instanceof \DOMElement) {
            $v = $n->item(0)->getAttribute($attr);
            return $v !== '' ? $v : null;
        }

        return null;
    }

    /**
     * Serializes the children of a node (its inner markup) as an HTML string.
     */
    private function innerHTML(\DOMNode $node): string
    {
        $html = '';
        foreach ($node->childNodes as $child) {
            $html .= $node->ownerDocument->saveHTML($child);
        }

        return $html;
    }
}
