Source code for pyfetcher.metadata.opengraph

"""Open Graph metadata extraction for :mod:`pyfetcher`.

Purpose:
    Extract common Open Graph fields from HTML ``<meta property="og:*">`` tags.

Examples:
    ::

        >>> html = "<meta property='og:title' content='Example' />"
        >>> extract_open_graph_metadata(html).title
        'Example'
"""

from __future__ import annotations

from bs4 import BeautifulSoup

from pyfetcher.metadata.models import OpenGraphMetadata


def extract_open_graph_metadata(html: str) -> OpenGraphMetadata | None:
    """Extract Open Graph metadata from HTML.

    Parses ``og:title``, ``og:description``, ``og:image``, ``og:site_name``,
    ``og:url``, and ``og:type`` meta tags from the provided HTML. For each
    property only the first ``<meta property="...">`` tag found is consulted,
    and its ``content`` value is stripped of surrounding whitespace. A tag
    whose ``content`` is absent, empty, or whitespace-only is treated as
    missing and yields ``None`` for that field.

    Args:
        html: Raw HTML string to parse.

    Returns:
        An :class:`~pyfetcher.metadata.models.OpenGraphMetadata` instance, or
        ``None`` if every value in the model's dump is falsy (i.e. no OG
        fields were found).

    Examples:
        ::

            >>> html = "<html><head><meta property='og:title' content='Example' /></head></html>"
            >>> extract_open_graph_metadata(html).title
            'Example'
            >>> extract_open_graph_metadata("<meta property='og:title' content='   ' />") is None
            True
    """
    soup = BeautifulSoup(html, "html.parser")

    def _get(property_name: str) -> str | None:
        """Return the stripped ``content`` of the first matching tag, or ``None``."""
        tag = soup.find("meta", attrs={"property": property_name})
        if tag is None:
            return None
        content = tag.get("content")
        # ``Tag.get`` may return a list for multi-valued attributes; only a
        # plain string is a usable ``content`` value here.
        if not isinstance(content, str):
            return None
        # Strip *before* the emptiness check so whitespace-only content is
        # reported as missing (None) instead of leaking through as "".
        return content.strip() or None

    metadata = OpenGraphMetadata(
        title=_get("og:title"),
        description=_get("og:description"),
        image=_get("og:image"),
        site_name=_get("og:site_name"),
        url=_get("og:url"),
        type=_get("og:type"),
    )
    # No populated field at all means the document carries no OG metadata.
    if not any(metadata.model_dump().values()):
        return None
    return metadata