Source code for haive.core.engine.document.loaders.adapters.base
"""Base loader adapter for document loaders.This module provides the base LoaderAdapter class that all specificloader adapters inherit from, establishing a consistent interface."""fromabcimportABC,abstractmethodfromlangchain_core.documentsimportDocumentfromhaive.core.engine.document.loaders.sources.base.baseimportBaseSource
class LoaderAdapter(ABC):
    """Abstract base for adapters that load documents from a source.

    An adapter wraps one source together with the keyword parameters meant
    for the underlying langchain document loader. Concrete adapters are
    expected to:

    1. Create the appropriate loader for their source type.
    2. Configure that loader with the correct parameters.
    3. Load documents from the source.
    4. Optionally override ``fetch_all`` when the source supports it.
    """

    def __init__(self, source: BaseSource, **params):
        """Store the source and loader parameters on the adapter.

        Args:
            source: The source to load documents from.
            **params: Additional parameters for the loader; kept as a dict
                on ``self.params``.
        """
        self.source = source
        self.params = params

    @abstractmethod
    def load(self) -> list[Document]:
        """Load documents from the source.

        Every concrete subclass must implement this method.

        Returns:
            List of loaded documents.
        """

    def load_and_split(self, **split_params) -> list[Document]:
        """Load the documents, then split them into chunks.

        Args:
            **split_params: Parameters for the text splitter. An optional
                ``text_splitter`` entry supplies a ready-made splitter
                (any object with a ``split_documents`` method). When it is
                absent or ``None``, the remaining entries are passed to
                ``RecursiveCharacterTextSplitter``.

        Returns:
            List of document chunks.
        """
        # Loading comes first so that a failing load surfaces before any
        # splitter is looked up or constructed.
        documents = self.load()

        splitter = split_params.get("text_splitter")
        if splitter is not None:
            # An explicit splitter was supplied; use it as-is.
            return splitter.split_documents(documents)

        # Everything except the reserved "text_splitter" key configures
        # the default splitter.
        splitter_options = {
            key: value
            for key, value in split_params.items()
            if key != "text_splitter"
        }

        # Imported here, not at module level, so the package is only
        # required when the default splitter is actually used.
        from langchain_text_splitters import RecursiveCharacterTextSplitter

        default_splitter = RecursiveCharacterTextSplitter(**splitter_options)
        return default_splitter.split_documents(documents)

    def fetch_all(self) -> list[BaseSource]:
        """Fetch all available sources (if supported).

        Intended for sources that contain multiple sub-sources, such as
        directories or sitemaps. This base implementation always raises;
        adapters that support the operation override it.

        Returns:
            List of individual sources.

        Raises:
            NotImplementedError: If the adapter doesn't support fetch_all.
        """
        message = f"This loader adapter ({self.__class__.__name__}) doesn't support fetch_all"
        raise NotImplementedError(message)