From 9eb10a9240eddbd544cb970e189c02e27e025522 Mon Sep 17 00:00:00 2001 From: Siddhant <87547498+Siddhant231xyz@users.noreply.github.com> Date: Tue, 21 Jan 2025 22:47:43 -0500 Subject: [PATCH] langchain: added vectorstore docstring linting (#29241) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for contributing to LangChain! - [x] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, etc. is being modified. Use "docs: ..." for purely docs changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" Added docstring linting for `langchain/indexes/vectorstore.py` (relates to issue #25154). - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in the `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. 
--------- Co-authored-by: Siddhant Jain Co-authored-by: Erick Friis --- .../langchain/indexes/vectorstore.py | 87 +++++++++++++++++-- libs/langchain/pyproject.toml | 7 +- 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/libs/langchain/langchain/indexes/vectorstore.py b/libs/langchain/langchain/indexes/vectorstore.py index 08042c63d7292..db9adc4c6dab6 100644 --- a/libs/langchain/langchain/indexes/vectorstore.py +++ b/libs/langchain/langchain/indexes/vectorstore.py @@ -1,3 +1,5 @@ +"""Vectorstore stubs for the indexing api.""" + from typing import Any, Dict, List, Optional, Type from langchain_core.document_loaders import BaseLoader @@ -13,6 +15,7 @@ def _get_default_text_splitter() -> TextSplitter: + """Return the default text splitter used for chunking documents.""" return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0) @@ -33,7 +36,17 @@ def query( retriever_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> str: - """Query the vectorstore.""" + """Query the vectorstore using the provided LLM. + + Args: + question: The question or prompt to query. + llm: The language model to use. Must not be None. + retriever_kwargs: Optional keyword arguments for the retriever. + **kwargs: Additional keyword arguments forwarded to the chain. + + Returns: + The result string from the RetrievalQA chain. + """ if llm is None: raise NotImplementedError( "This API has been changed to require an LLM. " @@ -55,7 +68,17 @@ async def aquery( retriever_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> str: - """Query the vectorstore.""" + """Asynchronously query the vectorstore using the provided LLM. + + Args: + question: The question or prompt to query. + llm: The language model to use. Must not be None. + retriever_kwargs: Optional keyword arguments for the retriever. + **kwargs: Additional keyword arguments forwarded to the chain. + + Returns: + The asynchronous result string from the RetrievalQA chain. 
+ """ if llm is None: raise NotImplementedError( "This API has been changed to require an LLM. " @@ -77,7 +100,17 @@ def query_with_sources( retriever_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> dict: - """Query the vectorstore and get back sources.""" + """Query the vectorstore and retrieve the answer along with sources. + + Args: + question: The question or prompt to query. + llm: The language model to use. Must not be None. + retriever_kwargs: Optional keyword arguments for the retriever. + **kwargs: Additional keyword arguments forwarded to the chain. + + Returns: + A dictionary containing the answer and source documents. + """ if llm is None: raise NotImplementedError( "This API has been changed to require an LLM. " @@ -99,7 +132,17 @@ async def aquery_with_sources( retriever_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> dict: - """Query the vectorstore and get back sources.""" + """Asynchronously query the vectorstore and retrieve the answer and sources. + + Args: + question: The question or prompt to query. + llm: The language model to use. Must not be None. + retriever_kwargs: Optional keyword arguments for the retriever. + **kwargs: Additional keyword arguments forwarded to the chain. + + Returns: + A dictionary containing the answer and source documents. + """ if llm is None: raise NotImplementedError( "This API has been changed to require an LLM. " @@ -149,14 +192,28 @@ class VectorstoreIndexCreator(BaseModel): ) def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper: - """Create a vectorstore index from loaders.""" + """Create a vectorstore index from a list of loaders. + + Args: + loaders: A list of `BaseLoader` instances to load documents. + + Returns: + A `VectorStoreIndexWrapper` containing the constructed vectorstore. 
+ """ docs = [] for loader in loaders: docs.extend(loader.load()) return self.from_documents(docs) async def afrom_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper: - """Create a vectorstore index from loaders.""" + """Asynchronously create a vectorstore index from a list of loaders. + + Args: + loaders: A list of `BaseLoader` instances to load documents. + + Returns: + A `VectorStoreIndexWrapper` containing the constructed vectorstore. + """ docs = [] for loader in loaders: async for doc in loader.alazy_load(): @@ -164,7 +221,14 @@ async def afrom_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrap return await self.afrom_documents(docs) def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper: - """Create a vectorstore index from documents.""" + """Create a vectorstore index from a list of documents. + + Args: + documents: A list of `Document` objects. + + Returns: + A `VectorStoreIndexWrapper` containing the constructed vectorstore. + """ sub_docs = self.text_splitter.split_documents(documents) vectorstore = self.vectorstore_cls.from_documents( sub_docs, self.embedding, **self.vectorstore_kwargs @@ -174,7 +238,14 @@ def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper: async def afrom_documents( self, documents: List[Document] ) -> VectorStoreIndexWrapper: - """Create a vectorstore index from documents.""" + """Asynchronously create a vectorstore index from a list of documents. + + Args: + documents: A list of `Document` objects. + + Returns: + A `VectorStoreIndexWrapper` containing the constructed vectorstore. 
+ """ sub_docs = self.text_splitter.split_documents(documents) vectorstore = await self.vectorstore_cls.afrom_documents( sub_docs, self.embedding, **self.vectorstore_kwargs diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index ab264b924cf51..3516594c72cdf 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -52,7 +52,12 @@ version = ">=1.26.2,<3" python = ">=3.12" [tool.ruff.lint] -select = [ "E", "F", "I", "T201",] +select = [ "E", "F", "I", "T201", "D",] +pydocstyle = { convention = "google" } + +[tool.ruff.lint.per-file-ignores] +"tests/*" = ["D"] +"!langchain/indexes/vectorstore.py" = ["D"] [tool.coverage.run] omit = [ "tests/*",]