From 9eb10a9240eddbd544cb970e189c02e27e025522 Mon Sep 17 00:00:00 2001
From: Siddhant <87547498+Siddhant231xyz@users.noreply.github.com>
Date: Tue, 21 Jan 2025 22:47:43 -0500
Subject: [PATCH] langchain: added vectorstore docstring linting (#29241)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds docstring linting to langchain/indexes/vectorstore.py.

Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
  - Where "package" is whichever of langchain, community, core, etc. is
    being modified. Use "docs: ..." for purely docs changes, "infra: ..."
    for CI changes.
  - Example: "community: add foobar LLM"

Added docstring linting for `langchain/indexes/vectorstore.py`, as part
of issue #25154.


- [x] **Add tests and docs**: If you're adding a new integration, please
  include
  1. a test for the integration, preferably unit tests that do not rely on
     network access,
  2. an example notebook showing its use. It lives in the
     `docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Co-authored-by: Siddhant Jain <sjain35@buffalo.edu>
Co-authored-by: Erick Friis <erick@langchain.dev>
---
 .../langchain/indexes/vectorstore.py          | 87 +++++++++++++++++--
 libs/langchain/pyproject.toml                 |  7 +-
 2 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/libs/langchain/langchain/indexes/vectorstore.py b/libs/langchain/langchain/indexes/vectorstore.py
index 08042c63d7292..db9adc4c6dab6 100644
--- a/libs/langchain/langchain/indexes/vectorstore.py
+++ b/libs/langchain/langchain/indexes/vectorstore.py
@@ -1,3 +1,5 @@
+"""Vectorstore stubs for the indexing API."""
+
 from typing import Any, Dict, List, Optional, Type
 
 from langchain_core.document_loaders import BaseLoader
@@ -13,6 +15,7 @@
 
 
 def _get_default_text_splitter() -> TextSplitter:
+    """Return the default text splitter used for chunking documents."""
     return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 
 
@@ -33,7 +36,17 @@ def query(
         retriever_kwargs: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> str:
-        """Query the vectorstore."""
+        """Query the vectorstore using the provided LLM.
+
+        Args:
+            question: The question or prompt to query.
+            llm: The language model to use. Must not be None.
+            retriever_kwargs: Optional keyword arguments for the retriever.
+            **kwargs: Additional keyword arguments forwarded to the chain.
+
+        Returns:
+            The result string from the RetrievalQA chain.
+        """
         if llm is None:
             raise NotImplementedError(
                 "This API has been changed to require an LLM. "
@@ -55,7 +68,17 @@ async def aquery(
         retriever_kwargs: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> str:
-        """Query the vectorstore."""
+        """Asynchronously query the vectorstore using the provided LLM.
+
+        Args:
+            question: The question or prompt to query.
+            llm: The language model to use. Must not be None.
+            retriever_kwargs: Optional keyword arguments for the retriever.
+            **kwargs: Additional keyword arguments forwarded to the chain.
+
+        Returns:
+            The result string from the RetrievalQA chain.
+        """
         if llm is None:
             raise NotImplementedError(
                 "This API has been changed to require an LLM. "
@@ -77,7 +100,17 @@ def query_with_sources(
         retriever_kwargs: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> dict:
-        """Query the vectorstore and get back sources."""
+        """Query the vectorstore and retrieve the answer along with sources.
+
+        Args:
+            question: The question or prompt to query.
+            llm: The language model to use. Must not be None.
+            retriever_kwargs: Optional keyword arguments for the retriever.
+            **kwargs: Additional keyword arguments forwarded to the chain.
+
+        Returns:
+            A dictionary containing the answer and source documents.
+        """
         if llm is None:
             raise NotImplementedError(
                 "This API has been changed to require an LLM. "
@@ -99,7 +132,17 @@ async def aquery_with_sources(
         retriever_kwargs: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> dict:
-        """Query the vectorstore and get back sources."""
+        """Asynchronously query the vectorstore and retrieve the answer and sources.
+
+        Args:
+            question: The question or prompt to query.
+            llm: The language model to use. Must not be None.
+            retriever_kwargs: Optional keyword arguments for the retriever.
+            **kwargs: Additional keyword arguments forwarded to the chain.
+
+        Returns:
+            A dictionary containing the answer and source documents.
+        """
         if llm is None:
             raise NotImplementedError(
                 "This API has been changed to require an LLM. "
@@ -149,14 +192,28 @@ class VectorstoreIndexCreator(BaseModel):
     )
 
     def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
-        """Create a vectorstore index from loaders."""
+        """Create a vectorstore index from a list of loaders.
+
+        Args:
+            loaders: A list of `BaseLoader` instances to load documents.
+
+        Returns:
+            A `VectorStoreIndexWrapper` containing the constructed vectorstore.
+        """
         docs = []
         for loader in loaders:
             docs.extend(loader.load())
         return self.from_documents(docs)
 
     async def afrom_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
-        """Create a vectorstore index from loaders."""
+        """Asynchronously create a vectorstore index from a list of loaders.
+
+        Args:
+            loaders: A list of `BaseLoader` instances to load documents.
+
+        Returns:
+            A `VectorStoreIndexWrapper` containing the constructed vectorstore.
+        """
         docs = []
         for loader in loaders:
             async for doc in loader.alazy_load():
@@ -164,7 +221,14 @@ async def afrom_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrap
         return await self.afrom_documents(docs)
 
     def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
-        """Create a vectorstore index from documents."""
+        """Create a vectorstore index from a list of documents.
+
+        Args:
+            documents: A list of `Document` objects.
+
+        Returns:
+            A `VectorStoreIndexWrapper` containing the constructed vectorstore.
+        """
         sub_docs = self.text_splitter.split_documents(documents)
         vectorstore = self.vectorstore_cls.from_documents(
             sub_docs, self.embedding, **self.vectorstore_kwargs
@@ -174,7 +238,14 @@ def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
     async def afrom_documents(
         self, documents: List[Document]
     ) -> VectorStoreIndexWrapper:
-        """Create a vectorstore index from documents."""
+        """Asynchronously create a vectorstore index from a list of documents.
+
+        Args:
+            documents: A list of `Document` objects.
+
+        Returns:
+            A `VectorStoreIndexWrapper` containing the constructed vectorstore.
+        """
         sub_docs = self.text_splitter.split_documents(documents)
         vectorstore = await self.vectorstore_cls.afrom_documents(
             sub_docs, self.embedding, **self.vectorstore_kwargs
diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
index ab264b924cf51..3516594c72cdf 100644
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -52,7 +52,12 @@ version = ">=1.26.2,<3"
 python = ">=3.12"
 
 [tool.ruff.lint]
-select = [ "E", "F", "I", "T201",]
+select = [ "E", "F", "I", "T201", "D",]
+pydocstyle = { convention = "google" }
+
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["D"]
+"!langchain/indexes/vectorstore.py" = ["D"]
 
 [tool.coverage.run]
 omit = [ "tests/*",]