Skip to content

Commit

Permalink
langchain: added vectorstore docstring linting (#29241)
Browse files Browse the repository at this point in the history
…ore.py

Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
  - Where "package" is whichever of langchain, community, core, etc. is
    being modified. Use "docs: ..." for purely docs changes, "infra: ..."
    for CI changes.
  - Example: "community: add foobar LLM"
  
Added docstring linting for the `vectorstore.py` file, addressing issue
#25154.


- [x] **Add tests and docs**: If you're adding a new integration, please
  include:
  1. a test for the integration, preferably unit tests that do not rely on
     network access,
  2. an example notebook showing its use. It lives in the
     `docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Co-authored-by: Siddhant Jain <[email protected]>
Co-authored-by: Erick Friis <[email protected]>
  • Loading branch information
3 people authored Jan 22, 2025
1 parent a2ed796 commit 9eb10a9
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 9 deletions.
87 changes: 79 additions & 8 deletions libs/langchain/langchain/indexes/vectorstore.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Vectorstore stubs for the indexing api."""

from typing import Any, Dict, List, Optional, Type

from langchain_core.document_loaders import BaseLoader
Expand All @@ -13,6 +15,7 @@


def _get_default_text_splitter() -> TextSplitter:
"""Return the default text splitter used for chunking documents."""
return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)


Expand All @@ -33,7 +36,17 @@ def query(
retriever_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> str:
"""Query the vectorstore."""
"""Query the vectorstore using the provided LLM.
Args:
question: The question or prompt to query.
llm: The language model to use. Must not be None.
retriever_kwargs: Optional keyword arguments for the retriever.
**kwargs: Additional keyword arguments forwarded to the chain.
Returns:
The result string from the RetrievalQA chain.
"""
if llm is None:
raise NotImplementedError(
"This API has been changed to require an LLM. "
Expand All @@ -55,7 +68,17 @@ async def aquery(
retriever_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> str:
"""Query the vectorstore."""
"""Asynchronously query the vectorstore using the provided LLM.
Args:
question: The question or prompt to query.
llm: The language model to use. Must not be None.
retriever_kwargs: Optional keyword arguments for the retriever.
**kwargs: Additional keyword arguments forwarded to the chain.
Returns:
The asynchronous result string from the RetrievalQA chain.
"""
if llm is None:
raise NotImplementedError(
"This API has been changed to require an LLM. "
Expand All @@ -77,7 +100,17 @@ def query_with_sources(
retriever_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> dict:
"""Query the vectorstore and get back sources."""
"""Query the vectorstore and retrieve the answer along with sources.
Args:
question: The question or prompt to query.
llm: The language model to use. Must not be None.
retriever_kwargs: Optional keyword arguments for the retriever.
**kwargs: Additional keyword arguments forwarded to the chain.
Returns:
A dictionary containing the answer and source documents.
"""
if llm is None:
raise NotImplementedError(
"This API has been changed to require an LLM. "
Expand All @@ -99,7 +132,17 @@ async def aquery_with_sources(
retriever_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> dict:
"""Query the vectorstore and get back sources."""
"""Asynchronously query the vectorstore and retrieve the answer and sources.
Args:
question: The question or prompt to query.
llm: The language model to use. Must not be None.
retriever_kwargs: Optional keyword arguments for the retriever.
**kwargs: Additional keyword arguments forwarded to the chain.
Returns:
A dictionary containing the answer and source documents.
"""
if llm is None:
raise NotImplementedError(
"This API has been changed to require an LLM. "
Expand Down Expand Up @@ -149,22 +192,43 @@ class VectorstoreIndexCreator(BaseModel):
)

def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from loaders."""
"""Create a vectorstore index from a list of loaders.
Args:
loaders: A list of `BaseLoader` instances to load documents.
Returns:
A `VectorStoreIndexWrapper` containing the constructed vectorstore.
"""
docs = []
for loader in loaders:
docs.extend(loader.load())
return self.from_documents(docs)

async def afrom_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from loaders."""
"""Asynchronously create a vectorstore index from a list of loaders.
Args:
loaders: A list of `BaseLoader` instances to load documents.
Returns:
A `VectorStoreIndexWrapper` containing the constructed vectorstore.
"""
docs = []
for loader in loaders:
async for doc in loader.alazy_load():
docs.append(doc)
return await self.afrom_documents(docs)

def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from documents."""
"""Create a vectorstore index from a list of documents.
Args:
documents: A list of `Document` objects.
Returns:
A `VectorStoreIndexWrapper` containing the constructed vectorstore.
"""
sub_docs = self.text_splitter.split_documents(documents)
vectorstore = self.vectorstore_cls.from_documents(
sub_docs, self.embedding, **self.vectorstore_kwargs
Expand All @@ -174,7 +238,14 @@ def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
async def afrom_documents(
self, documents: List[Document]
) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from documents."""
"""Asynchronously create a vectorstore index from a list of documents.
Args:
documents: A list of `Document` objects.
Returns:
A `VectorStoreIndexWrapper` containing the constructed vectorstore.
"""
sub_docs = self.text_splitter.split_documents(documents)
vectorstore = await self.vectorstore_cls.afrom_documents(
sub_docs, self.embedding, **self.vectorstore_kwargs
Expand Down
7 changes: 6 additions & 1 deletion libs/langchain/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,12 @@ version = ">=1.26.2,<3"
python = ">=3.12"

[tool.ruff.lint]
select = [ "E", "F", "I", "T201",]
select = [ "E", "F", "I", "T201", "D",]
pydocstyle = { convention = "google" }

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["D"]
"!langchain/indexes/vectorstore.py" = ["D"]

[tool.coverage.run]
omit = [ "tests/*",]
Expand Down

0 comments on commit 9eb10a9

Please sign in to comment.