Skip to content

Commit

Permalink
add retriever
Browse files Browse the repository at this point in the history
  • Loading branch information
lfunderburk committed Oct 6, 2023
1 parent 7e6603f commit 8993ca1
Show file tree
Hide file tree
Showing 9 changed files with 493 additions and 61 deletions.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion app.py → first-rag-bot/chainlit-app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from haystack.pipelines import Pipeline

# Load environment variables (if any)
load_dotenv(".env")
load_dotenv("../.env")
load_dotenv()
MY_API_KEY = os.getenv("OPENAI_API_KEY")

Expand Down
File renamed without changes.
File renamed without changes
455 changes: 396 additions & 59 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ python = "^3.9"
torch = [
{url = "https://download.pytorch.org/whl/cpu/torch-1.10.0%2Bcpu-cp39-cp39-linux_x86_64.whl", markers = "sys_platform == 'linux'"},
]
farm-haystack = {extras = ["inference"], version = "^1.20.1"}
farm-haystack = {extras = ["pinecone"], version = "^1.21.2"}
chainlit = "^0.7.0"
openai = "^0.28.0"
jupyter = "^1.0.0"
Expand Down
7 changes: 7 additions & 0 deletions retrieval-qa/datadownload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Load API keys from the ``.env`` file located one directory above this script."""
import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables (if any).
# The path is anchored to this file rather than the current working directory,
# so the same .env file is found no matter where the script is launched from.
load_dotenv(Path(__file__).resolve().parent.parent / ".env")

# Each value is None when the corresponding variable is not set.
openaikey = os.getenv("OPENAI_API_KEY")
# NOTE(review): this name would shadow the `pinecone` package if that package
# is imported later in this module -- confirm before adding such an import.
pinecone = os.getenv("PINECONE_API_KEY")
88 changes: 88 additions & 0 deletions retrieval-qa/retrieve.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"from haystack.nodes import WebRetriever\n",
"from haystack.nodes import LinkContentFetcher\n",
"from haystack.schema import Document\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from typing import List \n",
"\n",
"# Load environment variables (if any)\n",
"load_dotenv(\"../.env\")\n",
"serp = os.getenv(\"SERP_API_KEY\")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Initialize WebRetriever\n",
"# The API key is read from the SERP_API_KEY environment variable in the first cell\n",
"retriever = WebRetriever(api_key=serp, \n",
" mode=\"preprocessed_documents\",\n",
" top_k=100)\n",
"\n",
"# Retrieve documents based on a query\n",
"# Choose a query that is likely to surface the IMDb page you are interested in\n",
"# The query below is only an example; replace it with one relevant to your use case\n",
"documents = retriever.retrieve(query=\"IMDB movie reviews for the Barbie movie (2023)\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'url': 'https://imdb.com/title/tt1517268/',\n",
" 'timestamp': 1696653893,\n",
" 'search.score': 0.5263157894736843,\n",
" 'search.position': None,\n",
" 'snippet_text': \"a thought-provoking exploration of Barbie's role as a feminist symbol, challenging societal perceptions of femininity and girlhood\",\n",
" '_split_id': 0}"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"documents[0].meta"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llm-pipelines",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 8993ca1

Please sign in to comment.