Skip to content

Commit

Permalink
Add Elasticsearch datasource to webapp (microsoft#418)
Browse files Browse the repository at this point in the history
Co-authored-by: Sarah Widder <[email protected]>
Co-authored-by: Abby Hartman <[email protected]>
  • Loading branch information
3 people authored Dec 5, 2023
1 parent 9f92236 commit 2967b29
Show file tree
Hide file tree
Showing 4 changed files with 995 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ frontend/node_modules
# static
.azure/
__pycache__/
.ipynb_checkpoints/
56 changes: 55 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import openai
import copy
from azure.identity import DefaultAzureCredential
from base64 import b64encode
from flask import Flask, Response, request, jsonify, send_from_directory
from dotenv import load_dotenv

Expand Down Expand Up @@ -97,6 +98,21 @@ def assets(path):
AZURE_COSMOSDB_CONVERSATIONS_CONTAINER = os.environ.get("AZURE_COSMOSDB_CONVERSATIONS_CONTAINER")
AZURE_COSMOSDB_ACCOUNT_KEY = os.environ.get("AZURE_COSMOSDB_ACCOUNT_KEY")

# Elasticsearch Integration Settings
# Every value is read from the process environment once, at import time, and is
# kept as the raw string (or None when the variable is unset).

# Cluster connection and target index.
ELASTICSEARCH_ENDPOINT = os.getenv("ELASTICSEARCH_ENDPOINT")
ELASTICSEARCH_ENCODED_API_KEY = os.getenv("ELASTICSEARCH_ENCODED_API_KEY")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")

# Retrieval behaviour. TOP_K, ENABLE_IN_DOMAIN and STRICTNESS fall back to the
# generic SEARCH_* settings when no Elasticsearch-specific value is provided.
ELASTICSEARCH_QUERY_TYPE = os.getenv("ELASTICSEARCH_QUERY_TYPE", "simple")
ELASTICSEARCH_TOP_K = os.getenv("ELASTICSEARCH_TOP_K", SEARCH_TOP_K)
ELASTICSEARCH_ENABLE_IN_DOMAIN = os.getenv("ELASTICSEARCH_ENABLE_IN_DOMAIN", SEARCH_ENABLE_IN_DOMAIN)

# Index field mapping. The *_COLUMNS values are "|"-separated lists that are
# split where the request body is built.
ELASTICSEARCH_CONTENT_COLUMNS = os.getenv("ELASTICSEARCH_CONTENT_COLUMNS")
ELASTICSEARCH_FILENAME_COLUMN = os.getenv("ELASTICSEARCH_FILENAME_COLUMN")
ELASTICSEARCH_TITLE_COLUMN = os.getenv("ELASTICSEARCH_TITLE_COLUMN")
ELASTICSEARCH_URL_COLUMN = os.getenv("ELASTICSEARCH_URL_COLUMN")
ELASTICSEARCH_VECTOR_COLUMNS = os.getenv("ELASTICSEARCH_VECTOR_COLUMNS")

# Strictness level and the ID of the embedding model passed along with queries.
ELASTICSEARCH_STRICTNESS = os.getenv("ELASTICSEARCH_STRICTNESS", SEARCH_STRICTNESS)
ELASTICSEARCH_EMBEDDING_MODEL_ID = os.getenv("ELASTICSEARCH_EMBEDDING_MODEL_ID")

# Initialize a CosmosDB client with AAD auth and containers for Chat History
cosmos_conversation_client = None
if AZURE_COSMOSDB_DATABASE and AZURE_COSMOSDB_ACCOUNT and AZURE_COSMOSDB_CONVERSATIONS_CONTAINER:
Expand Down Expand Up @@ -263,7 +279,45 @@ def prepare_body_headers_with_data(request):
"queryType": query_type,
"roleInformation": AZURE_OPENAI_SYSTEM_MESSAGE
}
})
}
)

elif DATASOURCE_TYPE == "Elasticsearch":
body["dataSources"].append(
{
"messages": request_messages,
"temperature": float(AZURE_OPENAI_TEMPERATURE),
"max_tokens": int(AZURE_OPENAI_MAX_TOKENS),
"top_p": float(AZURE_OPENAI_TOP_P),
"stop": AZURE_OPENAI_STOP_SEQUENCE.split("|") if AZURE_OPENAI_STOP_SEQUENCE else None,
"stream": SHOULD_STREAM,
"dataSources": [
{
"type": "AzureCognitiveSearch",
"parameters": {
"endpoint": ELASTICSEARCH_ENDPOINT,
"encodedApiKey": ELASTICSEARCH_ENCODED_API_KEY,
"indexName": ELASTICSEARCH_INDEX,
"fieldsMapping": {
"contentFields": ELASTICSEARCH_CONTENT_COLUMNS.split("|") if ELASTICSEARCH_CONTENT_COLUMNS else [],
"titleField": ELASTICSEARCH_TITLE_COLUMN if ELASTICSEARCH_TITLE_COLUMN else None,
"urlField": ELASTICSEARCH_URL_COLUMN if ELASTICSEARCH_URL_COLUMN else None,
"filepathField": ELASTICSEARCH_FILENAME_COLUMN if ELASTICSEARCH_FILENAME_COLUMN else None,
"vectorFields": ELASTICSEARCH_VECTOR_COLUMNS.split("|") if ELASTICSEARCH_VECTOR_COLUMNS else []
},
"inScope": True if ELASTICSEARCH_ENABLE_IN_DOMAIN.lower() == "true" else False,
"topNDocuments": int(ELASTICSEARCH_TOP_K),
"queryType": ELASTICSEARCH_QUERY_TYPE,
"roleInformation": AZURE_OPENAI_SYSTEM_MESSAGE,
"embeddingEndpoint": AZURE_OPENAI_EMBEDDING_ENDPOINT,
"embeddingKey": AZURE_OPENAI_EMBEDDING_KEY,
"embeddingModelId": ELASTICSEARCH_EMBEDDING_MODEL_ID,
"strictness": int(ELASTICSEARCH_STRICTNESS)
}
}
]
}
)
else:
raise Exception(f"DATASOURCE_TYPE is not configured or unknown: {DATASOURCE_TYPE}")

Expand Down
137 changes: 137 additions & 0 deletions infrastructure/deployment.json
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,95 @@
"metadata": {
"description": "Enable chat history by deploying a Cosmos DB instance"
}
},
"ElasticsearchEndpoint": {
"type": "string",
"metadata": {
"description": "Endpoint to use to connect to an Elasticsearch cluster"
}
},
"ElasticsearchEncodedApiKey": {
"type": "securestring",
"metadata": {
"description": "Encoded API key credentials to use to connect to an Elasticsearch cluster"
}
},
"ElasticsearchIndex": {
"type": "string",
"metadata": {
"description": "Elasticsearch index to use for retrieving grounding data"
}
},
"ElasticsearchQueryType": {
"type": "string",
"defaultValue": "simple",
"metadata": {
"description": "Type of query to use for Elasticsearch data"
}
},
"ElasticsearchContentColumns": {
"type": "string",
"metadata": {
"description": "Elasticsearch index content columns"
}
},
"ElasticsearchFilenameColumn": {
"type": "string",
"metadata": {
"description": "Elasticsearch index filename column"
}
},
"ElasticsearchTitleColumn": {
"type": "string",
"metadata": {
"description": "Elasticsearch index title column"
}
},
"ElasticsearchUrlColumn": {
"type": "string",
"metadata": {
"description": "Elasticsearch index url column"
}
},
"ElasticsearchVectorColumns": {
"type": "string",
"metadata": {
"description": "Elasticsearch index vector columns"
}
},
"ElasticsearchTopK": {
"type": "int",
"defaultValue": 5,
"metadata": {
"description": "Number of top-ranked documents (top K) to retrieve from Elasticsearch"
}
},
"ElasticsearchEnableInDomain": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Restrict responses to in-domain data retrieved from the Elasticsearch index"
}
},
"ElasticsearchStrictness": {
"type": "int",
"defaultValue": 3,
"allowedValues": [
1,
2,
3,
4,
5
],
"metadata": {
"description": "Strictness level (1-5) applied to Elasticsearch search results"
}
},
"ElasticsearchEmbeddingModelId": {
"type": "string",
"metadata": {
"description": "The model ID for a model deployed on Elasticsearch to use for generating embeddings for queries."
}
}
},
"variables": {
Expand Down Expand Up @@ -428,6 +517,54 @@
{
"name": "AZURE_COSMOSDB_CONVERSATIONS_CONTAINER",
"value": "[variables('cosmosdb_container_name')]"
},
{
"name": "ELASTICSEARCH_ENDPOINT",
"value": "[parameters('ElasticsearchEndpoint')]"
},
{
"name": "ELASTICSEARCH_ENCODED_API_KEY",
"value": "[parameters('ElasticsearchEncodedApiKey')]"
},
{
"name": "ELASTICSEARCH_INDEX",
"value": "[parameters('ElasticsearchIndex')]"
},
{
"name": "ELASTICSEARCH_QUERY_TYPE",
"value": "[parameters('ElasticsearchQueryType')]"
},
{
"name": "ELASTICSEARCH_TOP_K",
"value": "[parameters('ElasticsearchTopK')]"
},
{
"name": "ELASTICSEARCH_ENABLE_IN_DOMAIN",
"value": "[parameters('ElasticsearchEnableInDomain')]"
},
{
"name": "ELASTICSEARCH_CONTENT_COLUMNS",
"value": "[parameters('ElasticsearchContentColumns')]"
},
{
"name": "ELASTICSEARCH_FILENAME_COLUMN",
"value": "[parameters('ElasticsearchFilenameColumn')]"
},
{
"name": "ELASTICSEARCH_TITLE_COLUMN",
"value": "[parameters('ElasticsearchTitleColumn')]"
},
{
"name": "ELASTICSEARCH_URL_COLUMN",
"value": "[parameters('ElasticsearchUrlColumn')]"
},
{
"name": "ELASTICSEARCH_VECTOR_COLUMNS",
"value": "[parameters('ElasticsearchVectorColumns')]"
},
{
"name": "ELASTICSEARCH_STRICTNESS",
"value": "[parameters('ElasticsearchStrictness')]"
},
{
"name": "ELASTICSEARCH_EMBEDDING_MODEL_ID",
"value": "[parameters('ElasticsearchEmbeddingModelId')]"
}
],
"linuxFxVersion": "[variables('WebAppImageName')]"
Expand Down
Loading

0 comments on commit 2967b29

Please sign in to comment.