Merge branch 'main' into main

slreznit · Sep 3, 2024 · 7d2c46c · 7d2c46c
2 parents 3f3289c + 3a6c7b7
commit 7d2c46c
Show file tree

Hide file tree

Showing 21 changed files with 461 additions and 7,406 deletions.
diff --git a/.env.sample b/.env.sample
@@ -118,3 +118,19 @@ PROMPTFLOW_RESPONSE_TIMEOUT=120
 PROMPTFLOW_REQUEST_FIELD_NAME=query
 PROMPTFLOW_RESPONSE_FIELD_NAME=reply
 PROMPTFLOW_CITATIONS_FIELD_NAME=documents
+# Chat with data: MongoDB database
+MONGODB_ENDPOINT=
+MONGODB_USERNAME=
+MONGODB_PASSWORD=
+MONGODB_DATABASE_NAME=
+MONGODB_COLLECTION_NAME=
+MONGODB_APP_NAME=
+MONGODB_INDEX_NAME=
+MONGODB_TOP_K=
+MONGODB_STRICTNESS=
+MONGODB_ENABLE_IN_DOMAIN=
+MONGODB_CONTENT_COLUMNS=
+MONGODB_FILENAME_COLUMN=
+MONGODB_TITLE_COLUMN=
+MONGODB_URL_COLUMN=
+MONGODB_VECTOR_COLUMNS=
diff --git a/README.md b/README.md
@@ -10,6 +10,7 @@ This repo contains sample code for a simple chat webapp that integrates with Azu
   - Elasticsearch index (preview)
   - Pinecone index (private preview)
   - Azure SQL Server (private preview)
+  - Mongo DB (preview)
 
 ## Configure the app
 
@@ -59,9 +60,6 @@ Please see the [section below](#add-an-identity-provider) for important informat
 
 3. You can see the local running app at http://127.0.0.1:50505.
 
-NOTE: You may find you need to set: MacOS: `export NODE_OPTIONS="--max-old-space-size=8192"` or Windows: `set NODE_OPTIONS=--max-old-space-size=8192` to avoid running out of memory when building the frontend.
-
-
 ### Deploy with the Azure CLI
 
 #### Create the Azure App Service
@@ -283,6 +281,34 @@ Note: RBAC assignments can take a few minutes before becoming effective.
     - `AZURE_OPENAI_EMBEDDING_NAME`: the name of your Ada (text-embedding-ada-002) model deployment on your Azure OpenAI resource.
     - `PINECONE_VECTOR_COLUMNS`: the vector columns in your index to use when searching. Join them with `|` like `contentVector|titleVector`.
 
+#### Chat with your data using Mongo DB (Private Preview)
+
+1. Update the `AZURE_OPENAI_*` environment variables as described in the [basic chat experience](#basic-chat-experience) above. 
+
+2. To connect to your data, you need to specify a Mongo DB database configuration.  Learn more about [MongoDB](https://www.mongodb.com/).
+
+3. Configure data source settings as described in the table below.
+
+    | App Setting | Required? | Default Value | Note |
+    | --- | --- | --- | ------------- |
+    |DATASOURCE_TYPE|Yes||Must be set to `MongoDB`|
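+    |MONGODB_ENDPOINT|Yes||The endpoint of your Mongo DB instance|
+    |MONGODB_USERNAME|Yes||The username used to authenticate to your Mongo DB instance|
+    |MONGODB_PASSWORD|Yes||The password used to authenticate to your Mongo DB instance|
+    |MONGODB_DATABASE_NAME|Yes||The name of your Mongo DB database|
+    |MONGODB_COLLECTION_NAME|Yes||The name of your Mongo DB collection|
+    |MONGODB_APP_NAME|Yes||The name of your Mongo DB application|
+    |MONGODB_INDEX_NAME|Yes||The name of your Mongo DB vector index|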
+    |MONGODB_TOP_K|No|5|The number of documents to retrieve when querying your search index.|
+    |MONGODB_ENABLE_IN_DOMAIN|No|True|Limits responses to only queries relating to your data.|
+    |MONGODB_STRICTNESS|No|3|Integer from 1 to 5 specifying the strictness for the model limiting responses to your data.|
+    |MONGODB_CONTENT_COLUMNS|No||List of fields in your search index that contain the text content of your documents to use when formulating a bot response. Represent these as a string joined with "|", e.g. `"product_description|product_manual"`|
+    |MONGODB_FILENAME_COLUMN|No|| Field from your search index that gives a unique identifier of the source of your data to display in the UI.|
+    |MONGODB_TITLE_COLUMN|No||Field from your search index that gives a relevant title or header for your data content to display in the UI.|
+    |MONGODB_URL_COLUMN|No||Field from your search index that contains a URL for the document, e.g. an Azure Blob Storage URI. This value is not currently used.|
+    |MONGODB_VECTOR_COLUMNS|No||List of fields in your search index that contain vector embeddings of your documents to use when formulating a bot response. Represent these as a string joined with "|", e.g. `"contentVector|titleVector"`|
+
+    MongoDB uses vector search by default, so ensure these settings are configured on your app:
+    - `AZURE_OPENAI_EMBEDDING_NAME`: the name of your Ada (text-embedding-ada-002) model deployment on your Azure OpenAI resource.
+    - `MONGODB_VECTOR_COLUMNS`: the vector columns in your index to use when searching. Join them with `|` like `contentVector|titleVector`.
+
 #### Chat with your data using Azure SQL Server (Private Preview)
 
 1. Update the `AZURE_OPENAI_*` environment variables as described in the [basic chat experience](#basic-chat-experience) above. 
@@ -296,6 +322,9 @@ Note: RBAC assignments can take a few minutes before becoming effective.
     |DATASOURCE_TYPE|Yes||Must be set to `AzureSqlServer`|
     |AZURE_SQL_SERVER_CONNECTION_STRING|Yes||The connection string to use to connect to your Azure SQL Server instance|
     |AZURE_SQL_SERVER_TABLE_SCHEMA|Yes||The table schema for your Azure SQL Server table.  Must be surrounded by double quotes (`"`).|
+    |AZURE_SQL_SERVER_PORT|No||The port to use to connect to your Azure SQL Server instance. Not publicly available at this time.|
+    |AZURE_SQL_SERVER_DATABASE_NAME|No||The name of your Azure SQL Server database. Not publicly available at this time.|
+    |AZURE_SQL_SERVER_DATABASE_SERVER|No||The Azure SQL Server database server to connect to. Not publicly available at this time.|
 
 #### Chat with your data using Promptflow
 
@@ -391,7 +420,6 @@ We recommend keeping these best practices in mind:
 
 **A note on Azure OpenAI API versions**: The application code in this repo will implement the request and response contracts for the most recent preview API version supported for Azure OpenAI.  To keep your application up-to-date as the Azure OpenAI API evolves with time, be sure to merge the latest API version update into your own application code and redeploy using the methods described in this document.
 
-
 ## Contributing
 
 This project welcomes contributions and suggestions.  Most contributions require you to agree to a

diff --git a/app.py b/app.py
@@ -218,12 +218,22 @@ def prepare_model_args(request_body, request_headers):
 
     for message in request_messages:
         if message:
-            messages.append(
-                {
-                    "role": message["role"],
-                    "content": message["content"]
-                }
-            )
+            if message["role"] == "assistant" and "context" in message:
+                context_obj = json.loads(message["context"])
+                messages.append(
+                    {
+                        "role": message["role"],
+                        "content": message["content"],
+                        "context": context_obj
+                    }
+                )
+            else:
+                messages.append(
+                    {
+                        "role": message["role"],
+                        "content": message["content"]
+                    }
+                )
 
     user_json = None
     if (MS_DEFENDER_ENABLED):

diff --git a/backend/settings.py b/backend/settings.py
@@ -182,7 +182,7 @@ def extract_embedding_dependency(self) -> Optional[dict]:
                     "endpoint": self.embedding_endpoint,
                     "authentication": {
                         "type": "api_key",
-                        "api_key": self.embedding_key
+                        "key": self.embedding_key
                     }
                 }
             else:
@@ -625,24 +625,33 @@ class _AzureSqlServerSettings(BaseSettings, DatasourcePayloadConstructor):
     model_config = SettingsConfigDict(
         env_prefix="AZURE_SQL_SERVER_",
         env_file=DOTENV_PATH,
-        extra="ignore"
+        extra="ignore",
+        env_ignore_empty=True
     )
     _type: Literal["azure_sql_server"] = PrivateAttr(default="azure_sql_server")
 
-    connection_string: str = Field(exclude=True)
-    table_schema: str
+    connection_string: Optional[str] = Field(default=None, exclude=True)
+    table_schema: Optional[str] = None
     schema_max_row: Optional[int] = None
     top_n_results: Optional[int] = None
+    database_server: Optional[str] = None
+    database_name: Optional[str] = None
+    port: Optional[int] = None
 
     # Constructed fields
     authentication: Optional[dict] = None
 
     @model_validator(mode="after")
     def construct_authentication(self) -> Self:
-        self.authentication = {
-            "type": "connection_string",
-            "connection_string": self.connection_string
-        }
+        if self.connection_string:
+            self.authentication = {
+                "type": "connection_string",
+                "connection_string": self.connection_string
+            }
+        elif self.database_server and self.database_name and self.port:
+            self.authentication = {
+                "type": "system_assigned_managed_identity"
+            }
         return self
 
     def construct_payload_configuration(
@@ -658,7 +667,84 @@ def construct_payload_configuration(
             "parameters": parameters
         }
 
+
+class _MongoDbSettings(BaseSettings, DatasourcePayloadConstructor):  # MongoDB datasource settings, loaded from MONGODB_* environment variables
+    model_config = SettingsConfigDict(
+        env_prefix="MONGODB_",
+        env_file=DOTENV_PATH,
+        extra="ignore",
+        env_ignore_empty=True
+    )
+    _type: Literal["mongo_db"] = PrivateAttr(default="mongo_db")  # emitted as the payload "type" by construct_payload_configuration
 
+    endpoint: str
+    username: str = Field(exclude=True)  # excluded from model_dump; carried in `authentication` instead
+    password: str = Field(exclude=True)  # excluded from model_dump; carried in `authentication` instead
+    database_name: str
+    collection_name: str
+    app_name: str
+    index_name: str
+    query_type: Literal["vector"] = "vector"  # only "vector" is accepted
+    top_k: int = Field(default=5, serialization_alias="top_n_documents")  # dumped as "top_n_documents" when by_alias=True
+    strictness: int = 3
+    enable_in_domain: bool = Field(default=True, serialization_alias="in_scope")  # dumped as "in_scope" when by_alias=True
+    content_columns: Optional[List[str]] = Field(default=None, exclude=True)  # excluded from dump; surfaced through fields_mapping
+    vector_columns: Optional[List[str]] = Field(default=None, exclude=True)  # excluded from dump; surfaced through fields_mapping
+    title_column: Optional[str] = Field(default=None, exclude=True)  # excluded from dump; surfaced through fields_mapping
+    url_column: Optional[str] = Field(default=None, exclude=True)  # excluded from dump; surfaced through fields_mapping
+    filename_column: Optional[str] = Field(default=None, exclude=True)  # excluded from dump; surfaced through fields_mapping
+
+
+    # Constructed fields
+    authentication: Optional[dict] = None  # filled in by construct_authentication
+    embedding_dependency: Optional[dict] = None  # filled in by construct_payload_configuration
+    fields_mapping: Optional[dict] = None  # filled in by set_fields_mapping
+
+    @field_validator('content_columns', 'vector_columns', mode="before")
+    @classmethod
+    def split_columns(cls, comma_separated_string: str) -> List[str]:  # NOTE(review): README describes a "|"-joined value despite the parameter name; confirm against parse_multi_columns
+        if isinstance(comma_separated_string, str) and len(comma_separated_string) > 0:
+            return parse_multi_columns(comma_separated_string)
+
+        return None  # non-string or empty input: the field stays None
+
+    @model_validator(mode="after")
+    def set_fields_mapping(self) -> Self:  # builds fields_mapping from the excluded *_column(s) settings
+        self.fields_mapping = {
+            "content_fields": self.content_columns,
+            "title_field": self.title_column,
+            "url_field": self.url_column,
+            "filepath_field": self.filename_column,
+            "vector_fields": self.vector_columns
+        }
+        return self
+
+    @model_validator(mode="after")
+    def construct_authentication(self) -> Self:  # packs username/password into the authentication dict
+        self.authentication = {
+            "type": "username_and_password",
+            "username": self.username,
+            "password": self.password
+        }
+        return self
+
+    def construct_payload_configuration(  # returns {"type": ..., "parameters": ...} for this datasource
+        self,
+        *args,
+        **kwargs
+    ):
+        self.embedding_dependency = \
+            self._settings.azure_openai.extract_embedding_dependency()  # set before the dump below so it can appear in parameters
+
+        parameters = self.model_dump(exclude_none=True, by_alias=True)  # None-valued fields are dropped; serialization aliases are applied
+        parameters.update(self._settings.search.model_dump(exclude_none=True, by_alias=True))  # search settings are merged last and overwrite same-named keys
+
+        return {
+            "type": self._type,
+            "parameters": parameters
+        }
+
+
 class _BaseSettings(BaseSettings):
     model_config = SettingsConfigDict(
         env_file=DOTENV_PATH,
@@ -729,15 +815,20 @@ def set_datasource_settings(self) -> Self:
             elif self.base_settings.datasource_type == "AzureSqlServer":
                 self.datasource = _AzureSqlServerSettings(settings=self, _env_file=DOTENV_PATH)
                 logging.debug("Using SQL Server")
+
+            elif self.base_settings.datasource_type == "MongoDB":
+                self.datasource = _MongoDbSettings(settings=self, _env_file=DOTENV_PATH)
+                logging.debug("Using Mongo DB")
 
             else:
                 self.datasource = None
                 logging.warning("No datasource configuration found in the environment -- calls will be made to Azure OpenAI without grounding data.")
 
             return self
 
-        except ValidationError:
+        except ValidationError as e:
             logging.warning("No datasource configuration found in the environment -- calls will be made to Azure OpenAI without grounding data.")
+            logging.warning(e.errors())
 
 
 app_settings = _AppSettings()