Refactor lantern_extras SQL functions:
- Remove `cohere_embedding`, `clip_text`, `clip_image` functions
- Add `llm_embedding` function
- Refactor arguments for `llm_completion` function
- Refactor arguments for `add_embedding_job` function
- Refactor arguments for `add_completion_job` function
- Remove GUCs `lantern_extras.openai_azure_api_token` and `lantern_extras.cohere_token`; use `lantern_extras.llm_token` instead (see the sketch below)
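
A minimal sketch of the refactored surface (values are illustrative, not part of this commit; the full signatures are in the README diff below):

```sql
-- Single token GUC for all runtimes, replacing the removed
-- lantern_extras.openai_azure_api_token and lantern_extras.cohere_token:
SET lantern_extras.llm_token = '<api_token>';

-- Unified embedding entry point; the removed cohere_embedding, clip_text and
-- clip_image functions are now covered by the runtime argument:
SELECT llm_embedding(input => 'My text input', model => 'text-embedding-3-small', runtime => 'openai');
SELECT llm_embedding(input => 'My text input', model => 'embed-multilingual-light-v3.0', runtime => 'cohere');

-- llm_completion now takes named arguments, with "context" renamed to "system_prompt":
SELECT llm_completion(user_prompt => 'User input', system_prompt => 'Reply briefly', model => 'gpt-4o');
```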
var77 committed Nov 1, 2024
1 parent 7b0b553 commit 7834a19
Showing 8 changed files with 272 additions and 282 deletions.
lantern_cli/src/embeddings/core/openai_runtime.rs (18 changes: 8 additions & 10 deletions)
@@ -188,7 +188,7 @@ pub struct OpenAiRuntime<'a> {
     request_timeout: u64,
     base_url: String,
     headers: Vec<(String, String)>,
-    context: serde_json::Value,
+    system_prompt: serde_json::Value,
     dimensions: Option<usize>,
     deployment_type: OpenAiDeployment,
     #[allow(dead_code)]
@@ -199,9 +199,8 @@ pub struct OpenAiRuntime<'a> {
 pub struct OpenAiRuntimeParams {
     pub base_url: Option<String>,
     pub api_token: Option<String>,
-    pub azure_api_token: Option<String>,
     pub azure_entra_token: Option<String>,
-    pub context: Option<String>,
+    pub system_prompt: Option<String>,
     pub dimensions: Option<usize>,
 }
 
@@ -223,15 +222,14 @@ impl<'a> OpenAiRuntime<'a> {
             }
             OpenAiDeployment::Azure => {
                 // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
-                if runtime_params.azure_api_token.is_none()
-                    && runtime_params.azure_entra_token.is_none()
+                if runtime_params.api_token.is_none() && runtime_params.azure_entra_token.is_none()
                 {
                     anyhow::bail!(
-                        "'azure_api_key' or 'azure_entra_id' is required for Azure OpenAi runtime"
+                        "'api_token' or 'azure_entra_id' is required for Azure OpenAi runtime"
                     );
                 }
 
-                if let Some(key) = runtime_params.azure_api_token {
+                if let Some(key) = runtime_params.api_token {
                     ("api-key".to_owned(), format!("{}", key))
                 } else {
                     (
@@ -242,7 +240,7 @@ impl<'a> OpenAiRuntime<'a> {
             }
         };
 
-        let context = match &runtime_params.context {
+        let system_prompt = match &runtime_params.system_prompt {
             Some(system_prompt) => json!({ "role": "system", "content": system_prompt.clone()}),
             None => json!({ "role": "system", "content": "" }),
         };
@@ -257,7 +255,7 @@ impl<'a> OpenAiRuntime<'a> {
                 auth_header,
             ],
             dimensions: runtime_params.dimensions,
-            context,
+            system_prompt,
         })
     }
 
@@ -388,7 +386,7 @@ impl<'a> OpenAiRuntime<'a> {
             serde_json::to_string(&json!({
                 "model": model_name,
                 "messages": [
-                    self.context,
+                    self.system_prompt,
                     { "role": "user", "content": query }
                 ]
             }))?,
lantern_cli/tests/daemon_completion_test_with_db.rs (2 changes: 1 addition & 1 deletion)
@@ -28,7 +28,7 @@ async fn test_daemon_completion_init_job() {
         ('Test5');
         INSERT INTO _lantern_extras_internal.embedding_generation_jobs ("id", "table", src_column, dst_column, embedding_model, runtime, runtime_params, job_type, column_type)
-        VALUES (1, '{CLIENT_TABLE_NAME}', 'title', 'num', 'openai/gpt-4o', 'openai', '{{"api_token": "{api_token}", "context": "Given text testN, return the N as number without any quotes, so for Test1 you should return 1, Test105 you should return 105" }}', 'completion', 'INT');
+        VALUES (1, '{CLIENT_TABLE_NAME}', 'title', 'num', 'openai/gpt-4o', 'openai', '{{"api_token": "{api_token}", "system_prompt": "Given text testN, return the N as number without any quotes, so for Test1 you should return 1, Test105 you should return 105" }}', 'completion', 'INT');
         "#
     ))
     .await
lantern_cli/tests/embedding_test_with_db.rs (6 changes: 3 additions & 3 deletions)
@@ -163,7 +163,7 @@ async fn test_openai_completion_from_db() {
         limit: Some(10),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "you will be given text, return postgres array of TEXT[] by splitting the text by characters skipping spaces. Example 'te st' -> {{t,e,s,t}} . Do not put tailing commas, do not put double or single quotes around characters" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "you will be given text, return postgres array of TEXT[] by splitting the text by characters skipping spaces. Example 'te st' -> {{t,e,s,t}} . Do not put tailing commas, do not put double or single quotes around characters" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
@@ -241,7 +241,7 @@ async fn test_openai_completion_special_chars_from_db() {
         limit: Some(2),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "for any input return multi line text which will contain escape characters which can potentially break postgres COPY" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "for any input return multi line text which will contain escape characters which can potentially break postgres COPY" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
@@ -319,7 +319,7 @@ async fn test_openai_completion_failed_rows_from_db() {
         limit: Some(10),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "you will be given text, return array by splitting the text by characters skipping spaces. Example 'te st' -> [t,e,s,t]" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "you will be given text, return array by splitting the text by characters skipping spaces. Example 'te st' -> [t,e,s,t]" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
lantern_cli/tests/query_completion_test.rs (6 changes: 3 additions & 3 deletions)
@@ -1,7 +1,7 @@
 use lantern_cli::embeddings::core::{EmbeddingRuntime, Runtime};
 use std::env;
 
-static LLM_CONTEXT: &'static str = "You will be provided JSON with the following schema: {x: string}, answer to the message returning the x propery from the provided JSON object";
+static LLM_SYSTEM_PROMPT: &'static str = "You will be provided JSON with the following schema: {x: string}, answer to the message returning the x propery from the provided JSON object";
 
 macro_rules! query_completion_test {
     ($($name:ident: $value:expr,)*) => {
@@ -19,7 +19,7 @@ macro_rules! query_completion_test {
                 return;
             }
 
-            let params = format!(r#"{{"api_token": "{api_token}", "context": "{LLM_CONTEXT}"}}"#);
+            let params = format!(r#"{{"api_token": "{api_token}", "system_prompt": "{LLM_SYSTEM_PROMPT}"}}"#);
 
             let runtime = EmbeddingRuntime::new(&runtime_name, None, &params).unwrap();
             let output = runtime.completion(
@@ -62,7 +62,7 @@ macro_rules! query_completion_test_multiple {
                 expected_output.push(output);
             }
 
-            let params = format!(r#"{{"api_token": "{api_token}", "context": "{LLM_CONTEXT}"}}"#);
+            let params = format!(r#"{{"api_token": "{api_token}", "system_prompt": "{LLM_SYSTEM_PROMPT}"}}"#);
 
             let runtime = EmbeddingRuntime::new(&runtime_name, None, &params).unwrap();
             let output = runtime.batch_completion(
lantern_extras/README.md (84 changes: 59 additions & 25 deletions)
@@ -42,14 +42,34 @@ FROM papers;
 -- generate embeddings from other models which can be extended
 
 ```sql
+SELECT llm_embedding(
+    input => 'User input', -- User prompt to the LLM model
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide the server url (default: OpenAi API URL)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- If this is an Azure deployment, auth with an Entra token is also supported
+    dimensions => 1536, -- For new-generation OpenAi models you can provide dimensions for the returned embeddings (default: 1536)
+    input_type => 'search_query', -- Needed only for the cohere runtime, to indicate whether the input is for search or storage (default: 'search_query'). Can also be 'search_document'
+    runtime => 'openai' -- Runtime to use (default: 'openai'). Use `SELECT get_available_runtimes()` for the list
+);
+
 -- generate text embedding
-SELECT text_embedding('BAAI/bge-base-en', 'My text input');
+SELECT llm_embedding(model => 'BAAI/bge-base-en', input => 'My text input', runtime => 'ort');
 -- generate image embedding with image url
-SELECT image_embedding('clip/ViT-B-32-visual', 'https://link-to-your-image');
+SELECT llm_embedding(model => 'clip/ViT-B-32-visual', input => 'https://link-to-your-image', runtime => 'ort');
 -- generate image embedding with image path (this path should be accessible from postgres server)
-SELECT image_embedding('clip/ViT-B-32-visual', '/path/to/image/in-postgres-server');
+SELECT llm_embedding(model => 'clip/ViT-B-32-visual', input => '/path/to/image/in-postgres-server', runtime => 'ort');
 -- get available list of models
 SELECT get_available_models();
+-- generate openai embeddings
+SELECT llm_embedding(model => 'text-embedding-3-small', api_token => '<openai_api_token>', input => 'My text input', runtime => 'openai');
+-- generate embeddings from custom openai-compatible servers
+SELECT llm_embedding(model => 'intfloat/e5-mistral-7b-instruct', api_token => '<api_token>', input => 'My text input', runtime => 'openai', base_url => 'https://my-llm-url');
+-- generate cohere embeddings
+SELECT llm_embedding(model => 'embed-multilingual-light-v3.0', api_token => '<cohere_api_token>', input => 'My text input', runtime => 'cohere');
+-- api_token can be set via GUC
+SET lantern_extras.llm_token = '<api_token>';
+SELECT llm_embedding(model => 'text-embedding-3-small', input => 'My text input', runtime => 'openai');
 ```
 
 ## Getting started
@@ -135,7 +155,7 @@ To add new textual or visual models for generating vector embeddings you can follow
 After this your model should be callable from SQL like
 
 ```sql
-SELECT text_embedding('your/model_name', 'Your text');
+SELECT llm_embedding(model => 'your/model_name', input => 'Your text', runtime => 'ort');
 ```
 
 ## Lantern Daemon in SQL
@@ -158,14 +178,18 @@ To add a new embedding job, use the `add_embedding_job` function:
 
 ```sql
 SELECT add_embedding_job(
-    'table_name', -- Name of the table
-    'src_column', -- Source column for embeddings
-    'dst_column', -- Destination column for embeddings
-    'embedding_model', -- Embedding model to use
-    'runtime', -- Runtime environment (default: 'ort')
-    'runtime_params', -- Runtime parameters (default: '{}')
-    'pk', -- Primary key column (default: 'id')
-    'schema' -- Schema name (default: 'public')
+    table => 'articles', -- Name of the table
+    src_column => 'content', -- Source column for embeddings
+    dst_column => 'content_embedding', -- Destination column for embeddings (will be created automatically)
+    model => 'text-embedding-3-small', -- Model for the runtime to use (default: 'text-embedding-3-small')
+    pk => 'id', -- Primary key of the table; the table is required to have a primary key (default: 'id')
+    schema => 'public', -- Schema in which the table is located (default: 'public')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide the server url (default: OpenAi API URL)
+    batch_size => 500, -- Batch size of inputs to send per LLM server request; choose based on your API tier (default: determined by model and runtime)
+    dimensions => 1536, -- For new-generation OpenAi models you can provide dimensions for the returned embeddings (default: 1536)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- If this is an Azure deployment, auth with an Entra token is also supported
+    runtime => 'openai' -- Runtime to use (default: 'openai'). Use `SELECT get_available_runtimes()` for the list
 );
 ```
 
@@ -200,17 +224,19 @@ To add a new completion job, use the `add_completion_job` function:
 
 ```sql
 SELECT add_completion_job(
-    'table_name', -- Name of the table
-    'src_column', -- Source column for embeddings
-    'dst_column', -- Destination column for embeddings
-    'context', -- System prompt to be used for LLM (default: lantern_extras.completion_context GUC)
-    'column_type', -- Target column type to be used for destination (default: TEXT)
-    'model', -- LLM model to use (default: 'gpt-4o')
-    'batch_size', -- Batch size to use when sending batch requests (default: 2)
-    'runtime', -- Runtime environment (default: 'openai')
-    'runtime_params', -- Runtime parameters (default: '{}' inferred from GUC variables)
-    'pk', -- Primary key column (default: 'id')
-    'schema' -- Schema name (default: 'public')
+    table => 'articles', -- Name of the table
+    src_column => 'content', -- Source column for embeddings
+    dst_column => 'content_summary', -- Destination column for the LLM response (will be created automatically)
+    system_prompt => 'Provide short summary for the given text', -- System prompt for the LLM (default: '')
+    column_type => 'TEXT', -- Destination column type
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    pk => 'id', -- Primary key of the table; the table is required to have a primary key (default: 'id')
+    schema => 'public', -- Schema in which the table is located (default: 'public')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide the server url (default: OpenAi API URL)
+    batch_size => 10, -- Batch size of inputs to send per LLM server request; choose based on your API tier (default: determined by model and runtime)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- If this is an Azure deployment, auth with an Entra token is also supported
+    runtime => 'openai' -- Runtime to use (default: 'openai'). Use `SELECT get_available_runtimes()` for the list
 );
 ```
 
@@ -258,6 +284,14 @@ This will return a table with the following columns:
 
 ***Calling LLM Completion API***
 ```sql
-SET lantern_extras.llm_token='xxxx';
-SELECT llm_completion(query, [model, context, base_url, runtime]);
+SET lantern_extras.llm_token = 'xxxx'; -- this will be used as api_token if it is not passed via arguments
+SELECT llm_completion(
+    user_prompt => 'User input', -- User prompt to the LLM model
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    system_prompt => 'Provide short summary for the given text', -- System prompt for the LLM (default: '')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide the server url (default: OpenAi API URL)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- If this is an Azure deployment, auth with an Entra token is also supported
+    runtime => 'openai' -- Runtime to use (default: 'openai'). Use `SELECT get_available_runtimes()` for the list
+);
 ```