Skip to content

Commit

Permalink
.Net: Allow custom serialization options to be passed to TextMemoryPl…
Browse files Browse the repository at this point in the history
…ugin. (microsoft#7098)

### Motivation and Context

When `TextMemoryPlugin` gets back multiple results from the memory store
during recall, it uses json serialization to
turn these multiple results into a single string.
In some cases, this may cause unintended side effects, e.g. when
non-Latin script is present in the results, `JsonSerializer`
automatically escapes these characters.
Adding support for passing custom `JsonSerializerOptions` into the
`TextMemoryPlugin` to customize the serialization behavior.

See microsoft#6593

### Description

Added optional `JsonSerializerOptions` to the `TextMemoryPlugin`
constructor.
Added example to demonstrate this scenario.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
  • Loading branch information
westey-m authored Jul 4, 2024
1 parent ca7a58e commit c8fa30b
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Memory;
using Microsoft.SemanticKernel.Plugins.Memory;

namespace Memory;

/// <summary>
/// Demonstrates passing custom <see cref="JsonSerializerOptions"/> to <see cref="TextMemoryPlugin"/> so that the caller controls
/// how multiple recalled memories are serialized into a single string.
/// </summary>
/// <remarks>
/// A recall that yields more than one memory must be flattened into one string before it is handed back to the kernel, and
/// <see cref="TextMemoryPlugin"/> does this with <see cref="JsonSerializer"/>.
/// The default serialization options are not always suitable: for example, <see cref="JsonSerializer"/> escapes non-Latin characters by default.
/// Supplying your own <see cref="JsonSerializerOptions"/> to the <see cref="TextMemoryPlugin"/> lets you decide how the memories are written out.
/// </remarks>
public class TextMemoryPlugin_RecallJsonSerializationWithOptions(ITestOutputHelper output) : BaseTest(output)
{
    [Fact]
    public async Task RunAsync()
    {
        // Semantic memory needs an embedding generator plus a store; the store here is in-memory only.
        var embeddingService = new AzureOpenAITextEmbeddingGenerationService(
            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            TestConfiguration.AzureOpenAIEmbeddings.ApiKey);
        var volatileStore = new VolatileMemoryStore();

        // 'SemanticTextMemory' ties the generator and the store together and is what we save to and recall from.
        var semanticMemory = new SemanticTextMemory(volatileStore, embeddingService);
        await semanticMemory.SaveInformationAsync("samples", "First example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা", "test-record-1");
        await semanticMemory.SaveInformationAsync("samples", "Second example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা", "test-record-2");

        // --- Scenario 1: plugin created without custom JsonSerializerOptions ---
        Kernel defaultKernel = Kernel.CreateBuilder().Build();
        KernelPlugin defaultPlugin = defaultKernel.ImportPluginFromObject(new TextMemoryPlugin(semanticMemory));

        // The recall operation returned the following text, where the Thai and Bengali text was escaped:
        // ["Second example of some text in Thai and Bengali: \u0E27\u0E23\u0E23\u0E13\u0E22\u0E38\u0E01\u0E15\u0E4C \u099A\u09B2\u09BF\u09A4\u09AD\u09BE\u09B7\u09BE","First example of some text in Thai and Bengali: \u0E27\u0E23\u0E23\u0E13\u0E22\u0E38\u0E01\u0E15\u0E4C \u099A\u09B2\u09BF\u09A4\u09AD\u09BE\u09B7\u09BE"]
        Console.WriteLine(await RecallSamplesAsync(defaultKernel, defaultPlugin));

        // --- Scenario 2: plugin created with options that let Thai and Bengali script through unescaped ---
        Kernel customizedKernel = Kernel.CreateBuilder().Build();
        var unescapedOptions = new JsonSerializerOptions
        {
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.BasicLatin, UnicodeRanges.Thai, UnicodeRanges.Bengali),
        };
        KernelPlugin customizedPlugin = customizedKernel.ImportPluginFromObject(
            new TextMemoryPlugin(semanticMemory, jsonSerializerOptions: unescapedOptions));

        // The recall operation returned the following text, where the Thai and Bengali text was not escaped:
        // ["Second example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা","First example of some text in Thai and Bengali: วรรณยุกต์ চলিতভাষা"]
        Console.WriteLine(await RecallSamplesAsync(customizedKernel, customizedPlugin));

        // Invokes the plugin's "Recall" function with the same arguments for both scenarios
        // (limit of 2 so that more than one memory comes back and JSON serialization kicks in).
        static async Task<string?> RecallSamplesAsync(Kernel kernel, KernelPlugin memoryPlugin)
        {
            KernelArguments recallArguments = new()
            {
                [TextMemoryPlugin.InputParam] = "Text examples",
                [TextMemoryPlugin.CollectionParam] = "samples",
                [TextMemoryPlugin.LimitParam] = "2",
                [TextMemoryPlugin.RelevanceParam] = "0.79",
            };

            FunctionResult recalled = await kernel.InvokeAsync(memoryPlugin["Recall"], recallArguments);
            return recalled.GetValue<string>();
        }
    }
}
1 change: 1 addition & 0 deletions dotnet/samples/Concepts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ Down below you can find the code snippets that demonstrate the usage of many Sem
- [TextChunkingAndEmbedding](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextChunkingAndEmbedding.cs)
- [TextMemoryPlugin_GeminiEmbeddingGeneration](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_GeminiEmbeddingGeneration.cs)
- [TextMemoryPlugin_MultipleMemoryStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_MultipleMemoryStore.cs)
- [TextMemoryPlugin_RecallJsonSerializationWithOptions](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs)

## Optimization - Examples of different cost and performance optimization techniques

Expand Down
12 changes: 9 additions & 3 deletions dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,22 @@ public sealed class TextMemoryPlugin

private readonly ISemanticTextMemory _memory;
private readonly ILogger _logger;
private readonly JsonSerializerOptions? _jsonSerializerOptions;

/// <summary>
/// Creates a new instance of the TextMemoryPlugin
/// Initializes a new instance of the <see cref="TextMemoryPlugin"/> class.
/// </summary>
/// <param name="memory">The <see cref="ISemanticTextMemory"/> instance to use for retrieving and saving memories to and from storage.</param>
/// <param name="loggerFactory">The <see cref="ILoggerFactory"/> to use for logging. If null, no logging will be performed.</param>
/// <param name="jsonSerializerOptions">An optional <see cref="JsonSerializerOptions"/> to use when turning multiple memories into json text. If null, <see cref="JsonSerializerOptions.Default"/> is used.</param>
public TextMemoryPlugin(
ISemanticTextMemory memory,
ILoggerFactory? loggerFactory = null)
ILoggerFactory? loggerFactory = null,
JsonSerializerOptions? jsonSerializerOptions = null)
{
this._memory = memory;
this._logger = loggerFactory?.CreateLogger(typeof(TextMemoryPlugin)) ?? NullLogger.Instance;
this._jsonSerializerOptions = jsonSerializerOptions ?? JsonSerializerOptions.Default;
}

/// <summary>
Expand Down Expand Up @@ -128,7 +134,7 @@ public async Task<string> RecallAsync(
return string.Empty;
}

return limit == 1 ? memories[0].Metadata.Text : JsonSerializer.Serialize(memories.Select(x => x.Metadata.Text));
return limit == 1 ? memories[0].Metadata.Text : JsonSerializer.Serialize(memories.Select(x => x.Metadata.Text), this._jsonSerializerOptions);
}

/// <summary>
Expand Down

0 comments on commit c8fa30b

Please sign in to comment.