Skip to content

Commit

Permalink
Update SQL Server Memory to use the new VECTOR type (#796)
Browse files Browse the repository at this point in the history
## Motivation and Context (Why the change? What's the scenario?)
Vector support in Azure SQL is moving fast. Now that the
official VECTOR type has been introduced
(https://devblogs.microsoft.com/azure-sql/exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database/),
I have updated `SqlServerMemoryDb` to use it.

---------

Co-authored-by: Devis Lucato <[email protected]>
  • Loading branch information
marcominerva and dluc authored Nov 12, 2024
1 parent a60ecb8 commit 13429f9
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 27 deletions.
36 changes: 26 additions & 10 deletions extensions/SQLServer/SQLServer/DependencyInjection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,20 @@ public static IKernelMemoryBuilder WithSqlServerMemoryDb(
/// Kernel Memory Builder extension method to add SQL Server memory connector.
/// </summary>
/// <param name="builder">KM builder instance</param>
/// <param name="connString">SQL Server connection string</param>
/// <param name="useNativeVectorSearch">Whether to use native vector search or not</param>
/// <param name="connectionString">SQL Server connection string</param>
/// <param name="useNativeVectorSearch">Whether to use native vector search or not.</param>
/// <param name="vectorSize">When <paramref name="useNativeVectorSearch"/> is <see langword="true"/>, it is the vector size used by the VECTOR data type.</param>
/// <remarks>
/// Currently, the native Vector search is available on Azure SQL Database only.
/// See <a href="https://learn.microsoft.com/sql/relational-databases/vectors/vectors-sql-server">Overview of vectors in the SQL Database Engine</a> for more information about native Vectors support.
/// </remarks>
public static IKernelMemoryBuilder WithSqlServerMemoryDb(
this IKernelMemoryBuilder builder,
string connString,
bool useNativeVectorSearch = false)
string connectionString,
bool useNativeVectorSearch = false,
int vectorSize = SqlServerConfig.DefaultVectorSize)
{
builder.Services.AddSqlServerAsMemoryDb(connString, useNativeVectorSearch);
builder.Services.AddSqlServerAsMemoryDb(connectionString, useNativeVectorSearch, vectorSize);
return builder;
}
}
Expand All @@ -56,6 +62,8 @@ public static IServiceCollection AddSqlServerAsMemoryDb(
this IServiceCollection services,
SqlServerConfig config)
{
config.Validate();

return services
.AddSingleton<SqlServerConfig>(config)
.AddSingleton<IMemoryDb, SqlServerMemory>();
Expand All @@ -65,14 +73,22 @@ public static IServiceCollection AddSqlServerAsMemoryDb(
/// Inject SQL Server as the default implementation of IMemoryDb
/// </summary>
/// <param name="services">Service collection</param>
/// <param name="connString">SQL Server connection string</param>
/// <param name="useNativeVectorSearch">Whether to use native vector search or not</param>
/// <param name="connectionString">SQL Server connection string</param>
/// <param name="useNativeVectorSearch">Whether to use native vector search or not. Currently, the native Vector search is available on Azure SQL Database only.</param>
/// <param name="vectorSize">When <paramref name="useNativeVectorSearch"/> is <see langword="true"/>, it is the vector size used by the VECTOR SQL Server type.</param>
public static IServiceCollection AddSqlServerAsMemoryDb(
this IServiceCollection services,
string connString,
bool useNativeVectorSearch = false)
string connectionString,
bool useNativeVectorSearch = false,
int vectorSize = SqlServerConfig.DefaultVectorSize)
{
var config = new SqlServerConfig { ConnectionString = connString, UseNativeVectorSearch = useNativeVectorSearch };
var config = new SqlServerConfig
{
ConnectionString = connectionString,
UseNativeVectorSearch = useNativeVectorSearch,
VectorSize = vectorSize
};

return services.AddSqlServerAsMemoryDb(config);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public string PrepareGetRecordsListQuery(string index,
SqlParameterCollection parameters)
{
var queryColumns = "[key], [payload], [tags]";
if (withEmbeddings) { queryColumns += ", VECTOR_TO_JSON_ARRAY([embedding]) AS [embedding]"; }
if (withEmbeddings) { queryColumns += ", CAST([embedding] AS NVARCHAR(MAX)) AS [embedding]"; }

var sql = $"""
WITH [filters] AS
Expand Down Expand Up @@ -126,19 +126,19 @@ public string PrepareGetSimilarRecordsListQuery(string index,
if (withEmbedding)
{
queryColumns += $"," +
$"VECTOR_TO_JSON_ARRAY({this.GetFullTableName(this._config.MemoryTableName)}.[embedding]) AS [embedding]";
$"CAST({this.GetFullTableName(this._config.MemoryTableName)}.[embedding] AS NVARCHAR(MAX)) AS [embedding]";
}

var generatedFilters = this.GenerateFilters(index, parameters, filters);

var sql = $"""
SELECT TOP (@limit)
{queryColumns},
VECTOR_DISTANCE('cosine', JSON_ARRAY_TO_VECTOR(@vector), Embedding) AS [distance]
VECTOR_DISTANCE('cosine', CAST(@vector AS VECTOR({this._config.VectorSize})), Embedding) AS [distance]
FROM
{this.GetFullTableName(this._config.MemoryTableName)}
WHERE
VECTOR_DISTANCE('cosine', JSON_ARRAY_TO_VECTOR(@vector), Embedding) <= @max_distance
VECTOR_DISTANCE('cosine', CAST(@vector AS VECTOR({this._config.VectorSize})), Embedding) <= @max_distance
{generatedFilters}
ORDER BY [distance] ASC
""";
Expand All @@ -156,10 +156,10 @@ public string PrepareUpsertRecordsBatchQuery(string index)
USING (SELECT @key) as [src]([key])
ON {this.GetFullTableName(this._config.MemoryTableName)}.[key] = [src].[key]
WHEN MATCHED THEN
UPDATE SET payload=@payload, embedding=JSON_ARRAY_TO_VECTOR(@embedding), tags=@tags
UPDATE SET payload=@payload, embedding=CAST(@embedding AS VECTOR({this._config.VectorSize})), tags=@tags
WHEN NOT MATCHED THEN
INSERT ([id], [key], [collection], [payload], [tags], [embedding])
VALUES (NEWID(), @key, @index, @payload, @tags, JSON_ARRAY_TO_VECTOR(@embedding));
INSERT ([key], [collection], [payload], [tags], [embedding])
VALUES (@key, @index, @payload, @tags, CAST(@embedding AS VECTOR({this._config.VectorSize})));

DELETE FROM [tgt]
FROM {this.GetFullTableName($"{this._config.TagsTableName}_{index}")} AS [tgt]
Expand Down Expand Up @@ -211,12 +211,12 @@ PRIMARY KEY ([id])

IF OBJECT_ID(N'{this.GetFullTableName(this._config.MemoryTableName)}', N'U') IS NULL
CREATE TABLE {this.GetFullTableName(this._config.MemoryTableName)}
( [id] UNIQUEIDENTIFIER NOT NULL,
( [id] UNIQUEIDENTIFIER NOT NULL DEFAULT NEWSEQUENTIALID(),
[key] NVARCHAR(256) NOT NULL,
[collection] NVARCHAR(256) NOT NULL,
[payload] NVARCHAR(MAX),
[tags] NVARCHAR(MAX),
[embedding] VARBINARY(8000),
[embedding] VECTOR({this._config.VectorSize}),
PRIMARY KEY ([id]),
FOREIGN KEY ([collection]) REFERENCES {this.GetFullTableName(this._config.MemoryCollectionTableName)}([id]) ON DELETE CASCADE,
CONSTRAINT UK_{this._config.MemoryTableName} UNIQUE([collection], [key])
Expand Down
41 changes: 38 additions & 3 deletions extensions/SQLServer/SQLServer/SqlServerConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ public class SqlServerConfig
/// <summary>
/// The default schema used by the SQL Server memory store.
/// </summary>
public const string DefaultSchema = "dbo";
internal const string DefaultSchema = "dbo";

/// <summary>
/// The default vector size when using the native VECTOR type.
/// </summary>
internal const int DefaultVectorSize = 1536;

/// <summary>
/// The connection string to the SQL Server database.
Expand Down Expand Up @@ -66,8 +71,38 @@ public class SqlServerConfig
/// Whether to use native vector search or not.
/// </summary>
/// <remarks>
/// Currently, Vector Search supports only Azure SQL Database and can handle vectors up to 1998 dimensions.
/// See https://devblogs.microsoft.com/azure-sql/announcing-eap-native-vector-support-in-azure-sql-database for more information.
/// Currently, the native Vector search is available on Azure SQL Database only.
/// See <a href="https://learn.microsoft.com/sql/relational-databases/vectors/vectors-sql-server">Overview of vectors in the SQL Database Engine</a> for more information.
/// </remarks>
/// <seealso cref="VectorSize"/>
public bool UseNativeVectorSearch { get; set; } = false;

/// <summary>
/// The vector size when using the native vector search.
/// </summary>
/// <remarks>
/// Currently, the maximum supported vector size is 1998.
/// See <a href="https://learn.microsoft.com/sql/relational-databases/vectors/vectors-sql-server">Overview of vectors in the SQL Database Engine</a> for more information.
/// </remarks>
/// <seealso cref="UseNativeVectorSearch"/>
public int VectorSize { get; set; } = DefaultVectorSize;

/// <summary>
/// Verify that the current state is valid.
/// </summary>
/// <remarks>
/// The vector size is checked only when <see cref="UseNativeVectorSearch"/> is enabled;
/// otherwise <see cref="VectorSize"/> is not validated.
/// </remarks>
/// <exception cref="ConfigurationException">
/// Native vector search is enabled and <see cref="VectorSize"/> is not between 1 and 1998 (inclusive).
/// </exception>
public void Validate()
{
    // Maximum number of dimensions currently supported by the native VECTOR type.
    const int MaxVectorSize = 1998;

    if (!this.UseNativeVectorSearch)
    {
        return;
    }

    // Zero must be rejected too: the size is interpolated into VECTOR(n) in the
    // generated SQL, and the error message already promises "greater than 0".
    if (this.VectorSize <= 0)
    {
        throw new ConfigurationException("The vector size must be greater than 0");
    }

    if (this.VectorSize > MaxVectorSize)
    {
        throw new ConfigurationException("The vector size must be less than or equal to 1998");
    }
}
}
6 changes: 3 additions & 3 deletions extensions/SQLServer/SQLServer/SqlServerMemory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ public async IAsyncEnumerable<MemoryRecord> GetListAsync(

command.Parameters.AddWithValue("@min_relevance_score", minRelevance);
command.Parameters.AddWithValue("@max_distance", 1 - minRelevance);
command.Parameters.AddWithValue("@vector", JsonSerializer.Serialize(embedding.Data.ToArray()));
command.Parameters.AddWithValue("@vector", JsonSerializer.Serialize(embedding.Data));
command.Parameters.AddWithValue("@index", index);
command.Parameters.AddWithValue("@limit", limit);

Expand Down Expand Up @@ -326,7 +326,7 @@ public async Task<string> UpsertAsync(string index, MemoryRecord record, Cancell
{
if (!this._isReady) { await this.InitAsync(cancellationToken).ConfigureAwait(false); }

await foreach (var item in this.UpsertBatchAsync(index, new[] { record }, cancellationToken).ConfigureAwait(false))
await foreach (var item in this.UpsertBatchAsync(index, [record], cancellationToken).ConfigureAwait(false))
{
return item;
}
Expand Down Expand Up @@ -363,7 +363,7 @@ public async IAsyncEnumerable<string> UpsertBatchAsync(string index, IEnumerable
command.Parameters.AddWithValue("@key", record.Id);
command.Parameters.AddWithValue("@payload", JsonSerializer.Serialize(record.Payload) ?? (object)DBNull.Value);
command.Parameters.AddWithValue("@tags", JsonSerializer.Serialize(record.Tags) ?? (object)DBNull.Value);
command.Parameters.AddWithValue("@embedding", JsonSerializer.Serialize(record.Vector.Data.ToArray()));
command.Parameters.AddWithValue("@embedding", JsonSerializer.Serialize(record.Vector.Data));
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
command.Dispose();

Expand Down
5 changes: 3 additions & 2 deletions service/Service/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -633,8 +633,9 @@
"MemoryTableName": "KMMemories",
"EmbeddingsTableName": "KMEmbeddings",
"TagsTableName": "KMMemoriesTags",
// See https://devblogs.microsoft.com/azure-sql/announcing-eap-native-vector-support-in-azure-sql-database
"UseNativeVectorSearch": false
// See https://learn.microsoft.com/sql/relational-databases/vectors/vectors-sql-server?view=azuresqldb-current
"UseNativeVectorSearch": false,
"VectorSize": 1536
}
}
}
Expand Down

0 comments on commit 13429f9

Please sign in to comment.