Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Vector Search #1639

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7284ac5
Add vector value type
cindy-peng Oct 22, 2024
fe5e966
Add vectorValue type
cindy-peng Oct 22, 2024
9c19afa
Add vector value test
cindy-peng Oct 22, 2024
8163c95
Add unit tests and system tests
cindy-peng Oct 26, 2024
6df2d8a
Fix formatting
cindy-peng Oct 26, 2024
32fdd41
Fix empty FindNearest pb instance
cindy-peng Oct 28, 2024
2b11399
Fix formatting
cindy-peng Oct 28, 2024
6165685
Fix javadoc
cindy-peng Oct 28, 2024
4461f5b
Merge from main
cindy-peng Oct 28, 2024
17b411f
fix(sample): change update entity sample to use transaction (#1633)
cindy-peng Oct 24, 2024
2aafa17
deps: update dependency com.google.cloud:sdk-platform-java-config to …
renovate-bot Oct 24, 2024
949a0ae
chore(main): release 2.24.0 (#1631)
release-please[bot] Oct 25, 2024
11b3227
deps: update googleapis/sdk-platform-java action to v2.49.0 (#1638)
renovate-bot Oct 28, 2024
742c7b9
chore(main): release 2.24.1-SNAPSHOT (#1635)
release-please[bot] Oct 28, 2024
089b68e
chore: Update generation configuration at Sun Oct 27 02:26:19 UTC 202…
cloud-java-bot Oct 28, 2024
81980d2
deps: update dependency com.google.cloud:sdk-platform-java-config to …
renovate-bot Oct 28, 2024
546cf81
chore(main): release 2.24.1 (#1641)
release-please[bot] Oct 28, 2024
4b21c3e
merging conflict
cindy-peng Oct 28, 2024
c495bb6
chore: generate libraries at Mon Oct 28 20:25:23 UTC 2024
cloud-java-bot Oct 28, 2024
e93ce5c
Fix import
cindy-peng Oct 28, 2024
0072de4
chore: generate libraries at Mon Oct 28 20:30:34 UTC 2024
cloud-java-bot Oct 28, 2024
a371467
Add Integration test
cindy-peng Oct 30, 2024
c8340bf
Add comment and fix formatting
cindy-peng Oct 30, 2024
62da35d
Modify comment and fix formatting
cindy-peng Oct 30, 2024
9694abb
Add setExcludeFromIndexes back to vectorvalue builder
cindy-peng Oct 30, 2024
f267339
Adjust testVectorSearch sample code
cindy-peng Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.google.cloud.datastore.DatastoreOptions;
import com.google.cloud.datastore.Entity;
import com.google.cloud.datastore.EntityQuery;
import com.google.cloud.datastore.FindNearest;
import com.google.cloud.datastore.FullEntity;
import com.google.cloud.datastore.IncompleteKey;
import com.google.cloud.datastore.Key;
Expand All @@ -47,6 +48,7 @@
import com.google.cloud.datastore.StructuredQuery.OrderBy;
import com.google.cloud.datastore.StructuredQuery.PropertyFilter;
import com.google.cloud.datastore.Transaction;
import com.google.cloud.datastore.VectorValue;
import com.google.cloud.datastore.testing.LocalDatastoreHelper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -408,6 +410,9 @@ private void setUpQueryTests() {
"description",
StringValue.newBuilder("Learn Cloud Datastore").setExcludeFromIndexes(true).build())
.set("tag", "fun", "l", "programming", "learn")
.set(
"vector_property",
VectorValue.newBuilder(3.0, 1.0, 2.0).setExcludeFromIndexes(true).build())
.build());
}

Expand Down Expand Up @@ -1193,4 +1198,18 @@ public void testStaleReads() throws InterruptedException {
// [END datastore_stale_read]
assertValidQueryRealBackend(query);
}

@Test
public void testVectorSearch() {
  setUpQueryTests();
  // [START datastore_vector_search]
  // The vector that stored "vector_property" values are compared against.
  VectorValue queryVector = VectorValue.newBuilder(1.78, 2.56, 3.88).build();

  // Ask for the single nearest neighbor by COSINE distance and have the computed
  // distance written to the "distance" result field.
  String vectorProperty = "vector_property";
  int neighborCount = 1;
  String distanceResultField = "distance";
  FindNearest findNearest =
      new FindNearest(
          vectorProperty,
          queryVector,
          FindNearest.DistanceMeasure.COSINE,
          neighborCount,
          distanceResultField);

  Query<Entity> query = Query.newEntityQueryBuilder().setFindNearest(findNearest).build();
  // [END datastore_vector_search]
  assertValidQuery(query);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,17 @@ public <T extends Value<?>> List<T> getList(String name) {
return (List<T>) getValue(name).get();
}

/**
 * Returns the value of the property {@code name}, viewed as a vector (a list of doubles).
 *
 * @param name the name of the property to read
 * @return the payload of the property's value, cast to a list of {@link DoubleValue}
 * @throws DatastoreException if no such property
 * @throws ClassCastException if the stored payload is not a {@link List}; because of type
 *     erasure the element type is not verified by this cast
 */
@SuppressWarnings("unchecked")
public List<DoubleValue> getVector(String name) {
  Value<?> property = getValue(name);
  return (List<DoubleValue>) property.get();
}

/**
* Returns the property value as a blob.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.datastore;

import com.google.common.base.MoreObjects;
import com.google.common.base.MoreObjects.ToStringHelper;
import com.google.protobuf.DoubleValue;
import com.google.protobuf.Int32Value;
import java.io.Serializable;
import java.util.Objects;
import javax.annotation.Nullable;

/**
 * A query that finds the entities whose vector fields are closest to a certain query vector. Create
 * an instance of {@code FindNearest} and use it with a {@link Query}.
 *
 * <p>Instances are immutable: every field is assigned once in the constructor. No argument
 * validation is performed here; the values are copied into the request as-is by {@link #toPb()}.
 */
public final class FindNearest implements Serializable {

  private static final long serialVersionUID = 4688656124180403551L;

  /** An indexed vector property to search upon. */
  private final String vectorProperty;

  /** The query vector that we are searching on. */
  private final VectorValue queryVector;

  /** The distance measure to use, required. */
  private final DistanceMeasure measure;

  /** The number of nearest neighbors to return. Must be a positive integer of no more than 100. */
  private final int limit;

  /** Optional name of the field to output the result of the vector distance calculation. */
  private final @Nullable String distanceResultField;

  /**
   * Optional threshold for which no less similar documents will be returned. The behavior of the
   * specified distance measure affects the meaning of the distance threshold.
   */
  private final @Nullable Double distanceThreshold;

  /**
   * Creates a FindNearest query with every option spelled out.
   *
   * @param vectorProperty name of the vector property to search upon
   * @param queryVector the vector to search with
   * @param measure the distance measure used to compare vectors
   * @param limit the number of nearest neighbors to return
   * @param distanceResultField name of the field receiving the computed distance, or {@code null}
   *     to leave it unset
   * @param distanceThreshold distance threshold, or {@code null} to leave it unset
   */
  public FindNearest(
      String vectorProperty,
      VectorValue queryVector,
      DistanceMeasure measure,
      int limit,
      @Nullable String distanceResultField,
      @Nullable Double distanceThreshold) {
    this.vectorProperty = vectorProperty;
    this.queryVector = queryVector;
    this.measure = measure;
    this.limit = limit;
    this.distanceResultField = distanceResultField;
    this.distanceThreshold = distanceThreshold;
  }

  /** Creates a FindNearest query without a distance result field or a distance threshold. */
  public FindNearest(
      String vectorProperty, VectorValue queryVector, DistanceMeasure measure, int limit) {
    this(vectorProperty, queryVector, measure, limit, null, null);
  }

  /**
   * Creates a FindNearest query with a distance result field and no distance threshold.
   *
   * <p>A bare {@code null} literal as the last argument is ambiguous with the {@code Double}
   * overload; use the four-argument constructor or cast the literal instead.
   */
  public FindNearest(
      String vectorProperty,
      VectorValue queryVector,
      DistanceMeasure measure,
      int limit,
      @Nullable String distanceResultField) {
    this(vectorProperty, queryVector, measure, limit, distanceResultField, null);
  }

  /**
   * Creates a FindNearest query with a distance threshold and no distance result field.
   *
   * <p>A bare {@code null} literal as the last argument is ambiguous with the {@code String}
   * overload; use the four-argument constructor or cast the literal instead.
   */
  public FindNearest(
      String vectorProperty,
      VectorValue queryVector,
      DistanceMeasure measure,
      int limit,
      @Nullable Double distanceThreshold) {
    this(vectorProperty, queryVector, measure, limit, null, distanceThreshold);
  }

  @Override
  public int hashCode() {
    return Objects.hash(
        vectorProperty, queryVector, measure, limit, distanceResultField, distanceThreshold);
  }

  /**
   * Returns true if this FindNearest query is equal to the provided object.
   *
   * @param obj The object to compare against.
   * @return Whether this FindNearest query is equal to the provided object.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    // instanceof is already false for null, so no separate null check is needed.
    if (!(obj instanceof FindNearest)) {
      return false;
    }
    FindNearest otherQuery = (FindNearest) obj;
    return limit == otherQuery.limit
        && measure == otherQuery.measure
        && Objects.equals(vectorProperty, otherQuery.vectorProperty)
        && Objects.equals(queryVector, otherQuery.queryVector)
        && Objects.equals(distanceResultField, otherQuery.distanceResultField)
        && Objects.equals(distanceThreshold, otherQuery.distanceThreshold);
  }

  @Override
  public String toString() {
    ToStringHelper toStringHelper = MoreObjects.toStringHelper(this);
    return toStringHelper
        .add("vectorProperty", vectorProperty)
        .add("queryVector", queryVector)
        .add("measure", measure)
        .add("limit", limit)
        .add("distanceResultField", distanceResultField)
        .add("distanceThreshold", distanceThreshold)
        .toString();
  }

  /**
   * Builds a FindNearest from its protobuf representation.
   *
   * <p>An empty distance result property and an unset distance threshold are both mapped to
   * {@code null}.
   *
   * @throws IllegalArgumentException if the proto carries a distance measure that has no
   *     same-named constant in {@link DistanceMeasure} (for example {@code UNRECOGNIZED})
   */
  static FindNearest fromPb(com.google.datastore.v1.FindNearest findNearestPb) {
    String vectorProperty = findNearestPb.getVectorProperty().getName();
    VectorValue queryVector =
        VectorValue.MARSHALLER.fromProto(findNearestPb.getQueryVector()).build();
    // The two enums are matched by constant name.
    DistanceMeasure distanceMeasure =
        DistanceMeasure.valueOf(findNearestPb.getDistanceMeasure().name());
    int limit = findNearestPb.getLimit().getValue();

    // Generated string getters never return null; the empty string means "not set".
    String resultProperty = findNearestPb.getDistanceResultProperty();
    String distanceResultField = resultProperty.isEmpty() ? null : resultProperty;

    // Use field presence rather than comparing references with the default instance, so an
    // explicitly set threshold is always preserved regardless of which instance carries it.
    Double distanceThreshold =
        findNearestPb.hasDistanceThreshold()
            ? findNearestPb.getDistanceThreshold().getValue()
            : null;

    return new FindNearest(
        vectorProperty,
        queryVector,
        distanceMeasure,
        limit,
        distanceResultField,
        distanceThreshold);
  }

  /**
   * Converts this query to its protobuf representation. The optional distance result property and
   * distance threshold are only written when they are non-null.
   */
  com.google.datastore.v1.FindNearest toPb() {
    com.google.datastore.v1.FindNearest.Builder findNearestPb =
        com.google.datastore.v1.FindNearest.newBuilder();
    findNearestPb.getVectorPropertyBuilder().setName(vectorProperty);
    findNearestPb.setQueryVector(queryVector.toPb());
    findNearestPb.setDistanceMeasure(toProto(measure));
    findNearestPb.setLimit(Int32Value.of(limit));
    if (distanceResultField != null) {
      findNearestPb.setDistanceResultProperty(distanceResultField);
    }
    if (distanceThreshold != null) {
      findNearestPb.setDistanceThreshold(DoubleValue.of(distanceThreshold));
    }
    return findNearestPb.build();
  }

  /**
   * Maps a {@link DistanceMeasure} to the protobuf enum constant of the same name.
   *
   * <p>{@link DistanceMeasure#DISTANCE_MEASURE_UNSPECIFIED} maps to the proto's own
   * {@code DISTANCE_MEASURE_UNSPECIFIED}. It must not map to {@code UNRECOGNIZED}: that sentinel
   * has no wire number, so generated builders refuse it with an {@link IllegalArgumentException}.
   */
  protected static com.google.datastore.v1.FindNearest.DistanceMeasure toProto(
      DistanceMeasure distanceMeasure) {
    switch (distanceMeasure) {
      case DISTANCE_MEASURE_UNSPECIFIED:
        return com.google.datastore.v1.FindNearest.DistanceMeasure.DISTANCE_MEASURE_UNSPECIFIED;
      case COSINE:
        return com.google.datastore.v1.FindNearest.DistanceMeasure.COSINE;
      case EUCLIDEAN:
        return com.google.datastore.v1.FindNearest.DistanceMeasure.EUCLIDEAN;
      case DOT_PRODUCT:
        return com.google.datastore.v1.FindNearest.DistanceMeasure.DOT_PRODUCT;
      default:
        // Only reachable if a constant is added to DistanceMeasure without a mapping above.
        return com.google.datastore.v1.FindNearest.DistanceMeasure.UNRECOGNIZED;
    }
  }

  /** The distance measure to use when comparing vectors in a {@link FindNearest query}. */
  public enum DistanceMeasure {
    /**
     * No distance measure selected. NOTE(review): the measure is documented as required, so the
     * backend presumably rejects this value; confirm.
     */
    DISTANCE_MEASURE_UNSPECIFIED,
    /**
     * COSINE distance compares vectors based on the angle between them, which allows you to measure
     * similarity that isn't based on the vectors' magnitude. We recommend using DOT_PRODUCT with
     * unit normalized vectors instead of COSINE distance, which is mathematically equivalent with
     * better performance.
     */
    COSINE,
    /** Measures the EUCLIDEAN distance between the vectors. */
    EUCLIDEAN,
    /** Similar to cosine but is affected by the magnitude of the vectors. */
    DOT_PRODUCT
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ public abstract class StructuredQuery<V> extends Query<V> implements RecordQuery
private final Cursor endCursor;
private final int offset;
private final Integer limit;
private final FindNearest findNearest;

private final ResultType<V> resultType;

Expand Down Expand Up @@ -731,6 +732,9 @@ public interface Builder<V> {
/** Adds settings to the existing order by clause. */
Builder<V> addOrderBy(OrderBy orderBy, OrderBy... others);

/**
 * Sets the {@link FindNearest} vector search for the query.
 *
 * @param findNearest the vector search to apply; the {@code BuilderImpl} implementation rejects
 *     {@code null} with an {@link IllegalArgumentException}
 * @return this builder
 */
Builder<V> setFindNearest(FindNearest findNearest);

StructuredQuery<V> build();
}

Expand All @@ -753,6 +757,7 @@ abstract static class BuilderImpl<V, B extends BuilderImpl<V, B>> implements Bui
private Cursor endCursor;
private int offset;
private Integer limit;
private FindNearest findNearest;

BuilderImpl(ResultType<V> resultType) {
this.resultType = resultType;
Expand All @@ -770,6 +775,7 @@ abstract static class BuilderImpl<V, B extends BuilderImpl<V, B>> implements Bui
endCursor = query.endCursor;
offset = query.offset;
limit = query.limit;
findNearest = query.findNearest;
}

@SuppressWarnings("unchecked")
Expand Down Expand Up @@ -841,6 +847,13 @@ public B addOrderBy(OrderBy orderBy, OrderBy... others) {
return self();
}

/**
 * Stores the vector search to run with the query being built.
 *
 * @throws IllegalArgumentException if {@code findNearest} is null
 */
@Override
public B setFindNearest(FindNearest findNearest) {
  boolean present = findNearest != null;
  Preconditions.checkArgument(present, "vector query must not be null");
  this.findNearest = findNearest;
  return self();
}

B clearProjection() {
projection.clear();
return self();
Expand Down Expand Up @@ -904,6 +917,10 @@ B mergeFrom(com.google.datastore.v1.Query queryPb) {
for (com.google.datastore.v1.PropertyReference distinctOnPb : queryPb.getDistinctOnList()) {
addDistinctOn(distinctOnPb.getName());
}
if (queryPb.getFindNearest() != null
&& queryPb.getFindNearest() != com.google.datastore.v1.FindNearest.getDefaultInstance()) {
setFindNearest(FindNearest.fromPb(queryPb.getFindNearest()));
}
return self();
}
}
Expand All @@ -920,6 +937,7 @@ B mergeFrom(com.google.datastore.v1.Query queryPb) {
endCursor = builder.endCursor;
offset = builder.offset;
limit = builder.limit;
findNearest = builder.findNearest;
}

@Override
Expand All @@ -935,6 +953,7 @@ public String toString() {
.add("orderBy", orderBy)
.add("projection", projection)
.add("distinctOn", distinctOn)
.add("findNearest", findNearest)
.toString();
}

Expand All @@ -950,7 +969,8 @@ public int hashCode() {
filter,
orderBy,
projection,
distinctOn);
distinctOn,
findNearest);
}

@Override
Expand All @@ -971,7 +991,8 @@ public boolean equals(Object obj) {
&& Objects.equals(filter, other.filter)
&& Objects.equals(orderBy, other.orderBy)
&& Objects.equals(projection, other.projection)
&& Objects.equals(distinctOn, other.distinctOn);
&& Objects.equals(distinctOn, other.distinctOn)
&& Objects.equals(findNearest, other.findNearest);
}

/** Returns the kind for this query. */
Expand Down Expand Up @@ -1023,6 +1044,11 @@ public Integer getLimit() {
return limit;
}

/**
 * Returns the {@link FindNearest} vector search attached to this query.
 *
 * @return the vector search copied from the builder, or {@code null} if the builder held none
 */
public FindNearest getFindNearest() {
  return this.findNearest;
}

public abstract Builder<V> toBuilder();

@InternalApi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ public Query prepare(StructuredQuery<?> query) {
.build();
queryPb.addProjection(expressionPb);
}
if (query.getFindNearest() != null) {
queryPb.setFindNearest(query.getFindNearest().toPb());
}

return queryPb.build();
}
Expand Down
Loading
Loading