Skip to content

Commit 89a57e9

Browse files
Add schema limit (#132)
Signed-off-by: junjie.jiang <[email protected]>
1 parent 593c4f2 commit 89a57e9

File tree

5 files changed

+109
-9
lines changed

5 files changed

+109
-9
lines changed

src/common.h

+2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ const std::string kMetaFieldName("$meta");
5858
const std::string kPlaceholderTag("$0");
5959

6060
const int64_t kTopkLimit = 16384;  // NOTE(review): presumably the largest topk a search may request; its use is not visible in this diff -- confirm
61+
const int64_t kSchemaFieldLimit = 64;  // max number of fields in a collection schema; ValidateSchema rejects fields_size() above this
62+
const int64_t kMaxLengthLimit = 65535;  // inclusive upper bound for a VarChar field's max length; larger values are rejected as ParameterInvalid
6163

6264
// scalar index type
6365
const std::string kDefaultStringIndexType("Trie");

src/create_collection_task.cpp

+24-7
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ CreateCollectionTask::GetVarcharFieldMaxLength(
5555
if (kv_pair.key() == kMaxLengthKey) {
5656
try {
5757
auto length = std::stoll(kv_pair.value());
58-
if (length <= 0) {
58+
if (length <= 0 || length > kMaxLengthLimit) {
5959
return Status::ParameterInvalid(
6060
"the maximum length specified for a VarChar should be "
6161
"in (0, 65535])");
@@ -73,8 +73,17 @@ CreateCollectionTask::GetVarcharFieldMaxLength(
7373
for (const auto& kv_pair : field.index_params()) {
7474
if (kv_pair.key() == kMaxLengthKey) {
7575
try {
76-
*max_len = std::stoll(kv_pair.value());
77-
return Status::Ok();
76+
auto length = std::stoll(kv_pair.value());
77+
if (length <= 0 || length > kMaxLengthLimit) {
78+
return Status::ParameterInvalid(
79+
"the maximum length specified for a VarChar should be "
80+
"in (0, 65535])");
81+
82+
return Status::Ok();
83+
} else {
84+
*max_len = static_cast<uint64_t>(length);
85+
return Status::Ok();
86+
}
7887
} catch (std::exception& e) {
7988
return Status::ParameterInvalid("Invalid max length {}",
8089
kv_pair.value());
@@ -142,7 +151,8 @@ CreateCollectionTask::CheckDefaultValue(
142151
case DCase::kFloatData:
143152
if (f.data_type() != DType::Float) {
144153
LOG_ERROR(
145-
"{} field's default value is Float type, mismatches "
154+
"{} field's default value is Float type, "
155+
"mismatches "
146156
"field type",
147157
f.name());
148158
return false;
@@ -151,7 +161,8 @@ CreateCollectionTask::CheckDefaultValue(
151161
case DCase::kDoubleData:
152162
if (f.data_type() != DType::Double) {
153163
LOG_ERROR(
154-
"{} field's default value is Double type, mismatches "
164+
"{} field's default value is Double type, "
165+
"mismatches "
155166
"field type",
156167
f.name());
157168
return false;
@@ -229,6 +240,11 @@ CreateCollectionTask::AppendSysFields(
229240
Status
230241
CreateCollectionTask::ValidateSchema(
231242
const ::milvus::proto::schema::CollectionSchema& schema) {
243+
if (schema.fields_size() > kSchemaFieldLimit)
244+
return Status::ParameterInvalid(
245+
"maximum field's number should be limited to {}",
246+
kSchemaFieldLimit);
247+
232248
std::set<std::string> field_names;
233249
std::string pk_name;
234250
for (const auto& field_schema : schema.fields()) {
@@ -239,13 +255,14 @@ CreateCollectionTask::ValidateSchema(
239255
if (field_schema.is_primary_key()) {
240256
if (!pk_name.empty()) {
241257
return Status::ParameterInvalid(
242-
"there are more than one primary key, field_name = {}, {}",
258+
"there are more than one primary key, field_name = {}, "
259+
"{}",
243260
pk_name,
244261
field_schema.name());
245262
} else {
246263
pk_name = field_schema.name();
247264
}
248-
}
265+
}
249266
if (field_schema.is_dynamic()) {
250267
return Status::ParameterInvalid(
251268
"cannot explicitly set a field as a dynamic field");

src/unittest/run_examples.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616

1717

1818
def run_all(py_path):
19-
20-
for f in examples_dir.glob('*.py'):
19+
for f in py_path.glob('*.py'):
2120
if str(f).endswith('bfloat16_example.py') or str(f).endswith('dynamic_field.py'):
2221
continue
2322
print(str(f))

tests/test_delete.py

+2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def test_delete_by_ids(self):
4040
result = milvus_client.delete(collection_name, ids=['-xf%^@#$%^&***)(*/.', '中文id'])
4141
result = milvus_client.search(collection_name, [[0.0, 1.0]], limit=3)
4242
self.assertEqual([item['id']for item in result[0]], ['Título', 'Cien años de soledad'])
43+
milvus_client.release_collection(collection_name)
4344
del milvus_client
4445

4546
local_client = MilvusClient('./local_test.db')
@@ -77,6 +78,7 @@ def test_delete_by_filter(self):
7778
result = milvus_client.delete(collection_name, filter='(a==100) && (b==300)')
7879
result = milvus_client.search(collection_name, [[0.0, 1.0]], limit=3)
7980
self.assertEqual([item['id']for item in result[0]], ['中文id', 'Título', 'Cien años de soledad'])
81+
milvus_client.release_collection(collection_name)
8082
del milvus_client
8183

8284
local_client = MilvusClient('./local_test.db')

tests/test_schema.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright (C) 2019-2024 Zilliz. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4+
# in compliance with the License. You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software distributed under the License
9+
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10+
# or implied. See the License for the specific language governing permissions and limitations under
11+
# the License.
12+
13+
import unittest
14+
from pymilvus import MilvusClient, MilvusException, DataType
15+
16+
17+
class TestDefaultSearch(unittest.TestCase):
    """Exercises the schema limits applied when a collection is created.

    Every test rebuilds the collection "hello_milvus" in the local database
    file, starting from an INT64 primary key "id" and a 2-dimensional float
    vector "embeddings". A value at the limit must be accepted; a value one
    past the limit must make ``create_collection`` raise ``MilvusException``.
    """

    def _open_with_base_schema(self, name):
        """Open the local database, drop collection ``name`` if it already
        exists, and return ``(client, schema)`` where the schema holds only
        the "id" and "embeddings" fields."""
        client = MilvusClient("./local_test.db")
        if client.has_collection(name):
            client.drop_collection(name)
        base = client.create_schema(enable_dynamic_field=True)
        base.add_field("id", DataType.INT64, is_primary=True)
        base.add_field("embeddings", DataType.FLOAT_VECTOR, dim=2)
        return client, base

    def _embeddings_index(self, client):
        """Return index params holding an L2 index on "embeddings"."""
        params = client.prepare_index_params()
        params.add_index(field_name="embeddings", metric_type="L2")
        return params

    def test_schema_field_limits(self):
        # 2 base fields + 62 scalar fields = 64 declared fields: accepted.
        name = "hello_milvus"
        client, schema = self._open_with_base_schema(name)
        for suffix in map(str, range(62)):
            schema.add_field('a' + suffix, DataType.INT64)
        params = self._embeddings_index(client)
        client.create_collection(name, schema=schema, index_params=params)

    def test_schema_field_out_limits(self):
        # 2 base fields + 63 scalar fields = 65 declared fields: rejected.
        name = "hello_milvus"
        client, schema = self._open_with_base_schema(name)
        for suffix in map(str, range(63)):
            schema.add_field('a' + suffix, DataType.INT64)
        # Build the index params outside the assertRaises block so that only
        # create_collection is expected to raise.
        params = self._embeddings_index(client)
        with self.assertRaises(MilvusException):
            client.create_collection(name, schema=schema, index_params=params)

    def test_varchar_field_maxlen(self):
        # A VARCHAR max_length of exactly 65535 is accepted.
        name = "hello_milvus"
        client, schema = self._open_with_base_schema(name)
        schema.add_field("string", DataType.VARCHAR, max_length=65535)
        params = self._embeddings_index(client)
        client.create_collection(name, schema=schema, index_params=params)

    def test_varchar_field_out_maxlen(self):
        # A VARCHAR max_length of 65536 is rejected.
        name = "hello_milvus"
        client, schema = self._open_with_base_schema(name)
        schema.add_field("string", DataType.VARCHAR, max_length=65536)
        # Build the index params outside the assertRaises block so that only
        # create_collection is expected to raise.
        params = self._embeddings_index(client)
        with self.assertRaises(MilvusException):
            client.create_collection(name, schema=schema, index_params=params)
78+
79+
if __name__ == '__main__':
80+
unittest.main()

0 commit comments

Comments
 (0)