Skip to content
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.

Commit

Permalink
Add some logs for diskann (#484)
Browse files Browse the repository at this point in the history
Signed-off-by: cqy123456 <[email protected]>

Signed-off-by: cqy123456 <[email protected]>
  • Loading branch information
cqy123456 authored Sep 27, 2022
1 parent 75177e4 commit 06d139b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
9 changes: 6 additions & 3 deletions knowhere/index/vector_index/IndexDiskANNConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "knowhere/index/vector_index/IndexDiskANNConfig.h"

#include <algorithm>
#include <limits>
#include <optional>
#include <sstream>
Expand Down Expand Up @@ -59,6 +60,7 @@ static constexpr uint32_t kBuildNumThreadsMinValue = 1;
static constexpr uint32_t kBuildNumThreadsMaxValue = 128;
static constexpr uint32_t kDiskPqBytesMinValue = 0;
static constexpr std::optional<uint32_t> kDiskPqBytesMaxValue = std::nullopt;
static constexpr uint32_t kSearchListSizeMaxValue = 200;
static constexpr uint32_t kBeamwidthMinValue = 1;
static constexpr uint32_t kBeamwidthMaxValue = 128;
static constexpr uint64_t kKMinValue = 1;
Expand Down Expand Up @@ -200,8 +202,8 @@ from_json(const Config& config, DiskANNPrepareConfig& prep_conf) {
auto num_thread_max_value = kLinuxAioMaxnrLimit / prep_conf.aio_maxnr;
CheckNumericParamAndSet<uint32_t>(config, kNumThreads, kSearchNumThreadsMinValue, num_thread_max_value,
prep_conf.num_threads);
CheckNumericParamAndSet<float>(config, kCacheDramBudgetGb, kCacheDramBudgetGbMinValue,
kCacheDramBudgetGbMaxValue, prep_conf.search_cache_budget_gb);
CheckNumericParamAndSet<float>(config, kCacheDramBudgetGb, kCacheDramBudgetGbMinValue, kCacheDramBudgetGbMaxValue,
prep_conf.search_cache_budget_gb);
CheckNonNumbericParamAndSet<bool>(config, kWarmUp, prep_conf.warm_up);
CheckNonNumbericParamAndSet<bool>(config, kUseBfsCache, prep_conf.use_bfs_cache);
}
Expand All @@ -216,7 +218,8 @@ void
from_json(const Config& config, DiskANNQueryConfig& query_conf) {
CheckNumericParamAndSet<uint64_t>(config, kK, kKMinValue, kKMaxValue, query_conf.k);
// The search_list_size must be no less than k.
CheckNumericParamAndSet<uint32_t>(config, kSearchListSize, query_conf.k, 10 * query_conf.k,
CheckNumericParamAndSet<uint32_t>(config, kSearchListSize, query_conf.k,
std::max(kSearchListSizeMaxValue, static_cast<uint32_t>(10 * query_conf.k)),
query_conf.search_list_size);
CheckNumericParamAndSet<uint32_t>(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth);
}
Expand Down
13 changes: 9 additions & 4 deletions thirdparty/DiskANN/src/aux_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,7 @@ namespace diskann {
num_pq_chunks = num_pq_chunks <= 0 ? 1 : num_pq_chunks;
num_pq_chunks = num_pq_chunks > dim ? dim : num_pq_chunks;

LOG(DEBUG) << "Compressing " << dim << "-dimensional data into "
LOG(INFO) << "Compressing " << dim << "-dimensional data into "
<< num_pq_chunks << " bytes per vector.";

size_t train_size, train_dim;
Expand Down Expand Up @@ -1021,14 +1021,17 @@ namespace diskann {
if (config.compare_metric == diskann::Metric::INNER_PRODUCT)
make_zero_mean = false;

auto pq_s = std::chrono::high_resolution_clock::now();
generate_pq_pivots(train_data, train_size, (uint32_t) dim, 256,
(uint32_t) num_pq_chunks, NUM_KMEANS_REPS,
pq_pivots_path, make_zero_mean);

generate_pq_data_from_pivots<T>(data_file_to_use.c_str(), 256,
(uint32_t) num_pq_chunks, pq_pivots_path,
pq_compressed_vectors_path);

auto pq_e = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> pq_diff = pq_e - pq_s;
LOG(INFO) << "Training PQ codes cost: " << pq_diff.count() << "s";
delete[] train_data;

train_data = nullptr;
Expand All @@ -1037,12 +1040,14 @@ namespace diskann {
#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
MallocExtension::instance()->ReleaseFreeMemory();
#endif

auto graph_s = std::chrono::high_resolution_clock::now();
diskann::build_merged_vamana_index<T>(
data_file_to_use.c_str(), diskann::Metric::L2, L, R,
config.accelerate_build, p_val, indexing_ram_budget, mem_index_path,
medoids_path, centroids_path);

auto graph_e = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> graph_diff = graph_e - graph_s;
LOG(INFO) << "Training graph cost: " << graph_diff.count() << "s";
if (!use_disk_pq) {
diskann::create_disk_layout<T>(data_file_to_use.c_str(), mem_index_path,
disk_index_path);
Expand Down

0 comments on commit 06d139b

Please sign in to comment.