Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose runtime and build parameters of all the VCL indices #263

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions src/DescriptorsCommand.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,20 @@ Json::Value FindDescriptorSet::construct_responses(

// Builds the AddDescriptorSet command handler and seeds the index parameters.
// Parameters that have a VDMSConfig accessor are read from the configuration,
// falling back to the literal given here when the optional is empty; the two
// Flinng parameters without an accessor keep fixed values.
AddDescriptorSet::AddDescriptorSet() : DescriptorsCommand("AddDescriptorSet") {
  auto config = VDMSConfig::instance();
  _storage_sets = config->get_path_descriptors();

  // Converts an optional configuration entry to the unsigned member type.
  const auto param_or = [](const auto &entry, int fallback) {
    return static_cast<uint64_t>(entry.value_or(fallback));
  };

  // Flinng params
  _flinng_num_rows = param_or(config->get_flinng_num_rows(), 3);
  _flinng_cells_per_row = param_or(config->get_flinng_cells_per_row(), 1000);
  _flinng_num_hash_tables = param_or(config->get_flinng_num_hash_tables(), 10);
  _flinng_hashes_per_table =
      param_or(config->get_flinng_hashes_per_table(), 12);
  _flinng_sub_hash_bits = 2;
  _flinng_cut_off = 6;

  // IVF params
  _ivf_nlist = param_or(config->get_ivf_nlist(), 16);

  // HNSW params
  _hnsw_efsearch = param_or(config->get_hnsw_efsearch(), 64);
  _hnsw_efConstruction = param_or(config->get_hnsw_efConstruction(), 96);
  _hnsw_M = param_or(config->get_hnsw_M(), 48);

  //_use_aws_storage = VDMSConfig::instance()->get_aws_flag();
}
Expand All @@ -228,6 +236,7 @@ int AddDescriptorSet::construct_protobuf(PMGDQuery &query,
props[VDMS_DESC_SET_DIM_PROP] = cmd["dimensions"].asInt();
props[VDMS_DESC_SET_PATH_PROP] = desc_set_path;
props[VDMS_DESC_SET_ENGIN_PROP] = cmd["engine"].asString();

if (props[VDMS_DESC_SET_ENGIN_PROP] == "Flinng") {
if (cmd.isMember("flinng_num_rows"))
_flinng_num_rows = cmd["flinng_num_rows"].asInt();
Expand All @@ -242,7 +251,21 @@ int AddDescriptorSet::construct_protobuf(PMGDQuery &query,
if (cmd.isMember("flinng_cut_off"))
_flinng_cut_off = cmd["flinng_cut_off"].asInt();
}


if (props[VDMS_DESC_SET_ENGIN_PROP] == "FaissIVFFlat"){
if (cmd.isMember("ivf_nlist"))
_ivf_nlist = cmd["ivf_nlist"].asInt();
}

// Per-command overrides of the HNSW parameters; members keep their current
// values for any key the command does not supply.
if (props[VDMS_DESC_SET_ENGIN_PROP] == "FaissHNSWFlat") {
  if (cmd.isMember("hnsw_efsearch"))
    _hnsw_efsearch = cmd["hnsw_efsearch"].asInt();
  // Read the same key that was just tested. Reading a differently spelled key
  // (with a leading underscore) would ignore the value the caller supplied.
  if (cmd.isMember("hnsw_efConstruction"))
    _hnsw_efConstruction = cmd["hnsw_efConstruction"].asInt();
  if (cmd.isMember("hnsw_M"))
    _hnsw_M = cmd["hnsw_M"].asInt();
}

Json::Value constraints;
constraints[VDMS_DESC_SET_NAME_PROP].append("==");
constraints[VDMS_DESC_SET_NAME_PROP].append(cmd["name"].asString());
Expand Down Expand Up @@ -312,10 +335,15 @@ Json::Value AddDescriptorSet::construct_responses(
// We can probably set up a mechanism
// to fix a broken link when detected later, same with images.
VCL::DescriptorParams *param = nullptr;

try {
// The DescriptorParams constructor is positional: numrows, cellsperrow,
// numhashtables, hashespertable, subhashbits, cutoff, ivf_nlist,
// hnsw_efsearch, hnsw_efConstruction, hnsw_M. sub_hash_bits and cut_off must
// be passed explicitly, otherwise the IVF/HNSW values shift into those two
// slots and hnsw_M silently keeps its default.
param = new VCL::DescriptorParams(
    _flinng_num_rows, _flinng_cells_per_row, _flinng_num_hash_tables,
    _flinng_hashes_per_table, _flinng_sub_hash_bits, _flinng_cut_off,
    _ivf_nlist, _hnsw_efsearch, _hnsw_efConstruction, _hnsw_M);
VCL::DescriptorSet desc_set(desc_set_path, dimensions, _eng, metric, param);

if (_use_aws_storage) {
Expand Down
6 changes: 5 additions & 1 deletion src/DescriptorsCommand.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,12 @@ class AddDescriptorSet : public DescriptorsCommand {
_flinng_sub_hash_bits; // sub_hash_bits * hashes_per_table must be
// less than 32, otherwise segfault will happen
uint64_t _flinng_cut_off;
// bool _use_aws_storage;
uint64_t _ivf_nlist; //Nlist for IVF Index
uint64_t _hnsw_efsearch; //Efsearch for the search width of hnsw
uint64_t _hnsw_efConstruction; //Efconstruction for the width of hnsw build
uint64_t _hnsw_M; //typically Efconstruction=2*M

// bool _use_aws_storage;
public:
AddDescriptorSet();

Expand Down
78 changes: 78 additions & 0 deletions src/VDMSConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ VDMSConfig::VDMSConfig(std::string config_file) {
proxy_port = std::nullopt;
proxy_scheme = std::nullopt;

flinng_num_rows = std::nullopt;
flinng_cells_per_row = std::nullopt;
flinng_num_hash_tables = std::nullopt;
flinng_hashes_per_table = std::nullopt;
ivf_nlist = std::nullopt;
hnsw_efsearch = std::nullopt;
hnsw_efConstruction = std::nullopt;
hnsw_M = std::nullopt;


bool parsingSuccessful = reader.parse(file, json_config);

if (!parsingSuccessful) {
Expand Down Expand Up @@ -364,6 +374,74 @@ void VDMSConfig::build_dirs() {
if (aws_log_level_map.find(aws_log_level_value) != aws_log_level_map.end()) {
aws_log_level = aws_log_level_map.at(aws_log_level_value);
}

//Descriptor parameters

// flinng_num_rows
if (exists_key(PARAM_FLINNG_NUM_ROWS)) {
value = get_string_value(PARAM_FLINNG_NUM_ROWS, KEY_NOT_FOUND);
flinng_num_rows = std::optional<int>{stoi(value)};
} else {
flinng_num_rows = std::optional<int>{3};
}

// flinng_cells_per_row
if (exists_key(PARAM_FLINNG_CELLS_PER_ROW)) {
value = get_string_value(PARAM_FLINNG_CELLS_PER_ROW, KEY_NOT_FOUND);
flinng_cells_per_row = std::optional<int>{stoi(value)};
} else {
flinng_cells_per_row = std::optional<int>{1000};
}

// flinng_num_hash_tables
if (exists_key(PARAM_FLINNG_NUM_HASH_TABLES)) {
value = get_string_value(PARAM_FLINNG_NUM_HASH_TABLES, KEY_NOT_FOUND);
flinng_num_hash_tables = std::optional<int>{stoi(value)};
} else {
flinng_num_hash_tables = std::optional<int>{10};
}

// flinng_hashes_per_table
if (exists_key(PARAM_FLINNG_HASHES_PER_TABLE)) {
value = get_string_value(PARAM_FLINNG_HASHES_PER_TABLE, KEY_NOT_FOUND);
flinng_hashes_per_table = std::optional<int>{stoi(value)};
} else {
flinng_hashes_per_table = std::optional<int>{12};
}

// ivf_nlist
if (exists_key(PARAM_IVF_NLIST)) {
value = get_string_value(PARAM_IVF_NLIST, KEY_NOT_FOUND);
ivf_nlist = std::optional<int>{stoi(value)};
} else {
ivf_nlist = std::optional<int>{16};
}

// hnsw_efsearch
if (exists_key(PARAM_HNSW_EFSEARCH)) {
value = get_string_value(PARAM_HNSW_EFSEARCH, KEY_NOT_FOUND);
hnsw_efsearch = std::optional<int>{stoi(value)};
} else {
hnsw_efsearch = std::optional<int>{64};
}

// hnsw_efConstruction
if (exists_key(PARAM_HNSW_EFCONSTRUCTION)) {
value = get_string_value(PARAM_HNSW_EFCONSTRUCTION, KEY_NOT_FOUND);
hnsw_efConstruction = std::optional<int>{stoi(value)};
} else {
hnsw_efConstruction = std::optional<int>{96};
}

// hnsw_M
if (exists_key(PARAM_HNSW_M)) {
value = get_string_value(PARAM_HNSW_M, KEY_NOT_FOUND);
hnsw_M = std::optional<int>{stoi(value)};
} else {
hnsw_M = std::optional<int>{48};
}


}

bool VDMSConfig::exists_key(const std::string &key) {
Expand Down
41 changes: 41 additions & 0 deletions src/VDMSConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ const std::string PARAM_PROXY_SCHEME = "proxy_scheme";
const std::string PARAM_USE_ENDPOINT = "use_endpoint";
const std::string PARAM_AWS_LOG_LEVEL = "aws_log_level";

const std::string PARAM_FLINNG_NUM_ROWS = "flinng_num_rows";
const std::string PARAM_FLINNG_CELLS_PER_ROW = "flinng_cells_per_row";
const std::string PARAM_FLINNG_NUM_HASH_TABLES = "flinng_num_hash_tables";
const std::string PARAM_FLINNG_HASHES_PER_TABLE = "flinng_hashes_per_table";
const std::string PARAM_IVF_NLIST = "ivf_nlist";
const std::string PARAM_HNSW_EFSEARCH = "hnsw_efsearch";
const std::string PARAM_HNSW_EFCONSTRUCTION = "hnsw_efConstruction";
const std::string PARAM_HNSW_M = "hnsw_M";

namespace VDMS {

class VDMSConfig {
Expand Down Expand Up @@ -139,6 +148,18 @@ class VDMSConfig {
return aws_log_level;
}

//Descriptor Optional Parameters
// Read-only accessors for the descriptor index parameters. Each one returns a
// const reference to the matching std::optional<int> member of this class.
// NOTE(review): the visible configuration code assigns a value to every one of
// these members, so callers' value_or() fallbacks look unreachable — confirm.
// Flinng index parameters.
const std::optional<int> &get_flinng_num_rows() { return flinng_num_rows; }
const std::optional<int> &get_flinng_cells_per_row() { return flinng_cells_per_row; }
const std::optional<int> &get_flinng_num_hash_tables() { return flinng_num_hash_tables; }
const std::optional<int> &get_flinng_hashes_per_table() { return flinng_hashes_per_table; }
// IVF index parameter.
const std::optional<int> &get_ivf_nlist() { return ivf_nlist; }
// HNSW index parameters.
const std::optional<int> &get_hnsw_efsearch() { return hnsw_efsearch; }
const std::optional<int> &get_hnsw_efConstruction() { return hnsw_efConstruction; }
const std::optional<int> &get_hnsw_M() { return hnsw_M; }



protected:
static VDMSConfig *cfg;
static std::mutex _mutex;
Expand Down Expand Up @@ -172,6 +193,16 @@ class VDMSConfig {
std::optional<std::string> proxy_scheme;
Aws::Utils::Logging::LogLevel aws_log_level;

std::optional<int> flinng_num_rows;
std::optional<int> flinng_cells_per_row;
std::optional<int> flinng_num_hash_tables;
std::optional<int> flinng_hashes_per_table;
std::optional<int> ivf_nlist;
std::optional<int> hnsw_efsearch;
std::optional<int> hnsw_efConstruction;
std::optional<int> hnsw_M;


void expand_directory_layer(
std::vector<std::vector<std::string> *> *p_directory_list,
int current_layer);
Expand All @@ -193,6 +224,16 @@ class VDMSConfig {
proxy_host = std::nullopt;
proxy_port = std::nullopt;
proxy_scheme = std::nullopt;

flinng_num_rows = std::optional<int>{3};
flinng_cells_per_row = std::optional<int>{1000};
flinng_num_hash_tables = std::optional<int>{10};
flinng_hashes_per_table = std::optional<int>{12};
ivf_nlist = std::optional<int>{16};
hnsw_efsearch = std::optional<int>{64};
hnsw_efConstruction = std::optional<int>{96};
hnsw_M = std::optional<int>{48};

}
};

Expand Down
12 changes: 11 additions & 1 deletion src/vcl/DescriptorParams.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ DescriptorParams::DescriptorParams(uint64_t numrows = 3,
uint64_t numhashtables = (1 << 9),
uint64_t hashespertable = 14,
uint64_t subhashbits = 2,
uint64_t cutoff = 6) {
uint64_t cutoff = 6,
uint64_t ivf_nlist = 16,
uint64_t hnsw_efsearch = 64,
uint64_t hnsw_efConstruction = 96,
uint64_t hnsw_M = 48) {
this->num_rows = numrows;
this->cells_per_row = cellsperrow;
this->num_hash_tables = numhashtables;
Expand All @@ -45,4 +49,10 @@ DescriptorParams::DescriptorParams(uint64_t numrows = 3,
subhashbits; // sub_hash_bits * hashes_per_table must be less than 32,
// otherwise segfault will happen
this->cut_off = cutoff;
//IVF Parameters
this->ivf_nlist = ivf_nlist;
//HNSW Parameters
this->hnsw_efsearch = hnsw_efsearch;
this->hnsw_efConstruction = hnsw_efConstruction;
this->hnsw_M = hnsw_M;
}
19 changes: 18 additions & 1 deletion src/vcl/DescriptorParams.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,33 @@ class DescriptorParams {
// 32, otherwise segfault will happen
uint64_t cut_off;

/*Params needed for IVF*/
uint64_t ivf_nlist;

/*Params needed for HNSW*/
uint64_t hnsw_efsearch;
uint64_t hnsw_efConstruction;
uint64_t hnsw_M;


DescriptorParams(uint64_t numrows = 3, uint64_t cellsperrow = (1 << 12),
uint64_t numhashtables = (1 << 9),
uint64_t hashespertable = 14, uint64_t subhashbits = 2,
uint64_t cutoff = 6) {
uint64_t cutoff = 6,
uint64_t ivf_nlist = 16,
uint64_t hnsw_efsearch = 64,
uint64_t hnsw_efConstruction = 96,
uint64_t hnsw_M = 48) {
this->num_rows = numrows;
this->cells_per_row = cellsperrow;
this->num_hash_tables = numhashtables;
this->hashes_per_table = hashespertable;
this->sub_hash_bits = subhashbits;
this->cut_off = cutoff;
this->ivf_nlist = ivf_nlist;
this->hnsw_efsearch = hnsw_efsearch;
this->hnsw_efConstruction = hnsw_efConstruction;
this->hnsw_M = hnsw_M;
}
};
}; // namespace VCL
4 changes: 2 additions & 2 deletions src/vcl/DescriptorSet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ DescriptorSet::DescriptorSet(const std::string &set_path, unsigned dim,
if (eng == DescriptorSetEngine(FaissFlat))
_set = new FaissFlatDescriptorSet(set_path, dim, metric);
else if (eng == DescriptorSetEngine(FaissIVFFlat))
_set = new FaissIVFFlatDescriptorSet(set_path, dim, metric);
_set = new FaissIVFFlatDescriptorSet(set_path, dim, metric, param);
else if (eng == DescriptorSetEngine(TileDBDense))
_set = new TDBDenseDescriptorSet(set_path, dim, metric);
else if (eng == DescriptorSetEngine(TileDBSparse))
_set = new TDBSparseDescriptorSet(set_path, dim, metric);
else if (eng == DescriptorSetEngine(Flinng))
_set = new FlinngDescriptorSet(set_path, dim, metric, param);
else if (eng == DescriptorSetEngine(FaissHNSWFlat))
_set = new FaissHNSWFlatDescriptorSet(set_path, dim, metric);
_set = new FaissHNSWFlatDescriptorSet(set_path, dim, metric, param);
else {
std::cerr << "Index Not supported" << std::endl;
throw VCLException(UnsupportedIndex, "Index not supported");
Expand Down
23 changes: 15 additions & 8 deletions src/vcl/FaissDescriptorSet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -284,16 +284,18 @@ FaissIVFFlatDescriptorSet::FaissIVFFlatDescriptorSet(
}

FaissIVFFlatDescriptorSet::FaissIVFFlatDescriptorSet(
const std::string &set_path, unsigned dim, DistanceMetric metric)
const std::string &set_path, unsigned dim, DistanceMetric metric, VCL::DescriptorParams *par)
: FaissDescriptorSet(set_path, dim) {
// TODO: Revise nlist param for future optimizations.
// 4 is a suggested value by faiss for the IVFFlat index,
// that's why we leave it for now.
// int nlist = 4;

// default value of 4 is too low for any sizeable dataset

// int nlist = 16

int nlist = 16;
int nlist = par->ivf_nlist; //nlist is a configurable parameter

if (metric == L2) {
faiss::IndexFlatL2 *quantizer = new faiss::IndexFlatL2(_dimensions);
Expand Down Expand Up @@ -384,13 +386,17 @@ FaissHNSWFlatDescriptorSet::FaissHNSWFlatDescriptorSet(
}

FaissHNSWFlatDescriptorSet::FaissHNSWFlatDescriptorSet(
const std::string &set_path, unsigned dim, DistanceMetric metric)
const std::string &set_path, unsigned dim, DistanceMetric metric,VCL::DescriptorParams *par)
: FaissDescriptorSet(set_path, dim) {

int hnsw_M = 48;
int hnsw_M = par->hnsw_M ;
int hnsw_efConstruction= par->hnsw_efConstruction;
int hnsw_efsearch = par->hnsw_efsearch;

if (metric == L2) {
_index = new faiss::IndexHNSWFlat(dim, hnsw_M, faiss::METRIC_L2);
((faiss::IndexHNSWFlat *)_index)->hnsw.efConstruction = 96;
((faiss::IndexHNSWFlat *)_index)->hnsw.efConstruction = hnsw_efConstruction;
((faiss::IndexHNSWFlat *)_index)->hnsw.efSearch = hnsw_efsearch;
} else {
// only metric L2 is supported for HNSWFLAT for FAISS v1.7.4
// newer version of Faiss e.g. V1.8.0 supports I.P. metric for HNSW
Expand All @@ -401,14 +407,15 @@ FaissHNSWFlatDescriptorSet::FaissHNSWFlatDescriptorSet(
// Searches the HNSW index for the k nearest neighbours of each query vector.
//   query       - query vectors, passed through to the index
//   n_queries   - number of query vectors
//   k           - number of neighbours requested per query
//   descriptors - output: labels of the neighbours found
//   distances   - output: distances to the neighbours found
void FaissHNSWFlatDescriptorSet::search(float *query, unsigned n_queries,
                                        unsigned k, long *descriptors,
                                        float *distances) {
  // efSearch is intentionally not assigned here. It is set from
  // DescriptorParams::hnsw_efsearch when the index is constructed, and
  // forcing a fixed value on every search would discard that setting.
  // The higher the value, the slower the search but the better the accuracy;
  // see https://github.com/facebookresearch/faiss/wiki/Indexing-1M-vectors
  // for reference values.
  _index->search(n_queries, query, k, distances, descriptors);
}
Loading
Loading