diff --git a/thirdparty/faiss/.circleci/config.yml b/thirdparty/faiss/.circleci/config.yml
index e105d7914..9ddcb5ba8 100644
--- a/thirdparty/faiss/.circleci/config.yml
+++ b/thirdparty/faiss/.circleci/config.yml
@@ -9,7 +9,7 @@ executors:
     environment:
       CONDA_ARCH: Linux-x86_64
     machine:
-      image: linux-cuda-11:2023.02.1
+      image: linux-cuda-11:default
     resource_class: gpu.nvidia.medium
   linux-arm64-cpu:
     environment:
@@ -91,7 +91,7 @@ jobs:
       - run:
           name: Install conda build tools
           command: |
-            conda config --set solver libmamba
+            # conda config --set solver libmamba
             # conda config --set verbosity 3
             conda update -y -q conda
             conda install -y -q conda-build
@@ -171,7 +171,7 @@ jobs:
            sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
            cd conda
            conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-              -c pytorch -c nvidia -c rapidsai -c conda-forge
+              -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge
      - when:
          condition:
            and:
@@ -186,7 +186,7 @@ jobs:
            sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
            cd conda
            conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-              --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai -c conda-forge
+              --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge

   build_cmake:
     parameters:
@@ -236,7 +236,7 @@ jobs:
      - run:
          name: Install libraft
          command: |
-            conda install -y -q libraft cudatoolkit=11.4 -c rapidsai-nightly -c nvidia -c pkgs/main -c conda-forge
+            conda install -y -q libraft cuda-version=11.4 -c rapidsai-nightly -c nvidia -c pkgs/main -c conda-forge
      - run:
          name: Build all targets
          no_output_timeout: 30m
@@ -283,7 +283,7 @@ jobs:
      - run:
          name: Python tests (CPU + GPU)
          command: |
-            conda install -y -q pytorch pytorch-cuda -c pytorch -c nvidia
+            conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia
            pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
            pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
            cp tests/common_faiss_tests.py faiss/gpu/test
@@ -350,7 +350,7 @@ workflows:
          exec: linux-x86_64-gpu
          label: main
          cuda: "11.4"
-         cuda_archs: "60;61;70;72;75;80;86"
+         cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
          compiler_version: "11.2"
          filters:
            tags:
@@ -363,7 +363,7 @@ workflows:
          label: main
          raft: "ON"
          cuda: "11.4"
-         cuda_archs: "60;61;70;72;75;80;86"
+         cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
          compiler_version: "11.2"
          filters:
            tags:
@@ -415,7 +415,7 @@ workflows:
          name: Linux x86_64 GPU nightlies (CUDA 11.4)
          exec: linux-x86_64-gpu
          cuda: "11.4"
-         cuda_archs: "60;61;70;72;75;80;86"
+         cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
          compiler_version: "11.2"
          label: nightly
      - build_conda:
@@ -423,7 +423,7 @@ workflows:
          exec: linux-x86_64-gpu
          raft: "ON"
          cuda: "11.4"
-         cuda_archs: "60;61;70;72;75;80;86"
+         cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
          compiler_version: "11.2"
          label: nightly
      - build_conda:
diff --git a/thirdparty/faiss/benchs/bench_all_ivf/bench_all_ivf.py b/thirdparty/faiss/benchs/bench_all_ivf/bench_all_ivf.py
index e098e9527..cb4e097a0 100644
--- a/thirdparty/faiss/benchs/bench_all_ivf/bench_all_ivf.py
+++ b/thirdparty/faiss/benchs/bench_all_ivf/bench_all_ivf.py
@@ -7,6 +7,7 @@
 import os
 import sys
 import time
+import json

 import faiss
 import numpy as np
@@ -19,105 +20,6 @@
 sanitize = datasets.sanitize
-######################################################
-# Command-line parsing
-######################################################
-
-
-parser = argparse.ArgumentParser()
-
-
-def aa(*args, **kwargs):
-    group.add_argument(*args, **kwargs)
-
-
-group = parser.add_argument_group('dataset options')
-
-aa('--db', default='deep1M', help='dataset')
-aa('--compute_gt', default=False, action='store_true',
-   help='compute and store the groundtruth')
-aa('--force_IP', default=False, action="store_true",
-   help='force IP search instead of L2')
-
-group = parser.add_argument_group('index consturction')
-
-aa('--indexkey', default='HNSW32', help='index_factory type')
-aa('--maxtrain', default=256 * 256, type=int,
-   help='maximum number of training points (0 to set automatically)')
-aa('--indexfile', default='', help='file to read or write index from')
-aa('--add_bs', default=-1, type=int,
-   help='add elements index by batches of this size')
-
-
-group = parser.add_argument_group('IVF options')
-aa('--by_residual', default=-1, type=int,
-   help="set if index should use residuals (default=unchanged)")
-aa('--no_precomputed_tables', action='store_true', default=False,
-   help='disable precomputed tables (uses less memory)')
-aa('--get_centroids_from', default='',
-   help='get the centroids from this index (to speed up training)')
-aa('--clustering_niter', default=-1, type=int,
-   help='number of clustering iterations (-1 = leave default)')
-aa('--train_on_gpu', default=False, action='store_true',
-   help='do training on GPU')
-
-
-group = parser.add_argument_group('index-specific options')
-aa('--M0', default=-1, type=int, help='size of base level for HNSW')
-aa('--RQ_train_default', default=False, action="store_true",
-   help='disable progressive dim training for RQ')
-aa('--RQ_beam_size', default=-1, type=int,
-   help='set beam size at add time')
-aa('--LSQ_encode_ils_iters', default=-1, type=int,
-   help='ILS iterations for LSQ')
-aa('--RQ_use_beam_LUT', default=-1, type=int,
-   help='use beam LUT at add time')
-
-group = parser.add_argument_group('searching')
-
-aa('--k', default=100, type=int, help='nb of nearest neighbors')
-aa('--inter', default=False, action='store_true',
-   help='use intersection measure instead of 1-recall as metric')
-aa('--searchthreads', default=-1, type=int,
-   help='nb of threads to use at search time')
-aa('--searchparams', nargs='+', default=['autotune'],
-   help="search parameters to use (can be autotune or a list of params)")
-aa('--n_autotune', default=500, type=int,
-   help="max nb of autotune experiments")
-aa('--autotune_max', default=[], nargs='*',
-   help='set max value for autotune variables format "var:val" (exclusive)')
-aa('--autotune_range', default=[], nargs='*',
-   help='set complete autotune range, format "var:val1,val2,..."')
-aa('--min_test_duration', default=3.0, type=float,
-   help='run test at least for so long to avoid jitter')
-
-args = parser.parse_args()
-
-print("args:", args)
-
-os.system('echo -n "nb processors "; '
-          'cat /proc/cpuinfo | grep ^processor | wc -l; '
-          'cat /proc/cpuinfo | grep ^"model name" | tail -1')
-
-######################################################
-# Load dataset
-######################################################
-
-ds = datasets.load_dataset(
-    dataset=args.db, compute_gt=args.compute_gt)
-
-if args.force_IP:
-    ds.metric = "IP"
-
-print(ds)
-
-nq, d = ds.nq, ds.d
-nb, d = ds.nq, ds.d
-
-
-######################################################
-# Make index
-######################################################

 def unwind_index_ivf(index):
     if isinstance(index, faiss.IndexPreTransform):
@@ -125,6 +27,10 @@ def unwind_index_ivf(index):
         vt = index.chain.at(0)
         index_ivf, vt2 = unwind_index_ivf(faiss.downcast_index(index.index))
         assert vt2 is None
+        if vt is None:
+            vt = lambda x: x
+        else:
+            vt = faiss.downcast_VectorTransform(vt)
         return index_ivf, vt
     if hasattr(faiss, "IndexRefine") and isinstance(index, faiss.IndexRefine):
         return unwind_index_ivf(faiss.downcast_index(index.base_index))
@@ -157,16 +63,50 @@ def apply_AQ_options(index, args):
         index.rq.use_beam_LUT = args.RQ_use_beam_LUT


-if args.indexfile and os.path.exists(args.indexfile):
-    print("reading", args.indexfile)
-    index = faiss.read_index(args.indexfile)
+def eval_setting(index, xq, gt, k, inter, min_time):
+    """ evaluate searching in terms of precision vs. speed """
+    nq = xq.shape[0]
+    ivf_stats = faiss.cvar.indexIVF_stats
+    ivf_stats.reset()
+    nrun = 0
+    t0 = time.time()
+    while True:
+        D, I = index.search(xq, k)
+        nrun += 1
+        t1 = time.time()
+        if t1 - t0 > min_time:
+            break
+    ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun)
+    res = {
+        "ms_per_query": ms_per_query,
+        "nrun": nrun
+    }
+    res["n"] = ms_per_query
+    if inter:
+        rank = k
+        inter_measure = faiss.eval_intersection(gt[:, :rank], I[:, :rank]) / (nq * rank)
+        print("%.4f" % inter_measure, end=' ')
+        res["inter_measure"] = inter_measure
+    else:
+        res["recalls"] = {}
+        for rank in 1, 10, 100:
+            recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
+            print("%.4f" % recall, end=' ')
+            res["recalls"][rank] = recall
+    print(" %9.5f " % ms_per_query, end=' ')
+    print("%12d " % (ivf_stats.ndis / nrun), end=' ')
+    print(nrun)
+    res["ndis"] = ivf_stats.ndis / nrun
+    return res
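
As a usage illustration (not part of the patch): a minimal sketch of driving the eval_setting() helper above on random data. The sizes, the "IVF100,Flat" factory string and the use of faiss.knn() for the exact ground truth are assumptions for the example only.

    import faiss
    import numpy as np

    d, nb, nq, k = 32, 10000, 100, 10
    rs = np.random.RandomState(0)
    xb = rs.rand(nb, d).astype('float32')
    xq = rs.rand(nq, d).astype('float32')

    index = faiss.index_factory(d, "IVF100,Flat")
    index.train(xb)
    index.add(xb)
    index.nprobe = 8

    gt = faiss.knn(xq, xb, k)[1]   # exact ground-truth ids, shape (nq, k)
    stats = eval_setting(index, xq, gt, k, inter=False, min_time=0.5)
    print(stats["ms_per_query"], stats["recalls"])

The helper reruns index.search() until min_time has elapsed, so ms_per_query is averaged over enough runs to smooth out timing jitter.
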
-    index_ivf, vec_transform = unwind_index_ivf(index)
-    if vec_transform is None:
-        vec_transform = lambda x: x

+######################################################
+# Training
+######################################################

-else:
+def run_train(args, ds, res):
+    nq, d = ds.nq, ds.d
+    nb, d = ds.nb, ds.d

     print("build index, key=", args.indexkey)
@@ -176,10 +116,6 @@ def apply_AQ_options(index, args):
     )
     index_ivf, vec_transform = unwind_index_ivf(index)
-    if vec_transform is None:
-        vec_transform = lambda x: x
-    else:
-        vec_transform = faiss.downcast_VectorTransform(vec_transform)

     if args.by_residual != -1:
         by_residual = args.by_residual == 1
@@ -205,9 +141,14 @@ def apply_AQ_options(index, args):
             64)
         print(base_index.nprobe)
     elif isinstance(quantizer, faiss.IndexHNSW):
-        print("  update quantizer efSearch=", quantizer.hnsw.efSearch, end=" -> ")
-        quantizer.hnsw.efSearch = 40 if index_ivf.nlist < 4e6 else 64
-        print(quantizer.hnsw.efSearch)
+        hnsw = quantizer.hnsw
+        print(
+            f"  update HNSW quantizer options, before: "
+            f"{hnsw.efSearch=:} {hnsw.efConstruction=:}"
+        )
+        hnsw.efSearch = 40 if index_ivf.nlist < 4e6 else 64
+        hnsw.efConstruction = 200
+        print(f"  after: {hnsw.efSearch=:} {hnsw.efConstruction=:}")

     apply_AQ_options(index_ivf or index, args)
@@ -286,182 +227,341 @@ def apply_AQ_options(index, args):
     t0 = time.time()
     index.train(xt2)
-    print("  train in %.3f s" % (time.time() - t0))
+    res.train_time = time.time() - t0
+    print("  train in %.3f s" % res.train_time)
+    return index

+######################################################
+# Populating index
+######################################################

+def run_add(args, ds, index, res):
     print("adding")
     t0 = time.time()
     if args.add_bs == -1:
+        assert args.split == [1, 0], "split not supported with full batch add"
         index.add(sanitize(ds.get_database()))
     else:
+        totn = ds.nb // args.split[0]  # approximate
         i0 = 0
-        for xblock in ds.database_iterator(bs=args.add_bs):
+        print(f"Adding in block sizes {args.add_bs} with split {args.split}")
+        for xblock in ds.database_iterator(bs=args.add_bs, split=args.split):
             i1 = i0 + len(xblock)
             print("  adding %d:%d / %d [%.3f s, RSS %d kiB] " % (
-                i0, i1, ds.nb, time.time() - t0,
+                i0, i1, totn, time.time() - t0,
                 faiss.get_mem_usage_kb()))
             index.add(xblock)
             i0 = i1

-    print("  add in %.3f s" % (time.time() - t0))
-    if args.indexfile:
-        print("storing", args.indexfile)
-        faiss.write_index(index, args.indexfile)
+    res.t_add = time.time() - t0
+    print(f"  add in {res.t_add:.3f} s index size {index.ntotal}")

-if args.no_precomputed_tables:
-    if isinstance(index_ivf, faiss.IndexIVFPQ):
-        print("disabling precomputed table")
-        index_ivf.use_precomputed_table = -1
-        index_ivf.precomputed_table.clear()
-
-if args.indexfile:
-    print("index size on disk: ", os.stat(args.indexfile).st_size)
-
-if hasattr(index, "code_size"):
-    print("vector code_size", index.code_size)
-
-if hasattr(index_ivf, "code_size"):
-    print("vector code_size (IVF)", index_ivf.code_size)
-
-print("current RSS:", faiss.get_mem_usage_kb() * 1024)
-
-precomputed_table_size = 0
-if hasattr(index_ivf, 'precomputed_table'):
-    precomputed_table_size = index_ivf.precomputed_table.size() * 4
-
-print("precomputed tables size:", precomputed_table_size)
-
-#############################################################
-# Index is ready
-#############################################################
-
-xq = sanitize(ds.get_queries())
-gt = ds.get_groundtruth(k=args.k)
-assert gt.shape[1] == args.k
-
-if args.searchthreads != -1:
-    print("Setting nb of threads to", args.searchthreads)
-    faiss.omp_set_num_threads(args.searchthreads)
-else:
-    print("nb search threads: ", faiss.omp_get_max_threads())
-
-ps = faiss.ParameterSpace()
-ps.initialize(index)
-
-parametersets = args.searchparams
-
-if args.inter:
-    header = (
-        '%-40s inter@%3d time(ms/q) nb distances #runs' %
-        ("parameters", args.k)
-    )
-else:
-    header = (
-        '%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' %
-        "parameters"
-    )
-
-def compute_inter(a, b):
-    nq, rank = a.shape
-    ninter = sum(
-        np.intersect1d(a[i, :rank], b[i, :rank]).size
-        for i in range(nq)
-    )
-    return ninter / a.size
-
-def eval_setting(index, xq, gt, k, inter, min_time):
-    nq = xq.shape[0]
-    ivf_stats = faiss.cvar.indexIVF_stats
-    ivf_stats.reset()
-    nrun = 0
-    t0 = time.time()
-    while True:
-        D, I = index.search(xq, k)
-        nrun += 1
-        t1 = time.time()
-        if t1 - t0 > min_time:
-            break
-    ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun)
-    if inter:
-        rank = k
-        inter_measure = compute_inter(gt[:, :rank], I[:, :rank])
-        print("%.4f" % inter_measure, end=' ')
-    else:
-        for rank in 1, 10, 100:
-            n_ok = (I[:, :rank] == gt[:, :1]).sum()
-            print("%.4f" % (n_ok / float(nq)), end=' ')
-    print(" %9.5f " % ms_per_query, end=' ')
-    print("%12d " % (ivf_stats.ndis / nrun), end=' ')
-    print(nrun)
-
-if parametersets == ['autotune']:
-
-    ps.n_experiments = args.n_autotune
-    ps.min_test_duration = args.min_test_duration
-
-    for kv in args.autotune_max:
-        k, vmax = kv.split(':')
-        vmax = float(vmax)
-        print("limiting %s to %g" % (k, vmax))
-        pr = ps.add_range(k)
-        values = faiss.vector_to_array(pr.values)
-        values = np.array([v for v in values if v < vmax])
-        faiss.copy_array_to_vector(values, pr.values)
-
-    for kv in args.autotune_range:
-        k, vals = kv.split(':')
-        vals = np.fromstring(vals, sep=',')
-        print("setting %s to %s" % (k, vals))
-        pr = ps.add_range(k)
-        faiss.copy_array_to_vector(vals, pr.values)
-
-    # setup the Criterion object
-    if args.inter:
-        print("Optimize for intersection @ ", args.k)
-        crit = faiss.IntersectionCriterion(nq, args.k)
-    else:
-        print("Optimize for 1-recall @ 1")
-        crit = faiss.OneRecallAtRCriterion(nq, 1)
-
-    # by default, the criterion will request only 1 NN
-    crit.nnn = args.k
-    crit.set_groundtruth(None, gt.astype('int64'))
-
-    # then we let Faiss find the optimal parameters by itself
-    print("exploring operating points, %d threads" % faiss.omp_get_max_threads());
-    ps.display()
-
-    t0 = time.time()
-    op = ps.explore(index, xq, crit)
-    print("Done in %.3f s, available OPs:" % (time.time() - t0))
-
-    op.display()
-
-    print("Re-running evaluation on selected OPs")
-    print(header)
-    opv = op.optimal_pts
-    maxw = max(max(len(opv.at(i).key) for i in range(opv.size())), 40)
-    for i in range(opv.size()):
-        opt = opv.at(i)
-
-        ps.set_index_parameters(index, opt.key)
-
-        print(opt.key.ljust(maxw), end=' ')
-        sys.stdout.flush()
-
-        eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration)
-
-else:
-    print(header)
-    for param in parametersets:
-        print("%-40s " % param, end=' ')
-        sys.stdout.flush()
-        ps.set_index_parameters(index, param)
-
-        eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration)

+######################################################
+# Search
+######################################################

+def run_search(args, ds, index, res):
+
+    index_ivf, vec_transform = unwind_index_ivf(index)
+
+    if args.no_precomputed_tables:
+        if isinstance(index_ivf, faiss.IndexIVFPQ):
+            print("disabling precomputed table")
+            index_ivf.use_precomputed_table = -1
+            index_ivf.precomputed_table.clear()
+
+    if args.indexfile:
+        print("index size on disk: ", os.stat(args.indexfile).st_size)
+
+    if hasattr(index, "code_size"):
+        print("vector code_size", index.code_size)
+
+    if hasattr(index_ivf, "code_size"):
+        print("vector code_size (IVF)", index_ivf.code_size)
+
+    print("current RSS:", faiss.get_mem_usage_kb() * 1024)
+
+    precomputed_table_size = 0
+    if hasattr(index_ivf, 'precomputed_table'):
+        precomputed_table_size = index_ivf.precomputed_table.size() * 4
+
+    print("precomputed tables size:", precomputed_table_size)
+
+    # Index is ready
+
+    xq = sanitize(ds.get_queries())
+    nq, d = xq.shape
+    gt = ds.get_groundtruth(k=args.k)
+    if not args.accept_short_gt:  # Deep1B has only a single NN per query
+        assert gt.shape[1] == args.k
+
+    if args.searchthreads != -1:
+        print("Setting nb of threads to", args.searchthreads)
+        faiss.omp_set_num_threads(args.searchthreads)
+    else:
+        print("nb search threads: ", faiss.omp_get_max_threads())
+
+    ps = faiss.ParameterSpace()
+    ps.initialize(index)
+
+    parametersets = args.searchparams
+
+    if args.inter:
+        header = (
+            '%-40s inter@%3d time(ms/q) nb distances #runs' %
+            ("parameters", args.k)
+        )
+    else:
+        header = (
+            '%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' %
+            "parameters"
+        )
+
+    res.search_results = {}
+    if parametersets == ['autotune']:
+
+        ps.n_experiments = args.n_autotune
+        ps.min_test_duration = args.min_test_duration
+
+        for kv in args.autotune_max:
+            k, vmax = kv.split(':')
+            vmax = float(vmax)
+            print("limiting %s to %g" % (k, vmax))
+            pr = ps.add_range(k)
+            values = faiss.vector_to_array(pr.values)
+            values = np.array([v for v in values if v < vmax])
+            faiss.copy_array_to_vector(values, pr.values)
+
+        for kv in args.autotune_range:
+            k, vals = kv.split(':')
+            vals = np.fromstring(vals, sep=',')
+            print("setting %s to %s" % (k, vals))
+            pr = ps.add_range(k)
+            faiss.copy_array_to_vector(vals, pr.values)
+
+        # setup the Criterion object
+        if args.inter:
+            print("Optimize for intersection @ ", args.k)
+            crit = faiss.IntersectionCriterion(nq, args.k)
+        else:
+            print("Optimize for 1-recall @ 1")
+            crit = faiss.OneRecallAtRCriterion(nq, 1)
+
+        # by default, the criterion will request only 1 NN
+        crit.nnn = args.k
+        crit.set_groundtruth(None, gt.astype('int64'))
+
+        # then we let Faiss find the optimal parameters by itself
+        print("exploring operating points, %d threads" % faiss.omp_get_max_threads())
+        ps.display()
+
+        t0 = time.time()
+        op = ps.explore(index, xq, crit)
+        res.t_explore = time.time() - t0
+        print("Done in %.3f s, available OPs:" % res.t_explore)
+
+        op.display()
+
+        print("Re-running evaluation on selected OPs")
+        print(header)
+        opv = op.optimal_pts
+        maxw = max(max(len(opv.at(i).key) for i in range(opv.size())), 40)
+        for i in range(opv.size()):
+            opt = opv.at(i)
+
+            ps.set_index_parameters(index, opt.key)
+
+            print(opt.key.ljust(maxw), end=' ')
+            sys.stdout.flush()
+
+            res_i = eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration)
+            res.search_results[opt.key] = res_i
+
+    else:
+        print(header)
+        for param in parametersets:
+            print("%-40s " % param, end=' ')
+            sys.stdout.flush()
+            ps.set_index_parameters(index, param)
+
+            res_i = eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration)
+            res.search_results[param] = res_i


+######################################################
+# Driver function
+######################################################

+def main():
+
+    parser = argparse.ArgumentParser()
+
+    def aa(*args, **kwargs):
+        group.add_argument(*args, **kwargs)
+
+    group = parser.add_argument_group('general options')
+    aa('--nthreads', default=-1, type=int,
+       help='nb of threads to use at train and add time')
+    aa('--json', default=False, action="store_true",
+       help="output stats in JSON format at the end")
+    aa('--todo', default=["check_files"],
+       choices=["train", "add", "search", "check_files"],
+       nargs="+", help='what to do (check_files means decide depending on which index files exist)')
+
+    group = parser.add_argument_group('dataset options')
+    aa('--db', default='deep1M', help='dataset')
+    aa('--compute_gt', default=False, action='store_true',
+       help='compute and store the groundtruth')
+    aa('--force_IP', default=False, action="store_true",
+       help='force IP search instead of L2')
+    aa('--accept_short_gt', default=False, action='store_true',
+       help='work around a problem with Deep1B GT')
+
+    group = parser.add_argument_group('index construction')
+    aa('--indexkey', default='HNSW32', help='index_factory type')
+    aa('--trained_indexfile', default='',
+       help='file to read or write a trained index from')
+    aa('--maxtrain', default=256 * 256, type=int,
+       help='maximum number of training points (0 to set automatically)')
+    aa('--indexfile', default='', help='file to read or write index from')
+    aa('--split', default=[1, 0], type=int, nargs=2, help="database split")
+    aa('--add_bs', default=-1, type=int,
+       help='add elements index by batches of this size')
+
+    group = parser.add_argument_group('IVF options')
+    aa('--by_residual', default=-1, type=int,
+       help="set if index should use residuals (default=unchanged)")
+    aa('--no_precomputed_tables', action='store_true', default=False,
+       help='disable precomputed tables (uses less memory)')
+    aa('--get_centroids_from', default='',
+       help='get the centroids from this index (to speed up training)')
+    aa('--clustering_niter', default=-1, type=int,
+       help='number of clustering iterations (-1 = leave default)')
+    aa('--train_on_gpu', default=False, action='store_true',
+       help='do training on GPU')
+
+    group = parser.add_argument_group('index-specific options')
+    aa('--M0', default=-1, type=int, help='size of base level for HNSW')
+    aa('--RQ_train_default', default=False, action="store_true",
+       help='disable progressive dim training for RQ')
+    aa('--RQ_beam_size', default=-1, type=int,
+       help='set beam size at add time')
+    aa('--LSQ_encode_ils_iters', default=-1, type=int,
+       help='ILS iterations for LSQ')
+    aa('--RQ_use_beam_LUT', default=-1, type=int,
+       help='use beam LUT at add time')
+
+    group = parser.add_argument_group('searching')
+    aa('--k', default=100, type=int, help='nb of nearest neighbors')
+    aa('--inter', default=False, action='store_true',
+       help='use intersection measure instead of 1-recall as metric')
+    aa('--searchthreads', default=-1, type=int,
+       help='nb of threads to use at search time')
+    aa('--searchparams', nargs='+', default=['autotune'],
+       help="search parameters to use (can be autotune or a list of params)")
+    aa('--n_autotune', default=500, type=int,
+       help="max nb of autotune experiments")
+    aa('--autotune_max', default=[], nargs='*',
+       help='set max value for autotune variables format "var:val" (exclusive)')
+    aa('--autotune_range', default=[], nargs='*',
+       help='set complete autotune range, format "var:val1,val2,..."')
+    aa('--min_test_duration', default=3.0, type=float,
+       help='run test at least for so long to avoid jitter')
+    aa('--indexes_to_merge', default=[], nargs="*",
+       help="load these indexes to search and merge them before searching")
+
+    args = parser.parse_args()
+
+    if args.todo == ["check_files"]:
+        if os.path.exists(args.indexfile):
+            args.todo = ["search"]
+        elif os.path.exists(args.trained_indexfile):
+            args.todo = ["add", "search"]
+        else:
+            args.todo = ["train", "add", "search"]
+        print("setting todo to", args.todo)
+
+    print("args:", args)
+
+    os.system('echo -n "nb processors "; '
+              'cat /proc/cpuinfo | grep ^processor | wc -l; '
+              'cat /proc/cpuinfo | grep ^"model name" | tail -1')
+
+    # object to collect results
+    res = argparse.Namespace()
+    res.args = args.__dict__
+
+    res.cpu_model = [
+        l for l in open("/proc/cpuinfo", "r")
+        if "model name" in l][0]
+
+    print("Load dataset")
+
+    ds = datasets.load_dataset(
+        dataset=args.db, compute_gt=args.compute_gt)
+
+    if args.force_IP:
+        ds.metric = "IP"
+
+    print(ds)
+
+    if args.nthreads != -1:
+        print("Set nb of threads to", args.nthreads)
+        faiss.omp_set_num_threads(args.nthreads)
+    else:
+        print("nb threads: ", faiss.omp_get_max_threads())
+
+    index = None
+    if "train" in args.todo:
+        print("================== Training index")
+        index = run_train(args, ds, res)
+        if args.trained_indexfile:
+            print("storing trained index", args.trained_indexfile)
+            faiss.write_index(index, args.trained_indexfile)
+
+    if "add" in args.todo:
+        if not index:
+            assert args.trained_indexfile
+            print("reading trained index", args.trained_indexfile)
+            index = faiss.read_index(args.trained_indexfile)
+
+        print("================== Adding vectors to index")
+        run_add(args, ds, index, res)
+        if args.indexfile:
+            print("storing", args.indexfile)
+            faiss.write_index(index, args.indexfile)
+
+    if "search" in args.todo:
+        if not index:
+            if args.indexfile:
+                print("reading index", args.indexfile)
+                index = faiss.read_index(args.indexfile)
+            elif args.indexes_to_merge:
+                print(f"Merging {len(args.indexes_to_merge)} indexes")
+                sz = 0
+                for fname in args.indexes_to_merge:
+                    print(f"    reading {fname} (current size {sz})")
+                    index_i = faiss.read_index(fname)
+                    if index is None:
+                        index = index_i
+                    else:
+                        index.merge_from(index_i, index.ntotal)
+                    sz = index.ntotal
+            else:
+                assert False, "provide --indexfile"
+
+        print("================== Searching")
+        run_search(args, ds, index, res)
+
+    if args.json:
+        print("JSON results:", json.dumps(res.__dict__))
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/thirdparty/faiss/benchs/bench_fw/__init__.py b/thirdparty/faiss/benchs/bench_fw/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/thirdparty/faiss/benchs/bench_fw/benchmark.py b/thirdparty/faiss/benchs/bench_fw/benchmark.py
new file mode 100644
index 000000000..0d7f1d8b0
--- /dev/null
+++ b/thirdparty/faiss/benchs/bench_fw/benchmark.py
@@ -0,0 +1,722 @@
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+from contextlib import contextmanager
+import json
+import logging
+from dataclasses import dataclass
+from multiprocessing.pool import ThreadPool
+from operator import itemgetter
+from statistics import median, mean
+from time import perf_counter
+from typing import Any, List, Optional
+from .descriptors import DatasetDescriptor, IndexDescriptor
+
+import faiss  # @manual=//faiss/python:pyfaiss_gpu
+from faiss.contrib.evaluation import (  # @manual=//faiss/contrib:faiss_contrib_gpu
+    knn_intersection_measure,
+    OperatingPointsWithRanges,
+)
+from faiss.contrib.ivf_tools import (  # @manual=//faiss/contrib:faiss_contrib_gpu
+    add_preassigned,
+)
+
+import numpy as np
+
+from scipy.optimize import curve_fit
+
+logger = logging.getLogger(__name__)
+
+
+@contextmanager
+def timer(name) -> float:
+    logger.info(f"Measuring {name}")
+    t1 = t2 = perf_counter()
+    yield lambda: t2 - t1
+    t2 = perf_counter()
+    logger.info(f"Time for {name}: {t2 - t1:.3f} seconds")
+
+
+def refine_distances_knn(
+    D: np.ndarray, I: np.ndarray, xq: np.ndarray, xb: np.ndarray, metric
+):
+    return np.where(
+        I >= 0,
+        np.square(np.linalg.norm(xq[:, None] - xb[I], axis=2))
+        if metric == faiss.METRIC_L2
+        else np.einsum("qd,qkd->qk", xq, xb[I]),
+        D,
+    )
+
+
+def refine_distances_range(
+    lims: np.ndarray,
+    D: np.ndarray,
+    I: np.ndarray,
+    xq: np.ndarray,
+    xb: np.ndarray,
+    metric,
+):
+    with ThreadPool(32) as pool:
+        R = pool.map(
+            lambda i: (
+                np.sum(np.square(xq[i] - xb[I[lims[i]:lims[i + 1]]]), axis=1)
+                if metric == faiss.METRIC_L2
+                else np.tensordot(
+                    xq[i], xb[I[lims[i]:lims[i + 1]]], axes=(0, 1)
+                )
+            )
+            if lims[i + 1] > lims[i]
+            else [],
+            range(len(lims) - 1),
+        )
+    return np.hstack(R)
+
+
+def range_search_pr_curve(
+    dist_ann: np.ndarray, metric_score: np.ndarray, gt_rsm: float
+):
+    assert dist_ann.shape == metric_score.shape
+    assert dist_ann.ndim == 1
+    sort_by_dist_ann = dist_ann.argsort()
+    dist_ann = dist_ann[sort_by_dist_ann]
+    metric_score = metric_score[sort_by_dist_ann]
+    cum_score = np.cumsum(metric_score)
+    precision = cum_score / np.arange(1, len(cum_score) + 1)
+    recall = cum_score / gt_rsm
+    unique_key = np.round(precision * 100) * 100 + np.round(recall * 100)
+    tbl = np.vstack(
+        [dist_ann, metric_score, cum_score, precision, recall, unique_key]
+    )
+    group_by_dist_max_cum_score = np.empty(len(dist_ann), bool)
+    group_by_dist_max_cum_score[-1] = True
+    group_by_dist_max_cum_score[:-1] = dist_ann[1:] != dist_ann[:-1]
+    tbl = tbl[:, group_by_dist_max_cum_score]
+    _, unique_key_idx = np.unique(tbl[5], return_index=True)
+    dist_ann, metric_score, cum_score, precision, recall, unique_key = tbl[
+        :, np.sort(unique_key_idx)
+    ].tolist()
+    return {
+        "dist_ann": dist_ann,
+        "metric_score_sample": metric_score,
+        "cum_score": cum_score,
+        "precision": precision,
+        "recall": recall,
+        "unique_key": unique_key,
+    }
+
+
+def set_index_parameter(index, name, val):
+    index = faiss.downcast_index(index)
+
+    if isinstance(index, faiss.IndexPreTransform):
+        set_index_parameter(index.index, name, val)
+    elif name.startswith("quantizer_"):
+        index_ivf = faiss.extract_index_ivf(index)
+        set_index_parameter(
+            index_ivf.quantizer, name[name.find("_") + 1:], val
+        )
+    elif name == "efSearch":
+        index.hnsw.efSearch
+        index.hnsw.efSearch = int(val)
+    elif name == "nprobe":
+        index_ivf = faiss.extract_index_ivf(index)
+        index_ivf.nprobe
+        index_ivf.nprobe = int(val)
+    elif name == "noop":
+        pass
+    else:
+        raise RuntimeError(f"could not set param {name} on {index}")
+
+
+def optimizer(codec, search, cost_metric, perf_metric):
+    op = OperatingPointsWithRanges()
+    op.add_range("noop", [0])
+    codec_ivf = faiss.try_extract_index_ivf(codec)
+    if codec_ivf is not None:
+        op.add_range(
+            "nprobe",
+            [2**i for i in range(12) if 2**i < codec_ivf.nlist * 0.1],
+        )
+
+    totex = op.num_experiments()
+    rs = np.random.RandomState(123)
+    if totex > 1:
+        experiments = rs.permutation(totex - 2) + 1
+        experiments = [0, totex - 1] + list(experiments)
+    else:
+        experiments = [0]
+
+    logger.info(f"total nb experiments {totex}, running {len(experiments)}")
+
+    for cno in experiments:
+        key = op.cno_to_key(cno)
+        parameters = op.get_parameters(key)
+
+        (max_perf, min_cost) = op.predict_bounds(key)
+        if not op.is_pareto_optimal(max_perf, min_cost):
+            logger.info(
+                f"{cno=:4d} {str(parameters):50}: SKIP, {max_perf=:.3f} {min_cost=:.3f}",
+            )
+            continue
+
+        logger.info(f"{cno=:4d} {str(parameters):50}: RUN")
+        cost, perf = search(
+            parameters,
+            cost_metric,
+            perf_metric,
+        )
+        logger.info(
+            f"{cno=:4d} {str(parameters):50}: DONE, {cost=:.3f} {perf=:.3f}"
+        )
+        op.add_operating_point(key, perf, cost)
+
+
+def distance_ratio_measure(I, R, D_GT, metric):
+    sum_of_R = np.sum(np.where(I >= 0, R, 0))
+    sum_of_D_GT = np.sum(np.where(I >= 0, D_GT, 0))
+    if metric == faiss.METRIC_INNER_PRODUCT:
+        return (sum_of_R / sum_of_D_GT).item()
+    elif metric == faiss.METRIC_L2:
+        return (sum_of_D_GT / sum_of_R).item()
+    else:
+        raise RuntimeError(f"unknown metric {metric}")
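
For clarity, the three accepted shapes of a range-metric definition handled by the comment and function below, written out as literal values (the numbers are illustrative only):

    range_metric_a = 0.4                        # [0, 0.4) -> 1, [0.4, inf) -> 0
    range_metric_b = [[0.2, 1.0], [0.4, 0.5]]   # [0, 0.2) -> 1.0, [0.2, 0.4) -> 0.5
    range_metric_c = [[0.0, 0.2, 1.0],          # explicit [from, to) bounds
                      [0.2, 0.4, 0.5]]          # per score band
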
+# range_metric possible values:
+#
+# radius
+#    [0..radius) -> 1
+#    [radius..inf) -> 0
+#
+# [[radius1, score1], ...]
+#    [0..radius1) -> score1
+#    [radius1..radius2) -> score2
+#
+# [[radius1_from, radius1_to, score1], ...]
+#    [radius1_from, radius1_to) -> score1,
+#    [radius2_from, radius2_to) -> score2
+def get_range_search_metric_function(range_metric, D, R):
+    if D is not None:
+        assert R is not None
+        assert D.shape == R.shape
+    if isinstance(range_metric, list):
+        aradius, ascore, aradius_from, aradius_to = [], [], [], []
+        radius_to = 0
+        for rsd in range_metric:
+            assert isinstance(rsd, list)
+            if len(rsd) == 3:
+                radius_from, radius_to, score = rsd
+            elif len(rsd) == 2:
+                radius_from = radius_to
+                radius_to, score = rsd
+            else:
+                raise AssertionError(f"invalid range definition {rsd}")
+            # radius_from and radius_to are compressed distances,
+            # we need to convert them to real embedding distances.
+            if D is not None:
+                sample_idxs = np.argwhere((D <= radius_to) & (D > radius_from))
+                assert len(sample_idxs) > 0
+                real_radius = np.mean(R[sample_idxs]).item()
+            else:
+                real_radius = mean([radius_from, radius_to])
+            logger.info(
+                f"range_search_metric_function {radius_from=} {radius_to=} {real_radius=} {score=}"
+            )
+            aradius.append(real_radius)
+            ascore.append(score)
+            aradius_from.append(radius_from)
+            aradius_to.append(radius_to)
+
+        def sigmoid(x, a, b, c):
+            return a / (1 + np.exp(b * x - c))
+
+        cutoff = max(aradius)
+        popt, _ = curve_fit(sigmoid, aradius, ascore, [1, 5, 5])
+
+        for r in np.arange(0, cutoff + 0.05, 0.05):
+            logger.info(
+                f"range_search_metric_function {r=} {sigmoid(r, *popt)=}"
+            )
+
+        assert isinstance(cutoff, float)
+        return (
+            cutoff,
+            lambda x: np.where(x < cutoff, sigmoid(x, *popt), 0),
+            popt.tolist(),
+            list(zip(aradius, ascore, aradius_from, aradius_to, strict=True))
+        )
+    else:
+        # Assuming that the range_metric is a float,
+        # so the range is [0..range_metric).
+        # D is the result of a range_search with a radius of range_metric,
+        # but both range_metric and D may be compressed distances.
+        # We approximate the real embedding distance as max(R).
+        if R is not None:
+            real_range = np.max(R).item()
+        else:
+            real_range = range_metric
+        logger.info(
+            f"range_search_metric_function {range_metric=} {real_range=}"
+        )
+        assert isinstance(real_range, float)
+        return real_range * 2, lambda x: np.where(x < real_range, 1, 0), [], []
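
A quick check of the single-radius branch above, assuming the function is in scope: with a plain float range_metric and no (D, R) distance pairs, the returned score function is a hard 0/1 cutoff and the suggested search radius is twice the cutoff.

    import numpy as np

    cutoff, score_fn, popt, training = get_range_search_metric_function(0.5, None, None)
    print(cutoff)                                 # 1.0, i.e. 2 * 0.5
    print(score_fn(np.array([0.2, 0.5, 0.9])))    # [1 0 0]
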
+
+
+@dataclass
+class Benchmark:
+    training_vectors: Optional[DatasetDescriptor] = None
+    db_vectors: Optional[DatasetDescriptor] = None
+    query_vectors: Optional[DatasetDescriptor] = None
+    index_descs: Optional[List[IndexDescriptor]] = None
+    range_ref_index_desc: Optional[str] = None
+    k: Optional[int] = None
+    distance_metric: str = "METRIC_L2"
+
+    def __post_init__(self):
+        if self.distance_metric == "METRIC_INNER_PRODUCT":
+            self.distance_metric_type = faiss.METRIC_INNER_PRODUCT
+        elif self.distance_metric == "METRIC_L2":
+            self.distance_metric_type = faiss.METRIC_L2
+        else:
+            raise ValueError
+        self.cached_index_key = None
+
+    def set_io(self, benchmark_io):
+        self.io = benchmark_io
+        self.io.distance_metric = self.distance_metric
+        self.io.distance_metric_type = self.distance_metric_type
+
+    def get_index_desc(self, factory: str) -> Optional[IndexDescriptor]:
+        for desc in self.index_descs:
+            if desc.factory == factory:
+                return desc
+        return None
+
+    def get_index(self, index_desc: IndexDescriptor):
+        if self.cached_index_key != index_desc.factory:
+            xb = self.io.get_dataset(self.db_vectors)
+            index = faiss.clone_index(
+                self.io.get_codec(index_desc, xb.shape[1])
+            )
+            assert index.ntotal == 0
+            logger.info("Adding vectors to index")
+            index_ivf = faiss.try_extract_index_ivf(index)
+            if index_ivf is not None:
+                QD, QI, _, QP = self.knn_search(
+                    index_desc,
+                    parameters=None,
+                    db_vectors=None,
+                    query_vectors=self.db_vectors,
+                    k=1,
+                    index=index_ivf.quantizer,
+                    level=1,
+                )
+                print(f"{QI.ravel().shape=}")
+                add_preassigned(index_ivf, xb, QI.ravel())
+            else:
+                index.add(xb)
+            assert index.ntotal == xb.shape[0]
+            logger.info("Added vectors to index")
+            self.cached_index_key = index_desc.factory
+            self.cached_index = index
+        return self.cached_index
+
+    def range_search_reference(self, index_desc, range_metric):
+        logger.info("range_search_reference: begin")
+        if isinstance(range_metric, list):
+            assert len(range_metric) > 0
+            ri = len(range_metric[0]) - 1
+            m_radius = (
+                max(rm[ri] for rm in range_metric)
+                if self.distance_metric_type == faiss.METRIC_L2
+                else min(rm[ri] for rm in range_metric)
+            )
+        else:
+            m_radius = range_metric
+
+        lims, D, I, R, P = self.range_search(
+            index_desc,
+            index_desc.parameters,
+            radius=m_radius,
+        )
+        flat = index_desc.factory == "Flat"
+        (
+            gt_radius,
+            range_search_metric_function,
+            coefficients,
+            coefficients_training_data,
+        ) = get_range_search_metric_function(
+            range_metric,
+            D if not flat else None,
+            R if not flat else None,
+        )
+        logger.info("range_search_reference: end")
+        return gt_radius, range_search_metric_function, coefficients, coefficients_training_data
+
+    def estimate_range(self, index_desc, parameters, range_scoring_radius):
+        D, I, R, P = self.knn_search(
+            index_desc, parameters, self.db_vectors, self.query_vectors
+        )
+        samples = []
+        for i, j in np.argwhere(R < range_scoring_radius):
+            samples.append((R[i, j].item(), D[i, j].item()))
+        samples.sort(key=itemgetter(0))
+        return median(r for _, r in samples[-3:])
+
+    def range_search(
+        self,
+        index_desc: IndexDescriptor,
+        parameters: Optional[dict[str, int]],
+        radius: Optional[float] = None,
+        gt_radius: Optional[float] = None,
+    ):
+        logger.info("range_search: begin")
+        flat = index_desc.factory == "Flat"
+        if radius is None:
+            assert gt_radius is not None
+            radius = (
+                gt_radius
+                if flat
+                else self.estimate_range(index_desc, parameters, gt_radius)
+            )
+        logger.info(f"Radius={radius}")
+        filename = self.io.get_filename_range_search(
+            factory=index_desc.factory,
+            parameters=parameters,
+            level=0,
+            db_vectors=self.db_vectors,
+            query_vectors=self.query_vectors,
+            r=radius,
+        )
+        if self.io.file_exist(filename):
+            logger.info(f"Using cached results for {index_desc.factory}")
+            lims, D, I, R, P = self.io.read_file(
+                filename, ["lims", "D", "I", "R", "P"]
+            )
+        else:
+            xq = self.io.get_dataset(self.query_vectors)
+            index = self.get_index(index_desc)
+            if parameters:
+                for name, val in parameters.items():
+                    set_index_parameter(index, name, val)
+
+            index_ivf = faiss.try_extract_index_ivf(index)
+            if index_ivf is not None:
+                QD, QI, _, QP = self.knn_search(
+                    index_desc,
+                    parameters=None,
+                    db_vectors=None,
+                    query_vectors=self.query_vectors,
+                    k=index.nprobe,
+                    index=index_ivf.quantizer,
+                    level=1,
+                )
+                # QD = QD[:, :index.nprobe]
+                # QI = QI[:, :index.nprobe]
+                faiss.cvar.indexIVF_stats.reset()
+                with timer("range_search_preassigned") as t:
+                    lims, D, I = index.range_search_preassigned(xq, radius, QI, QD)
+            else:
+                with timer("range_search") as t:
+                    lims, D, I = index.range_search(xq, radius)
+            if flat:
+                R = D
+            else:
+                xb = self.io.get_dataset(self.db_vectors)
+                R = refine_distances_range(
+                    lims, D, I, xq, xb, self.distance_metric_type
+                )
+            P = {
+                "time": t(),
+                "radius": radius,
+                "count": lims[-1].item(),
+                "parameters": parameters,
+                "index": index_desc.factory,
+            }
+            if index_ivf is not None:
+                stats = faiss.cvar.indexIVF_stats
+                P |= {
+                    "quantizer": QP,
+                    "nq": stats.nq,
+                    "nlist": stats.nlist,
+                    "ndis": stats.ndis,
+                    "nheap_updates": stats.nheap_updates,
+                    "quantization_time": stats.quantization_time,
+                    "search_time": stats.search_time,
+                }
+            self.io.write_file(
+                filename, ["lims", "D", "I", "R", "P"], [lims, D, I, R, P]
+            )
+        logger.info("range_search: end")
+        return lims, D, I, R, P
+
+    def range_ground_truth(self, gt_radius, range_search_metric_function):
+        logger.info("range_ground_truth: begin")
+        flat_desc = self.get_index_desc("Flat")
+        lims, D, I, R, P = self.range_search(
+            flat_desc,
+            flat_desc.parameters,
+            radius=gt_radius,
+        )
+        gt_rsm = np.sum(range_search_metric_function(R)).item()
+        logger.info("range_ground_truth: end")
+        return gt_rsm
+
+    def range_search_benchmark(
+        self,
+        results: dict[str, Any],
+        index_desc: IndexDescriptor,
+        metric_key: str,
+        gt_radius: float,
+        range_search_metric_function,
+        gt_rsm: float,
+    ):
+        logger.info(f"range_search_benchmark: begin {index_desc.factory=}")
+        xq = self.io.get_dataset(self.query_vectors)
+        (nq, d) = xq.shape
+        logger.info(
+            f"Searching {index_desc.factory} with {nq} vectors of dimension {d}"
+        )
+        codec = self.io.get_codec(index_desc, d)
+        faiss.omp_set_num_threads(16)
+
+        def experiment(parameters, cost_metric, perf_metric):
+            nonlocal results
+            key = self.io.get_filename_evaluation_name(
+                factory=index_desc.factory,
+                parameters=parameters,
+                level=0,
+                db_vectors=self.db_vectors,
+                query_vectors=self.query_vectors,
+                evaluation_name=metric_key,
+            )
+            if key in results["experiments"]:
+                metrics = results["experiments"][key]
+            else:
+                lims, D, I, R, P = self.range_search(
+                    index_desc, parameters, gt_radius=gt_radius
+                )
+                range_search_metric = range_search_metric_function(R)
+                range_search_pr = range_search_pr_curve(
+                    D, range_search_metric, gt_rsm
+                )
+                range_score_sum = np.sum(range_search_metric).item()
+                metrics = P | {
+                    "range_score_sum": range_score_sum,
+                    "range_score_max_recall": range_score_sum / gt_rsm,
+                    "range_search_pr": range_search_pr,
+                }
+                results["experiments"][key] = metrics
+            return metrics[cost_metric], metrics[perf_metric]
+
+        for cost_metric in ["time"]:
+            for perf_metric in ["range_score_max_recall"]:
+                optimizer(
+                    codec,
+                    experiment,
+                    cost_metric,
+                    perf_metric,
+                )
+        logger.info("range_search_benchmark: end")
+        return results
+
+    def knn_search(
+        self,
+        index_desc: IndexDescriptor,
+        parameters: Optional[dict[str, int]],
+        db_vectors: Optional[DatasetDescriptor],
+        query_vectors: DatasetDescriptor,
+        k: Optional[int] = None,
+        index: Optional[faiss.Index] = None,
+        level: int = 0,
+    ):
+        assert level >= 0
+        if level == 0:
+            assert index is None
+            assert db_vectors is not None
+        else:
+            assert index is not None  # quantizer
+            assert db_vectors is None
+        logger.info("knn_search: begin")
+        k = k if k is not None else self.k
+        flat = index_desc.factory == "Flat"
+        filename = self.io.get_filename_knn_search(
+            factory=index_desc.factory,
+            parameters=parameters,
+            level=level,
+            db_vectors=db_vectors,
+            query_vectors=query_vectors,
+            k=k,
+        )
+        if self.io.file_exist(filename):
+            logger.info(f"Using cached results for {index_desc.factory}")
+            D, I, R, P = self.io.read_file(filename, ["D", "I", "R", "P"])
+        else:
+            xq = self.io.get_dataset(query_vectors)
+            if index is None:
+                index = self.get_index(index_desc)
+                if parameters:
+                    for name, val in parameters.items():
+                        set_index_parameter(index, name, val)
+
+            index_ivf = faiss.try_extract_index_ivf(index)
+            if index_ivf is not None:
+                QD, QI, _, QP = self.knn_search(
+                    index_desc,
+                    parameters=None,
+                    db_vectors=None,
+                    query_vectors=query_vectors,
+                    k=index.nprobe,
+                    index=index_ivf.quantizer,
+                    level=level + 1,
+                )
+                # QD = QD[:, :index.nprobe]
+                # QI = QI[:, :index.nprobe]
+                faiss.cvar.indexIVF_stats.reset()
+                with timer("knn search_preassigned") as t:
+                    D, I = index.search_preassigned(xq, k, QI, QD)
+            else:
+                with timer("knn search") as t:
+                    D, I = index.search(xq, k)
+            if flat or level > 0:
+                R = D
+            else:
+                xb = self.io.get_dataset(db_vectors)
+                R = refine_distances_knn(
+                    D, I, xq, xb, self.distance_metric_type
+                )
+            P = {
+                "time": t(),
+                "parameters": parameters,
+                "index": index_desc.factory,
+                "level": level,
+            }
+            if index_ivf is not None:
+                stats = faiss.cvar.indexIVF_stats
+                P |= {
+                    "quantizer": QP,
+                    "nq": stats.nq,
+                    "nlist": stats.nlist,
+                    "ndis": stats.ndis,
+                    "nheap_updates": stats.nheap_updates,
+                    "quantization_time": stats.quantization_time,
+                    "search_time": stats.search_time,
+                }
+            self.io.write_file(filename, ["D", "I", "R", "P"], [D, I, R, P])
+        logger.info("knn_search: end")
+        return D, I, R, P
+
+    def knn_ground_truth(self):
+        logger.info("knn_ground_truth: begin")
+        flat_desc = self.get_index_desc("Flat")
+        self.gt_knn_D, self.gt_knn_I, _, _ = self.knn_search(
+            flat_desc,
+            flat_desc.parameters,
+            self.db_vectors,
+            self.query_vectors,
+        )
+        logger.info("knn_ground_truth: end")
+
+    def knn_search_benchmark(
+        self, results: dict[str, Any], index_desc: IndexDescriptor
+    ):
+        logger.info(f"knn_search_benchmark: begin {index_desc.factory=}")
+        xq = self.io.get_dataset(self.query_vectors)
+        (nq, d) = xq.shape
+        logger.info(
+            f"Searching {index_desc.factory} with {nq} vectors of dimension {d}"
+        )
+        codec = self.io.get_codec(index_desc, d)
+        codec_ivf = faiss.try_extract_index_ivf(codec)
+        if codec_ivf is not None:
+            results["indices"][index_desc.factory] = {"nlist": codec_ivf.nlist}
+
+        faiss.omp_set_num_threads(16)
+
+        def experiment(parameters, cost_metric, perf_metric):
+            nonlocal results
+            key = self.io.get_filename_evaluation_name(
+                factory=index_desc.factory,
+                parameters=parameters,
+                level=0,
+                db_vectors=self.db_vectors,
+                query_vectors=self.query_vectors,
+                evaluation_name="knn",
+            )
+            if key in results["experiments"]:
+                metrics = results["experiments"][key]
+            else:
+                D, I, R, P = self.knn_search(
+                    index_desc, parameters, self.db_vectors, self.query_vectors
+                )
+                metrics = P | {
+                    "knn_intersection": knn_intersection_measure(
+                        I, self.gt_knn_I
+                    ),
+                    "distance_ratio": distance_ratio_measure(
+                        I, R, self.gt_knn_D, self.distance_metric_type
+                    ),
+                }
+                results["experiments"][key] = metrics
+            return metrics[cost_metric], metrics[perf_metric]
+
+        for cost_metric in ["time"]:
+            for perf_metric in ["knn_intersection", "distance_ratio"]:
+                optimizer(
+                    codec,
+                    experiment,
+                    cost_metric,
+                    perf_metric,
+                )
+        logger.info("knn_search_benchmark: end")
+        return results
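
knn_intersection_measure (imported at the top of this file from faiss.contrib.evaluation) is the average fraction of ids shared between two k-NN result tables, which is what knn_search_benchmark() reports as "knn_intersection". A tiny self-contained check:

    import numpy as np
    from faiss.contrib.evaluation import knn_intersection_measure

    I_ref = np.array([[1, 2, 3], [4, 5, 6]])
    I_ann = np.array([[1, 2, 9], [6, 5, 4]])
    print(knn_intersection_measure(I_ann, I_ref))  # 5 shared ids / 6 = 0.8333...
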
+
+    def benchmark(self) -> str:
+        logger.info("begin evaluate")
+        results = {"indices": {}, "experiments": {}}
+        if self.get_index_desc("Flat") is None:
+            self.index_descs.append(IndexDescriptor(factory="Flat"))
+
+        self.knn_ground_truth()
+        for index_desc in self.index_descs:
+            results = self.knn_search_benchmark(
+                results=results,
+                index_desc=index_desc,
+            )
+
+        if self.range_ref_index_desc is not None:
+            index_desc = self.get_index_desc(self.range_ref_index_desc)
+            if index_desc is None:
+                raise ValueError(
+                    f"Unknown range index {self.range_ref_index_desc}"
+                )
+            if index_desc.range_metrics is None:
+                raise ValueError(
+                    f"Range index {index_desc.factory} has no radius_score"
+                )
+            results["metrics"] = {}
+            for metric_key, range_metric in index_desc.range_metrics.items():
+                (
+                    gt_radius,
+                    range_search_metric_function,
+                    coefficients,
+                    coefficients_training_data,
+                ) = self.range_search_reference(index_desc, range_metric)
+                results["metrics"][metric_key] = {
+                    "coefficients": coefficients,
+                    "training_data": coefficients_training_data,
+                }
+                gt_rsm = self.range_ground_truth(
+                    gt_radius, range_search_metric_function
+                )
+                for index_desc in self.index_descs:
+                    results = self.range_search_benchmark(
+                        results=results,
+                        index_desc=index_desc,
+                        metric_key=metric_key,
+                        gt_radius=gt_radius,
+                        range_search_metric_function=range_search_metric_function,
+                        gt_rsm=gt_rsm,
+                    )
+        self.io.write_json(results, "result.json", overwrite=True)
+        logger.info("end evaluate")
+        return json.dumps(results)
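
A hypothetical end-to-end driver for the framework above (the module import path, file names and factory string are assumptions for illustration; nothing here is mandated by the patch):

    from bench_fw.benchmark import Benchmark
    from bench_fw.benchmark_io import BenchmarkIO
    from bench_fw.descriptors import DatasetDescriptor, IndexDescriptor

    benchmark = Benchmark(
        db_vectors=DatasetDescriptor(tablename="db.npy"),
        query_vectors=DatasetDescriptor(tablename="queries.npy"),
        index_descs=[
            IndexDescriptor(factory="IVF1024,Flat", path="ivf1024.codec"),
        ],
        k=10,
        distance_metric="METRIC_L2",
    )
    benchmark.set_io(BenchmarkIO(path="/tmp/bench_fw"))
    print(benchmark.benchmark())   # JSON string, also written to result.json

A "Flat" descriptor is appended automatically to produce the exact ground truth, and every search result is cached through the BenchmarkIO layer defined next.
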
diff --git a/thirdparty/faiss/benchs/bench_fw/benchmark_io.py b/thirdparty/faiss/benchs/bench_fw/benchmark_io.py
new file mode 100644
index 000000000..30fda9c72
--- /dev/null
+++ b/thirdparty/faiss/benchs/bench_fw/benchmark_io.py
@@ -0,0 +1,246 @@
+import io
+import json
+import logging
+import os
+from dataclasses import dataclass
+from typing import Any, List, Optional
+from zipfile import ZipFile
+
+import faiss  # @manual=//faiss/python:pyfaiss_gpu
+
+import numpy as np
+
+from .descriptors import DatasetDescriptor, IndexDescriptor
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BenchmarkIO:
+    path: str
+
+    def __post_init__(self):
+        self.cached_ds = {}
+        self.cached_codec_key = None
+
+    def get_filename_search(
+        self,
+        factory: str,
+        parameters: Optional[dict[str, int]],
+        level: int,
+        db_vectors: DatasetDescriptor,
+        query_vectors: DatasetDescriptor,
+        k: Optional[int] = None,
+        r: Optional[float] = None,
+        evaluation_name: Optional[str] = None,
+    ):
+        assert factory is not None
+        assert level is not None
+        assert self.distance_metric is not None
+        assert query_vectors is not None
+        filename = f"{factory.lower().replace(',', '_')}."
+        if level > 0:
+            filename += f"l_{level}."
+        if db_vectors is not None:
+            filename += db_vectors.get_filename("d")
+        filename += query_vectors.get_filename("q")
+        filename += self.distance_metric.upper() + "."
+        if k is not None:
+            filename += f"k_{k}."
+        if r is not None:
+            filename += f"r_{int(r * 1000)}."
+        if parameters is not None:
+            for name, val in parameters.items():
+                if name != "noop":
+                    filename += f"{name}_{val}."
+        if evaluation_name is None:
+            filename += "zip"
+        else:
+            filename += evaluation_name
+        return filename
+
+    def get_filename_knn_search(
+        self,
+        factory: str,
+        parameters: Optional[dict[str, int]],
+        level: int,
+        db_vectors: DatasetDescriptor,
+        query_vectors: DatasetDescriptor,
+        k: int,
+    ):
+        assert k is not None
+        return self.get_filename_search(
+            factory=factory,
+            parameters=parameters,
+            level=level,
+            db_vectors=db_vectors,
+            query_vectors=query_vectors,
+            k=k,
+        )
+
+    def get_filename_range_search(
+        self,
+        factory: str,
+        parameters: Optional[dict[str, int]],
+        level: int,
+        db_vectors: DatasetDescriptor,
+        query_vectors: DatasetDescriptor,
+        r: float,
+    ):
+        assert r is not None
+        return self.get_filename_search(
+            factory=factory,
+            parameters=parameters,
+            level=level,
+            db_vectors=db_vectors,
+            query_vectors=query_vectors,
+            r=r,
+        )
+
+    def get_filename_evaluation_name(
+        self,
+        factory: str,
+        parameters: Optional[dict[str, int]],
+        level: int,
+        db_vectors: DatasetDescriptor,
+        query_vectors: DatasetDescriptor,
+        evaluation_name: str,
+    ):
+        assert evaluation_name is not None
+        return self.get_filename_search(
+            factory=factory,
+            parameters=parameters,
+            level=level,
+            db_vectors=db_vectors,
+            query_vectors=query_vectors,
+            evaluation_name=evaluation_name,
+        )
+
+    def get_local_filename(self, filename):
+        return os.path.join(self.path, filename)
+
+    def download_file_from_blobstore(
+        self,
+        filename: str,
+        bucket: Optional[str] = None,
+        path: Optional[str] = None,
+    ):
+        return self.get_local_filename(filename)
+
+    def upload_file_to_blobstore(
+        self,
+        filename: str,
+        bucket: Optional[str] = None,
+        path: Optional[str] = None,
+        overwrite: bool = False,
+    ):
+        pass
+
+    def file_exist(self, filename: str):
+        fn = self.get_local_filename(filename)
+        exists = os.path.exists(fn)
+        logger.info(f"{filename} {exists=}")
+        return exists
+
+    def get_codec(self, index_desc: IndexDescriptor, d: int):
+        if index_desc.factory == "Flat":
+            return faiss.IndexFlat(d, self.distance_metric_type)
+        else:
+            if self.cached_codec_key != index_desc.factory:
+                codec = faiss.read_index(
+                    self.get_local_filename(index_desc.path)
+                )
+                assert (
+                    codec.metric_type == self.distance_metric_type
+                ), f"{codec.metric_type=} != {self.distance_metric_type=}"
+                logger.info(f"Loaded codec from {index_desc.path}")
+                self.cached_codec_key = index_desc.factory
+                self.cached_codec = codec
+            return self.cached_codec
+
+    def read_file(self, filename: str, keys: List[str]):
+        fn = self.download_file_from_blobstore(filename)
+        logger.info(f"Loading file {fn}")
+        results = []
+        with ZipFile(fn, "r") as zip_file:
+            for key in keys:
+                with zip_file.open(key, "r") as f:
+                    if key in ["D", "I", "R", "lims"]:
+                        results.append(np.load(f))
+                    elif key in ["P"]:
+                        t = io.TextIOWrapper(f)
+                        results.append(json.load(t))
+                    else:
+                        raise AssertionError()
+        return results
+
+    def write_file(
+        self,
+        filename: str,
+        keys: List[str],
+        values: List[Any],
+        overwrite: bool = False,
+    ):
+        fn = self.get_local_filename(filename)
+        with ZipFile(fn, "w") as zip_file:
+            for key, value in zip(keys, values, strict=True):
+                with zip_file.open(key, "w") as f:
+                    if key in ["D", "I", "R", "lims"]:
+                        np.save(f, value)
+                    elif key in ["P"]:
+                        t = io.TextIOWrapper(f, write_through=True)
+                        json.dump(value, t)
+                    else:
+                        raise AssertionError()
+        self.upload_file_to_blobstore(filename, overwrite=overwrite)
+
+    def get_dataset(self, dataset):
+        if dataset not in self.cached_ds:
+            self.cached_ds[dataset] = self.read_nparray(
+                os.path.join(self.path, dataset.tablename)
+            )
+        return self.cached_ds[dataset]
+
+    def read_nparray(
+        self,
+        filename: str,
+    ):
+        fn = self.download_file_from_blobstore(filename)
+        logger.info(f"Loading nparray from {fn}")
+        nparray = np.load(fn)
+        logger.info(f"Loaded nparray {nparray.shape} from {fn}")
+        return nparray
+
+    def write_nparray(
+        self,
+        nparray: np.ndarray,
+        filename: str,
+    ):
+        fn = self.get_local_filename(filename)
+        logger.info(f"Saving nparray {nparray.shape} to {fn}")
+        np.save(fn, nparray)
+        self.upload_file_to_blobstore(filename)
+
+    def read_json(
+        self,
+        filename: str,
+    ):
+        fn = self.download_file_from_blobstore(filename)
+        logger.info(f"Loading json {fn}")
+        with open(fn, "r") as fp:
+            json_dict = json.load(fp)
+        logger.info(f"Loaded json {json_dict} from {fn}")
+        return json_dict
+
+    def write_json(
+        self,
+        json_dict: dict[str, Any],
+        filename: str,
+        overwrite: bool = False,
+    ):
+        fn = self.get_local_filename(filename)
+        logger.info(f"Saving json {json_dict} to {fn}")
+        with open(fn, "w") as fp:
+            json.dump(json_dict, fp)
+        self.upload_file_to_blobstore(filename, overwrite=overwrite)
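
A sketch of a round trip through the zip-based cache above (the path is made up; note that BenchmarkIO does not create its directory itself):

    import os
    import numpy as np
    from bench_fw.benchmark_io import BenchmarkIO

    os.makedirs("/tmp/bench_fw", exist_ok=True)
    io = BenchmarkIO(path="/tmp/bench_fw")

    D = np.random.rand(4, 10).astype('float32')
    I = np.random.randint(100, size=(4, 10))
    P = {"time": 0.1, "parameters": {"nprobe": 8}}

    io.write_file("example.zip", ["D", "I", "P"], [D, I, P])
    D2, I2, P2 = io.read_file("example.zip", ["D", "I", "P"])
    assert (D2 == D).all() and (I2 == I).all() and P2 == P

Arrays travel as .npy members inside the zip and the "P" metadata dict as JSON, mirroring what read_file()/write_file() dispatch on.
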
diff --git a/thirdparty/faiss/benchs/bench_fw/descriptors.py b/thirdparty/faiss/benchs/bench_fw/descriptors.py
new file mode 100644
index 000000000..0268ec328
--- /dev/null
+++ b/thirdparty/faiss/benchs/bench_fw/descriptors.py
@@ -0,0 +1,55 @@
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+from dataclasses import dataclass
+from typing import Any, List, Optional
+
+
+@dataclass
+class IndexDescriptor:
+    factory: str
+    bucket: Optional[str] = None
+    path: Optional[str] = None
+    parameters: Optional[dict[str, int]] = None
+    # range metric definitions
+    # key: name
+    # value: one of the following:
+    #
+    # radius
+    #    [0..radius) -> 1
+    #    [radius..inf) -> 0
+    #
+    # [[radius1, score1], ...]
+    #    [0..radius1) -> score1
+    #    [radius1..radius2) -> score2
+    #
+    # [[radius1_from, radius1_to, score1], ...]
+    #    [radius1_from, radius1_to) -> score1,
+    #    [radius2_from, radius2_to) -> score2
+    range_metrics: Optional[dict[str, Any]] = None
+
+
+@dataclass
+class DatasetDescriptor:
+    namespace: Optional[str] = None
+    tablename: Optional[str] = None
+    partitions: Optional[List[str]] = None
+    num_vectors: Optional[int] = None
+
+    def __hash__(self):
+        return hash(self.get_filename())
+
+    def get_filename(
+        self,
+        prefix: str = "v",
+    ) -> str:
+        filename = prefix + "_"
+        if self.namespace is not None:
+            filename += self.namespace + "_"
+        assert self.tablename is not None
+        filename += self.tablename
+        if self.partitions is not None:
+            filename += "_" + "_".join(self.partitions).replace("=", "_")
+        if self.num_vectors is not None:
+            filename += f"_{self.num_vectors}"
+        filename += "."
+        return filename
diff --git a/thirdparty/faiss/benchs/bench_hybrid_cpu_gpu.py b/thirdparty/faiss/benchs/bench_hybrid_cpu_gpu.py
index 8a509f323..779a09fef 100644
--- a/thirdparty/faiss/benchs/bench_hybrid_cpu_gpu.py
+++ b/thirdparty/faiss/benchs/bench_hybrid_cpu_gpu.py
@@ -530,14 +530,7 @@ def aa(*args, **kwargs):
         raise RuntimeError()

     totex = op.num_experiments()
-    rs = np.random.RandomState(123)
-    if totex < args.n_autotune:
-        experiments = rs.permutation(totex - 2) + 1
-    else:
-        experiments = rs.randint(
-            totex - 2, size=args.n_autotune - 2, replace=False)
-
-    experiments = [0, totex - 1] + list(experiments)
+    experiments = op.sample_experiments(args.n_autotune)

     print(f"total nb experiments {totex}, running {len(experiments)}")
     print("perform search")
diff --git a/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml b/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml
index 14a5c606b..387f8b4ac 100644
--- a/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml
+++ b/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml
@@ -47,13 +47,13 @@ outputs:
      host:
        - mkl =2023  # [x86_64]
        - openblas  # [not x86_64]
-       - cudatoolkit {{ cudatoolkit }}
-       - libraft =23.08
+       - cuda-version {{ cudatoolkit }}
+       - libraft =23.12
      run:
        - mkl =2023  # [x86_64]
        - openblas  # [not x86_64]
-       - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
-       - libraft =23.08
+       - {{ pin_compatible('cuda-version', max_pin='x') }}
+       - libraft =23.12
    test:
      requires:
        - conda-build
@@ -90,6 +90,8 @@ outputs:
        - numpy
        - scipy
        - pytorch
+       - pytorch-cuda =11.8
+       - cuda-version =11.8
      commands:
        - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
        - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
diff --git a/thirdparty/faiss/conda/faiss-gpu/meta.yaml b/thirdparty/faiss/conda/faiss-gpu/meta.yaml
index fcfd3b4bd..e7b839975 100644
--- a/thirdparty/faiss/conda/faiss-gpu/meta.yaml
+++ b/thirdparty/faiss/conda/faiss-gpu/meta.yaml
@@ -51,7 +51,7 @@ outputs:
      run:
        - mkl =2023  # [x86_64]
        - openblas  # [not x86_64]
-       - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
+       - {{ pin_compatible('cudatoolkit', max_pin='x') }}
    test:
      requires:
        - conda-build
@@ -88,6 +88,8 @@ outputs:
        - numpy
        - scipy
        - pytorch
+       - pytorch-cuda =11.8
+       - cudatoolkit =11.8
      commands:
        - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
        - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
diff --git a/thirdparty/faiss/contrib/evaluation.py b/thirdparty/faiss/contrib/evaluation.py
index 1f4068734..50e8a9331 100644
--- a/thirdparty/faiss/contrib/evaluation.py
+++ b/thirdparty/faiss/contrib/evaluation.py
@@ -380,7 +380,23 @@ def do_nothing_key(self):
         return np.zeros(len(self.ranges), dtype=int)

     def num_experiments(self):
-        return np.prod([len(values) for name, values in self.ranges])
+        return int(np.prod([len(values) for name, values in self.ranges]))
+
+    def sample_experiments(self, n_autotune, rs=np.random):
+        """ sample a set of experiments of max size n_autotune
+        (run all experiments in random order if n_autotune is 0)
+        """
+        assert n_autotune == 0 or n_autotune >= 2
+        totex = self.num_experiments()
+        if n_autotune == 0 or totex < n_autotune:
+            experiments = rs.permutation(totex - 2)
+        else:
+            experiments = rs.choice(
+                totex - 2, size=n_autotune - 2, replace=False)
+
+        experiments = [0, totex - 1] + [int(cno) + 1 for cno in experiments]
+        return experiments

     def cno_to_key(self, cno):
         """Convert a sequential experiment number to a key"""
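
To see how the new sample_experiments() is meant to be called (this is what bench_hybrid_cpu_gpu.py now uses instead of sampling inline), a small sketch; the parameter names and value ranges are arbitrary:

    import numpy as np
    from faiss.contrib.evaluation import OperatingPointsWithRanges

    op = OperatingPointsWithRanges()
    op.add_range("nprobe", [1, 2, 4, 8, 16, 32])
    op.add_range("quantizer_efSearch", [4, 8, 16])

    # 6 * 3 = 18 experiments; keep the first and last, sample 8 more
    experiments = op.sample_experiments(10, rs=np.random.RandomState(42))
    print(len(experiments), experiments[:2])   # 10 [0, 17]
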
diff --git a/thirdparty/faiss/faiss/Index.h b/thirdparty/faiss/faiss/Index.h
index ba9b6ba7d..c9b5e3022 100644
--- a/thirdparty/faiss/faiss/Index.h
+++ b/thirdparty/faiss/faiss/Index.h
@@ -16,9 +16,6 @@
 #include
 #include

-// #include
-// using knowhere::BitsetView;
-
 #define FAISS_VERSION_MAJOR 1
 #define FAISS_VERSION_MINOR 7
 #define FAISS_VERSION_PATCH 4
@@ -53,9 +50,6 @@ struct DistanceComputer;
  * Ownership of the object fields is always to the caller.
  */
 struct SearchParameters {
-    // BitsetView bitset = nullptr;
-
-    // // Disabled for Knowhere.
     /// if non-null, only these IDs will be considered during search.
     IDSelector* sel = nullptr;
@@ -109,6 +103,7 @@ struct Index {
      * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
      * This function slices the input vectors in chunks smaller than
      * blocksize_add and calls add_core.
+     * @param n number of vectors
      * @param x input matrix, size n * d
      */
     virtual void add(idx_t n, const float* x) = 0;
@@ -118,7 +113,9 @@ struct Index {
      * The default implementation fails with an assertion, as it is
      * not supported by all indexes.
      *
-     * @param xids if non-null, ids to store for the vectors (size n)
+     * @param n    number of vectors
+     * @param x    input vectors, size n * d
+     * @param xids if non-null, ids to store for the vectors (size n)
      */
     virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
@@ -127,9 +124,11 @@ struct Index {
      * return at most k vectors. If there are not enough results for a
      * query, the result array is padded with -1s.
      *
+     * @param n         number of vectors
      * @param x         input vectors to search, size n * d
-     * @param labels    output labels of the NNs, size n*k
+     * @param k         number of extracted vectors
      * @param distances output pairwise distances, size n*k
+     * @param labels    output labels of the NNs, size n*k
      */
     virtual void search(
             idx_t n,
@@ -145,6 +144,7 @@ struct Index {
      * indexes do not implement the range_search (only the k-NN search
      * is mandatory).
      *
+     * @param n      number of vectors
      * @param x      input vectors to search, size n * d
      * @param radius search radius
      * @param result result table
@@ -159,8 +159,10 @@ struct Index {
     /** return the indexes of the k vectors closest to the query x.
      *
      * This function is identical as search but only return labels of neighbors.
+     * @param n      number of vectors
      * @param x      input vectors to search, size n * d
      * @param labels output labels of the NNs, size n*k
+     * @param k      number of nearest neighbours
      */
     virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1) const;
@@ -184,7 +186,7 @@ struct Index {
     /** Reconstruct several stored vectors (or an approximation if lossy coding)
      *
      * this function may not be defined for some indexes
-     * @param n      number of vectors to reconstruct
+     * @param n      number of vectors to reconstruct
      * @param keys   ids of the vectors to reconstruct (size n)
      * @param recons reconstucted vector (size n * d)
      */
@@ -194,6 +196,8 @@ struct Index {
     /** Reconstruct vectors i0 to i0 + ni - 1
      *
      * this function may not be defined for some indexes
+     * @param i0     index of the first vector in the sequence
+     * @param ni     number of vectors in the sequence
      * @param recons reconstucted vector (size ni * d)
      */
     virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
@@ -204,6 +208,11 @@ struct Index {
      * If there are not enough results for a query, the resulting arrays
      * is padded with -1s.
      *
+     * @param n         number of vectors
+     * @param x         input vectors to search, size n * d
+     * @param k         number of extracted vectors
+     * @param distances output pairwise distances, size n*k
+     * @param labels    output labels of the NNs, size n*k
      * @param recons    reconstructed vectors size (n, k, d)
      **/
     virtual void search_and_reconstruct(
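
To make the documented search contract concrete, a small self-contained sketch using the standard faiss Python API (shapes follow the @param comments above):

    import faiss
    import numpy as np

    d, nb, nq, k = 64, 10000, 5, 10
    xb = np.random.rand(nb, d).astype('float32')
    xq = np.random.rand(nq, d).astype('float32')

    index = faiss.IndexFlatL2(d)
    index.add(xb)               # vectors get implicit labels 0 .. nb - 1

    D, I = index.search(xq, k)  # D: (nq, k) distances, I: (nq, k) labels
    assert D.shape == (nq, k) and I.shape == (nq, k)
    # I contains -1 entries where fewer than k results exist
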
diff --git a/thirdparty/faiss/faiss/IndexFlat.h b/thirdparty/faiss/faiss/IndexFlat.h
index c045f2779..165b06aaa 100644
--- a/thirdparty/faiss/faiss/IndexFlat.h
+++ b/thirdparty/faiss/faiss/IndexFlat.h
@@ -21,8 +21,10 @@ struct IndexFlat : IndexFlatCodes {
     /// database vectors, size ntotal * d
     std::vector<float> xb;

-    explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2,
-            bool is_cosine = false);
+    explicit IndexFlat(
+            idx_t d, ///< dimensionality of the input vectors
+            MetricType metric = METRIC_L2,
+            bool is_cosine = false);

     // Be careful with overriding this function, because
     // renormalized x may be used inside.
@@ -89,6 +91,9 @@ struct IndexFlat : IndexFlatCodes {
 };

 struct IndexFlatIP : IndexFlat {
+    /**
+     * @param d dimensionality of the input vectors
+     */
     explicit IndexFlatIP(idx_t d) : IndexFlat(d, METRIC_INNER_PRODUCT) {}
     IndexFlatIP() {}
 };
@@ -100,6 +105,9 @@ struct IndexFlatL2 : IndexFlat {
     // and l2 norms.
     std::vector<float> cached_l2norms;

+    /**
+     * @param d dimensionality of the input vectors
+     */
     explicit IndexFlatL2(idx_t d) : IndexFlat(d, METRIC_L2) {}
     IndexFlatL2() {}
diff --git a/thirdparty/faiss/faiss/IndexFlatCodes.h b/thirdparty/faiss/faiss/IndexFlatCodes.h
index 35123ce2b..ad91c8601 100644
--- a/thirdparty/faiss/faiss/IndexFlatCodes.h
+++ b/thirdparty/faiss/faiss/IndexFlatCodes.h
@@ -36,7 +36,6 @@ struct IndexFlatCodes : Index {

     void reset() override;

-    /// reconstruction using the codec interface
     void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;

     void reconstruct(idx_t key, float* recons) const override;
diff --git a/thirdparty/faiss/faiss/IndexIVF.cpp b/thirdparty/faiss/faiss/IndexIVF.cpp
index 8f976f37c..284dd83d5 100644
--- a/thirdparty/faiss/faiss/IndexIVF.cpp
+++ b/thirdparty/faiss/faiss/IndexIVF.cpp
@@ -711,6 +711,12 @@ void IndexIVF::search_preassigned(
         }
     }

+    // // baseline Faiss version contains the following additional code,
+    // // introduced in 0a00d8137a386a0efd7f789e3e0912ab4eb73508
+    // if (ivf_stats == nullptr) {
+    //     ivf_stats = &indexIVF_stats;
+    // }
+
     if (ivf_stats) {
         ivf_stats->nq += n;
         ivf_stats->nlist += nlistv;
@@ -911,6 +917,12 @@ void IndexIVF::range_search_preassigned(
         }
     }

+    // // baseline Faiss version contains the following additional code,
+    // // introduced in 0a00d8137a386a0efd7f789e3e0912ab4eb73508
+    // if (stats == nullptr) {
+    //     stats = &indexIVF_stats;
+    // }
+
     if (stats) {
         stats->nq += nx;
         stats->nlist += nlistv;
@@ -993,16 +1005,12 @@ void IndexIVF::search_and_reconstruct(
             std::min(nlist, params ? params->nprobe : this->nprobe);
     FAISS_THROW_IF_NOT(nprobe > 0);

-    // todo aguzhva: deprecate ScopeDeleter and ScopeDeleter1
-    // in favor of std::unique_ptr
-    idx_t* idx = new idx_t[n * nprobe];
-    ScopeDeleter<idx_t> del(idx);
-    float* coarse_dis = new float[n * nprobe];
-    ScopeDeleter<float> del2(coarse_dis);
+    std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
+    std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);

-    quantizer->search(n, x, nprobe, coarse_dis, idx);
+    quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());

-    invlists->prefetch_lists(idx, n * nprobe);
+    invlists->prefetch_lists(idx.get(), n * nprobe);

     // search_preassigned() with `store_pairs` enabled to obtain the list_no
     // and offset into `codes` for reconstruction
@@ -1010,29 +1018,94 @@ void IndexIVF::search_and_reconstruct(
             n,
             x,
             k,
-            idx,
-            coarse_dis,
+            idx.get(),
+            coarse_dis.get(),
             distances,
             labels,
             true /* store_pairs */,
             params);

-    for (idx_t i = 0; i < n; ++i) {
-        for (idx_t j = 0; j < k; ++j) {
-            idx_t ij = i * k + j;
-            idx_t key = labels[ij];
-            float* reconstructed = recons + ij * d;
-            if (key < 0) {
-                // Fill with NaNs
-                memset(reconstructed, -1, sizeof(*reconstructed) * d);
-            } else {
-                int list_no = lo_listno(key);
-                int offset = lo_offset(key);
+#pragma omp parallel for if (n * k > 1000)
+    for (idx_t ij = 0; ij < n * k; ij++) {
+        idx_t key = labels[ij];
+        float* reconstructed = recons + ij * d;
+        if (key < 0) {
+            // Fill with NaNs
+            memset(reconstructed, -1, sizeof(*reconstructed) * d);
+        } else {
+            int list_no = lo_listno(key);
+            int offset = lo_offset(key);
+
+            // Update label to the actual id
+            labels[ij] = invlists->get_single_id(list_no, offset);
+
+            reconstruct_from_offset(list_no, offset, reconstructed);
+        }
+    }
+}
+
+void IndexIVF::search_and_return_codes(
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels,
+        uint8_t* codes,
+        bool include_listno,
+        const SearchParameters* params_in) const {
+    const IVFSearchParameters* params = nullptr;
+    if (params_in) {
+        params = dynamic_cast<const IVFSearchParameters*>(params_in);
+        FAISS_THROW_IF_NOT_MSG(params, "IndexIVF params have incorrect type");
+    }
+    const size_t nprobe =
+            std::min(nlist, params ? params->nprobe : this->nprobe);
+    FAISS_THROW_IF_NOT(nprobe > 0);
+
+    std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
+    std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
+
+    quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
+
+    invlists->prefetch_lists(idx.get(), n * nprobe);
+
+    // search_preassigned() with `store_pairs` enabled to obtain the list_no
+    // and offset into `codes` for reconstruction
+    search_preassigned(
+            n,
+            x,
+            k,
+            idx.get(),
+            coarse_dis.get(),
+            distances,
+            labels,
+            true /* store_pairs */,
+            params);
+
+    size_t code_size_1 = code_size;
+    if (include_listno) {
+        code_size_1 += coarse_code_size();
+    }
+
+#pragma omp parallel for if (n * k > 1000)
+    for (idx_t ij = 0; ij < n * k; ij++) {
+        idx_t key = labels[ij];
+        uint8_t* code1 = codes + ij * code_size_1;
+
+        if (key < 0) {
+            // Fill with 0xff
+            memset(code1, -1, code_size_1);
+        } else {
+            int list_no = lo_listno(key);
+            int offset = lo_offset(key);
+            const uint8_t* cc = invlists->get_single_code(list_no, offset);

-            // Update label to the actual id
-            labels[ij] = invlists->get_single_id(list_no, offset);
+            labels[ij] = invlists->get_single_id(list_no, offset);

-            reconstruct_from_offset(list_no, offset, reconstructed);
+            if (include_listno) {
+                encode_listno(list_no, code1);
+                code1 += code_size_1 - code_size;
             }
+            memcpy(code1, cc, code_size);
         }
     }
 }
diff --git a/thirdparty/faiss/faiss/IndexIVF.h b/thirdparty/faiss/faiss/IndexIVF.h
index f8bbe6e3c..9e4bf0f62 100644
--- a/thirdparty/faiss/faiss/IndexIVF.h
+++ b/thirdparty/faiss/faiss/IndexIVF.h
@@ -358,6 +358,24 @@ struct IndexIVF : Index, IndexIVFInterface {
             float* recons,
             const SearchParameters* params = nullptr) const override;

+    /** Similar to search, but also returns the codes corresponding to the
+     * stored vectors for the search results.
+     *
+     * @param codes codes (n, k, code_size)
+     * @param include_listno
+     *                include the list ids in the code (in this case add
+     *                ceil(log2(nlist) / 8) bytes to the code size)
+     */
+    void search_and_return_codes(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            uint8_t* codes,
+            bool include_listno = false,
+            const SearchParameters* params = nullptr) const;
+
     /** Reconstruct a vector given the location in terms of (inv list index +
      * inv list offset) instead of the id.
      *
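
From Python, search_and_return_codes() returns the packed codes alongside distances and labels; with include_listnos=True the coarse list id is prepended, so the codes round-trip through sa_decode(). A sketch mirroring the unit test added further down (the factory string is chosen arbitrarily):

    import faiss
    import numpy as np

    d = 32
    xb = np.random.rand(1000, d).astype('float32')
    xq = np.random.rand(10, d).astype('float32')

    index = faiss.index_factory(d, 'IVF20,SQ8')
    index.train(xb)
    index.add(xb)

    D, I, codes = index.search_and_return_codes(
        xq, 10, include_listnos=True)     # codes: (nq, k, code_size) uint8
    x_approx = index.sa_decode(codes[0])  # decode query 0's result codes
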
diff --git a/thirdparty/faiss/faiss/IndexIVFAdditiveQuantizer.cpp b/thirdparty/faiss/faiss/IndexIVFAdditiveQuantizer.cpp
index 9395dd6ee..7fc9b4a48 100644
--- a/thirdparty/faiss/faiss/IndexIVFAdditiveQuantizer.cpp
+++ b/thirdparty/faiss/faiss/IndexIVFAdditiveQuantizer.cpp
@@ -150,6 +150,7 @@ struct AQInvertedListScanner : InvertedListScanner {
     const float* q;

     /// following codes come from this inverted list
     void set_list(idx_t list_no, float coarse_dis) override {
+        this->list_no = list_no;
         if (ia.metric_type == METRIC_L2 && ia.by_residual) {
             ia.quantizer->compute_residual(q0, tmp.data(), list_no);
             q = tmp.data();
diff --git a/thirdparty/faiss/faiss/IndexRefine.cpp b/thirdparty/faiss/faiss/IndexRefine.cpp
index 2d17d33c4..2372c137a 100644
--- a/thirdparty/faiss/faiss/IndexRefine.cpp
+++ b/thirdparty/faiss/faiss/IndexRefine.cpp
@@ -97,16 +97,26 @@ void IndexRefine::search(
         idx_t k,
         float* distances,
         idx_t* labels,
-        const SearchParameters* params) const {
-    FAISS_THROW_IF_NOT_MSG(
-            !params, "search params not supported for this index");
+        const SearchParameters* params_in) const {
+    const IndexRefineSearchParameters* params = nullptr;
+    if (params_in) {
+        params = dynamic_cast<const IndexRefineSearchParameters*>(params_in);
+        FAISS_THROW_IF_NOT_MSG(
+                params, "IndexRefine params have incorrect type");
+    }
+
+    idx_t k_base = (params != nullptr) ? idx_t(k * params->k_factor)
+                                       : idx_t(k * k_factor);
+    SearchParameters* base_index_params =
+            (params != nullptr) ? params->base_index_params : nullptr;
+
+    FAISS_THROW_IF_NOT(k_base >= k);

     FAISS_THROW_IF_NOT(base_index);
     FAISS_THROW_IF_NOT(refine_index);

     FAISS_THROW_IF_NOT(k > 0);
     FAISS_THROW_IF_NOT(is_trained);
-    idx_t k_base = idx_t(k * k_factor);
     idx_t* base_labels = labels;
     float* base_distances = distances;
     ScopeDeleter<idx_t> del1;
@@ -119,7 +129,8 @@ void IndexRefine::search(
         del2.set(base_distances);
     }

-    base_index->search(n, x, k_base, base_distances, base_labels);
+    base_index->search(
+            n, x, k_base, base_distances, base_labels, base_index_params);

     for (int i = 0; i < n * k_base; i++)
         assert(base_labels[i] >= -1 && base_labels[i] < ntotal);
@@ -250,15 +261,26 @@ void IndexRefineFlat::search(
         idx_t k,
         float* distances,
         idx_t* labels,
-        const SearchParameters* params) const {
-    FAISS_THROW_IF_NOT_MSG(
-            !params, "search params not supported for this index");
+        const SearchParameters* params_in) const {
+    const IndexRefineSearchParameters* params = nullptr;
+    if (params_in) {
+        params = dynamic_cast<const IndexRefineSearchParameters*>(params_in);
+        FAISS_THROW_IF_NOT_MSG(
+                params, "IndexRefineFlat params have incorrect type");
+    }
+
+    idx_t k_base = (params != nullptr) ? idx_t(k * params->k_factor)
+                                       : idx_t(k * k_factor);
+    SearchParameters* base_index_params =
+            (params != nullptr) ? params->base_index_params : nullptr;
+
+    FAISS_THROW_IF_NOT(k_base >= k);
+
+    FAISS_THROW_IF_NOT(base_index);
     FAISS_THROW_IF_NOT(refine_index);

     FAISS_THROW_IF_NOT(k > 0);
     FAISS_THROW_IF_NOT(is_trained);
-    idx_t k_base = idx_t(k * k_factor);
     idx_t* base_labels = labels;
     float* base_distances = distances;
     ScopeDeleter<idx_t> del1;
@@ -271,7 +293,8 @@ void IndexRefineFlat::search(
         del2.set(base_distances);
     }

-    base_index->search(n, x, k_base, base_distances, base_labels);
+    base_index->search(
+            n, x, k_base, base_distances, base_labels, base_index_params);

     for (int i = 0; i < n * k_base; i++)
         assert(base_labels[i] >= -1 && base_labels[i] < ntotal);
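
The refine parameters compose with the base index's own parameters. A hedged sketch of the Python call pattern (index and xq are assumed to be an IVF+refine index and a query matrix built elsewhere; the test added below exercises this path in full):

    import faiss

    params = faiss.IndexRefineSearchParameters(
        k_factor=2.0,  # fetch 2*k candidates from the base index, then re-rank
        base_index_params=faiss.IVFSearchParameters(nprobe=8),  # non-owning
    )
    D, I = index.search(xq, 10, params=params)
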
diff --git a/thirdparty/faiss/faiss/IndexRefine.h b/thirdparty/faiss/faiss/IndexRefine.h
index 79b671b56..23687af9f 100644
--- a/thirdparty/faiss/faiss/IndexRefine.h
+++ b/thirdparty/faiss/faiss/IndexRefine.h
@@ -11,6 +11,13 @@

 namespace faiss {

+struct IndexRefineSearchParameters : SearchParameters {
+    float k_factor = 1;
+    SearchParameters* base_index_params = nullptr; // non-owning
+
+    virtual ~IndexRefineSearchParameters() = default;
+};
+
 /** Index that queries in a base_index (a fast one) and refines the
  * results with an exact search, hopefully improving the results.
  */
diff --git a/thirdparty/faiss/faiss/impl/AdditiveQuantizer.cpp b/thirdparty/faiss/faiss/impl/AdditiveQuantizer.cpp
index e72af5431..eea6be778 100644
--- a/thirdparty/faiss/faiss/impl/AdditiveQuantizer.cpp
+++ b/thirdparty/faiss/faiss/impl/AdditiveQuantizer.cpp
@@ -262,7 +262,7 @@ void AdditiveQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
             is_trained, "The additive quantizer is not trained yet.");

     // standard additive quantizer decoding
-#pragma omp parallel for if (n > 1000)
+#pragma omp parallel for if (n > 100)
     for (int64_t i = 0; i < n; i++) {
         BitstringReader bsr(code + i * code_size, code_size);
         float* xi = x + i * d;
diff --git a/thirdparty/faiss/faiss/impl/ProductQuantizer.cpp b/thirdparty/faiss/faiss/impl/ProductQuantizer.cpp
index 06db87662..2ce6cecf1 100644
--- a/thirdparty/faiss/faiss/impl/ProductQuantizer.cpp
+++ b/thirdparty/faiss/faiss/impl/ProductQuantizer.cpp
@@ -307,7 +307,8 @@ void ProductQuantizer::decode(const uint8_t* code, float* x) const {
 }

 void ProductQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
-    for (size_t i = 0; i < n; i++) {
+#pragma omp parallel for if (n > 100)
+    for (int64_t i = 0; i < n; i++) {
         this->decode(code + code_size * i, x + d * i);
     }
 }
diff --git a/thirdparty/faiss/faiss/utils/distances.cpp b/thirdparty/faiss/faiss/utils/distances.cpp
index 252490598..1c340a3a3 100644
--- a/thirdparty/faiss/faiss/utils/distances.cpp
+++ b/thirdparty/faiss/faiss/utils/distances.cpp
@@ -896,11 +896,16 @@ void fvec_inner_products_by_idx(

         // // baseline version
         // for (size_t i = 0; i < ny; i++) {
-        //     if (idsj[i] < 0)
-        //         continue;
-        //     ipj[i] = fvec_inner_product(xj, y + d * idsj[i], d);
+        //     if (idsj[i] < 0) {
+        //         ipj[i] = -INFINITY;
+        //     } else {
+        //         ipj[i] = fvec_inner_product(xj, y + d * idsj[i], d);
+        //     }
         // }

+        // todo aguzhva: this version deviates from the baseline
+        //   in that it does not assign -INFINITY for filtered-out ids.
+
         // the lambda that filters acceptable elements.
         auto filter = [=](const size_t i) { return (idsj[i] >= 0); };
@@ -940,11 +945,16 @@ void fvec_L2sqr_by_idx(

         // // baseline version
         // for (size_t i = 0; i < ny; i++) {
-        //     if (idsj[i] < 0)
-        //         continue;
-        //     disj[i] = fvec_L2sqr(xj, y + d * idsj[i], d);
+        //     if (idsj[i] < 0) {
+        //         disj[i] = INFINITY;
+        //     } else {
+        //         disj[i] = fvec_L2sqr(xj, y + d * idsj[i], d);
+        //     }
         // }

+        // todo aguzhva: this version deviates from the baseline
+        //   in that it does not assign INFINITY for filtered-out ids.
+
         // the lambda that filters acceptable elements.
         auto filter = [=](const size_t i) { return (idsj[i] >= 0); };
@@ -978,6 +988,8 @@ void pairwise_indexed_L2sqr(
     for (int64_t j = 0; j < n; j++) {
         if (ix[j] >= 0 && iy[j] >= 0) {
             dis[j] = fvec_L2sqr(x + d * ix[j], y + d * iy[j], d);
+        } else {
+            dis[j] = INFINITY;
         }
     }
 }
@@ -994,6 +1006,8 @@ void pairwise_indexed_inner_product(
     for (int64_t j = 0; j < n; j++) {
         if (ix[j] >= 0 && iy[j] >= 0) {
             dis[j] = fvec_inner_product(x + d * ix[j], y + d * iy[j], d);
+        } else {
+            dis[j] = -INFINITY;
         }
     }
 }
diff --git a/thirdparty/faiss/faiss/utils/distances.h b/thirdparty/faiss/faiss/utils/distances.h
index 30b631673..55e0063f7 100644
--- a/thirdparty/faiss/faiss/utils/distances.h
+++ b/thirdparty/faiss/faiss/utils/distances.h
@@ -105,8 +105,16 @@ void fvec_sub(size_t d, const float* a, const float* b, float* c);
  * Compute a subset of distances
  ***************************************************************************/

-/* compute the inner product between x and a subset y of ny vectors,
-   whose indices are given by idy. */
+/** compute the inner product between x and a subset y of ny vectors defined by
+ * ids
+ *
+ * ip(i, j) = inner_product(x(i, :), y(ids(i, j), :))
+ *
+ * @param ip  output array, size nx * ny
+ * @param x   first-term vector, size nx * d
+ * @param y   second-term vector, size (max(ids) + 1) * d
+ * @param ids ids to sample from y, size nx * ny
+ */
 void fvec_inner_products_by_idx(
         float* ip,
         const float* x,
@@ -116,7 +124,16 @@ void fvec_inner_products_by_idx(
         size_t nx,
         size_t ny);

-/* same but for a subset in y indexed by idsy (ny vectors in total) */
+/** compute the squared L2 distances between x and a subset y of ny vectors
+ * defined by ids
+ *
+ * dis(i, j) = L2sqr(x(i, :), y(ids(i, j), :))
+ *
+ * @param dis output array, size nx * ny
+ * @param x   first-term vector, size nx * d
+ * @param y   second-term vector, size (max(ids) + 1) * d
+ * @param ids ids to sample from y, size nx * ny
+ */
 void fvec_L2sqr_by_idx(
         float* dis,
         const float* x,
@@ -143,7 +160,14 @@ void pairwise_indexed_L2sqr(
         const int64_t* iy,
         float* dis);

-/* same for inner product */
+/** compute dis[j] = inner_product(x[ix[j]], y[iy[j]]) forall j=0..n-1
+ *
+ * @param x   size (max(ix) + 1, d)
+ * @param y   size (max(iy) + 1, d)
+ * @param ix  size n
+ * @param iy  size n
+ * @param dis size n
+ */
 void pairwise_indexed_inner_product(
         size_t d,
         size_t n,
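
The documented semantics of the two by-idx kernels, rendered in numpy for reference (a self-contained sketch; all names are local):

    import numpy as np

    nx, ny, d, ntot = 4, 3, 8, 16
    x = np.random.rand(nx, d).astype('float32')
    y = np.random.rand(ntot, d).astype('float32')
    ids = np.random.randint(ntot, size=(nx, ny))

    # fvec_inner_products_by_idx: ip(i, j) = <x(i, :), y(ids(i, j), :)>
    ip = np.einsum('id,ijd->ij', x, y[ids])

    # fvec_L2sqr_by_idx: dis(i, j) = || x(i, :) - y(ids(i, j), :) ||^2
    dis = ((x[:, None, :] - y[ids]) ** 2).sum(axis=-1)
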
diff --git a/thirdparty/faiss/faiss/utils/hamming.cpp b/thirdparty/faiss/faiss/utils/hamming.cpp
index 00ac8dc7a..f0242f875 100644
--- a/thirdparty/faiss/faiss/utils/hamming.cpp
+++ b/thirdparty/faiss/faiss/utils/hamming.cpp
@@ -686,4 +686,88 @@ void generalized_hammings_knn_hc(
         ha->reorder();
 }

+void pack_bitstrings(
+        size_t n,
+        size_t M,
+        int nbit,
+        const int32_t* unpacked,
+        uint8_t* packed,
+        size_t code_size) {
+    FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
+#pragma omp parallel for if (n > 1000)
+    for (int64_t i = 0; i < n; i++) {
+        const int32_t* in = unpacked + i * M;
+        uint8_t* out = packed + i * code_size;
+        BitstringWriter wr(out, code_size);
+        for (int j = 0; j < M; j++) {
+            wr.write(in[j], nbit);
+        }
+    }
+}
+
+void pack_bitstrings(
+        size_t n,
+        size_t M,
+        const int32_t* nbit,
+        const int32_t* unpacked,
+        uint8_t* packed,
+        size_t code_size) {
+    int totbit = 0;
+    for (int j = 0; j < M; j++) {
+        totbit += nbit[j];
+    }
+    FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
+#pragma omp parallel for if (n > 1000)
+    for (int64_t i = 0; i < n; i++) {
+        const int32_t* in = unpacked + i * M;
+        uint8_t* out = packed + i * code_size;
+        BitstringWriter wr(out, code_size);
+        for (int j = 0; j < M; j++) {
+            wr.write(in[j], nbit[j]);
+        }
+    }
+}
+
+void unpack_bitstrings(
+        size_t n,
+        size_t M,
+        int nbit,
+        const uint8_t* packed,
+        size_t code_size,
+        int32_t* unpacked) {
+    FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
+#pragma omp parallel for if (n > 1000)
+    for (int64_t i = 0; i < n; i++) {
+        const uint8_t* in = packed + i * code_size;
+        int32_t* out = unpacked + i * M;
+        BitstringReader rd(in, code_size);
+        for (int j = 0; j < M; j++) {
+            out[j] = rd.read(nbit);
+        }
+    }
+}
+
+void unpack_bitstrings(
+        size_t n,
+        size_t M,
+        const int32_t* nbit,
+        const uint8_t* packed,
+        size_t code_size,
+        int32_t* unpacked) {
+    int totbit = 0;
+    for (int j = 0; j < M; j++) {
+        totbit += nbit[j];
+    }
+    FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
+#pragma omp parallel for if (n > 1000)
+    for (int64_t i = 0; i < n; i++) {
+        const uint8_t* in = packed + i * code_size;
+        int32_t* out = unpacked + i * M;
+        BitstringReader rd(in, code_size);
+        for (int j = 0; j < M; j++) {
+            out[j] = rd.read(nbit[j]);
+        }
+    }
+}
+
 } // namespace faiss
diff --git a/thirdparty/faiss/faiss/utils/hamming.h b/thirdparty/faiss/faiss/utils/hamming.h
index 661b49e49..89b1dc5da 100644
--- a/thirdparty/faiss/faiss/utils/hamming.h
+++ b/thirdparty/faiss/faiss/utils/hamming.h
@@ -225,6 +225,64 @@ void generalized_hammings_knn_hc(
         size_t code_size,
         int ordered = true);

+/** Pack a set of n codes of size M * nbit
+ *
+ * @param n         number of codes to pack
+ * @param M         number of elementary codes per code
+ * @param nbit      number of bits per elementary code
+ * @param unpacked  input unpacked codes, size (n, M)
+ * @param packed    output packed codes, size (n, code_size)
+ * @param code_size should be >= ceil(M * nbit / 8)
+ */
+void pack_bitstrings(
+        size_t n,
+        size_t M,
+        int nbit,
+        const int32_t* unpacked,
+        uint8_t* packed,
+        size_t code_size);
+
+/** Pack a set of n codes of variable sizes
+ *
+ * @param nbits number of bits per entry (size M)
+ */
+void pack_bitstrings(
+        size_t n,
+        size_t M,
+        const int32_t* nbits,
+        const int32_t* unpacked,
+        uint8_t* packed,
+        size_t code_size);
+
+/** Unpack a set of n codes of size M * nbit
+ *
+ * @param n         number of codes to unpack
+ * @param M         number of elementary codes per code
+ * @param nbit      number of bits per elementary code
+ * @param packed    input packed codes, size (n, code_size)
+ * @param code_size should be >= ceil(M * nbit / 8)
+ * @param unpacked  output unpacked codes, size (n, M)
+ */
+void unpack_bitstrings(
+        size_t n,
+        size_t M,
+        int nbit,
+        const uint8_t* packed,
+        size_t code_size,
+        int32_t* unpacked);
+
+/** Unpack a set of n codes of variable sizes
+ *
+ * @param nbits number of bits per entry (size M)
+ */
+void unpack_bitstrings(
+        size_t n,
+        size_t M,
+        const int32_t* nbits,
+        const uint8_t* packed,
+        size_t code_size,
+        int32_t* unpacked);
+
 } // namespace faiss

 #include <faiss/utils/hamming-inl.h>
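
The pack/unpack round trip is easiest to see from Python. This sketch assumes the functions are exposed through SWIG with raw pointer arguments, as the test_arrays test added at the end of this patch suggests; if they are not, treat it as pseudocode for the C++ API:

    import faiss
    import numpy as np

    n, M, nbit = 20, 10, 5
    rs = np.random.RandomState(123)
    a = rs.randint(1 << nbit, size=(n, M)).astype('int32')

    code_size = (M * nbit + 7) // 8   # 50 bits -> 7 bytes per code
    packed = np.zeros((n, code_size), dtype='uint8')
    faiss.pack_bitstrings(
        n, M, nbit, faiss.swig_ptr(a), faiss.swig_ptr(packed), code_size)

    b = np.zeros((n, M), dtype='int32')
    faiss.unpack_bitstrings(
        n, M, nbit, faiss.swig_ptr(packed), code_size, faiss.swig_ptr(b))
    assert np.array_equal(a, b)
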
diff --git a/thirdparty/faiss/faiss/utils/partitioning.cpp b/thirdparty/faiss/faiss/utils/partitioning.cpp
index 955bf2da9..4070aaae6 100644
--- a/thirdparty/faiss/faiss/utils/partitioning.cpp
+++ b/thirdparty/faiss/faiss/utils/partitioning.cpp
@@ -754,8 +754,6 @@ typename C::T partition_fuzzy(
         size_t q_min,
         size_t q_max,
         size_t* q_out) {
-    // the code below compiles and runs without AVX2 but it's slower than
-    // the scalar implementation
 #ifdef __AVX2__
     constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
     if (is_uint16 && is_aligned_pointer(vals)) {
diff --git a/thirdparty/faiss/tests/test_index_composite.py b/thirdparty/faiss/tests/test_index_composite.py
index 81a00cb93..a760c0cf0 100644
--- a/thirdparty/faiss/tests/test_index_composite.py
+++ b/thirdparty/faiss/tests/test_index_composite.py
@@ -14,7 +14,7 @@
 import tempfile
 import platform

-from common_faiss_tests import get_dataset_2
+from common_faiss_tests import get_dataset_2, get_dataset

 from faiss.contrib.datasets import SyntheticDataset
 from faiss.contrib.inspect_tools import make_LinearTransform_matrix
 from faiss.contrib.evaluation import check_ref_knn_with_draws
@@ -822,3 +822,156 @@ def test_precomputed_tables(self):

         np.testing.assert_array_equal(Dnew, D2)
         np.testing.assert_array_equal(Inew, I2)
+
+
+class TestSearchAndReconstruct(unittest.TestCase):
+
+    def run_search_and_reconstruct(self, index, xb, xq, k=10, eps=None):
+        n, d = xb.shape
+        assert xq.shape[1] == d
+        assert index.d == d
+
+        D_ref, I_ref = index.search(xq, k)
+        R_ref = index.reconstruct_n(0, n)
+        D, I, R = index.search_and_reconstruct(xq, k)
+
+        np.testing.assert_almost_equal(D, D_ref, decimal=5)
+        self.assertTrue((I == I_ref).all())
+        self.assertEqual(R.shape[:2], I.shape)
+        self.assertEqual(R.shape[2], d)
+
+        # (n, k, ..) -> (n * k, ..)
+        I_flat = I.reshape(-1)
+        R_flat = R.reshape(-1, d)
+        # Filter out -1s when not enough results
+        R_flat = R_flat[I_flat >= 0]
+        I_flat = I_flat[I_flat >= 0]
+
+        recons_ref_err = np.mean(np.linalg.norm(R_flat - R_ref[I_flat]))
+        self.assertLessEqual(recons_ref_err, 1e-6)
+
+        def norm1(x):
+            return np.sqrt((x ** 2).sum(axis=1))
+
+        recons_err = np.mean(norm1(R_flat - xb[I_flat]))
+
+        print('Reconstruction error = %.3f' % recons_err)
+        if eps is not None:
+            self.assertLessEqual(recons_err, eps)
+
+        return D, I, R
+
+    def test_IndexFlat(self):
+        d = 32
+        nb = 1000
+        nt = 1500
+        nq = 200
+
+        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
+
+        index = faiss.IndexFlatL2(d)
+        index.add(xb)
+
+        self.run_search_and_reconstruct(index, xb, xq, eps=0.0)
+
+    def test_IndexIVFFlat(self):
+        d = 32
+        nb = 1000
+        nt = 1500
+        nq = 200
+
+        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
+
+        quantizer = faiss.IndexFlatL2(d)
+        index = faiss.IndexIVFFlat(quantizer, d, 32, faiss.METRIC_L2)
+        index.cp.min_points_per_centroid = 5    # quiet warning
+        index.nprobe = 4
+        index.train(xt)
+        index.add(xb)
+
+        self.run_search_and_reconstruct(index, xb, xq, eps=0.0)
+
+    def test_IndexIVFPQ(self):
+        d = 32
+        nb = 1000
+        nt = 1500
+        nq = 200
+
+        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
+
+        quantizer = faiss.IndexFlatL2(d)
+        index = faiss.IndexIVFPQ(quantizer, d, 32, 8, 8)
+        index.cp.min_points_per_centroid = 5    # quiet warning
+        index.nprobe = 4
+        index.train(xt)
+        index.add(xb)
+
+        self.run_search_and_reconstruct(index, xb, xq, eps=1.0)
+
+    def test_MultiIndex(self):
+        d = 32
+        nb = 1000
+        nt = 1500
+        nq = 200
+
+        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
+
+        index = faiss.index_factory(d, "IMI2x5,PQ8np")
+        faiss.ParameterSpace().set_index_parameter(index, "nprobe", 4)
+        index.train(xt)
+        index.add(xb)
+
+        self.run_search_and_reconstruct(index, xb, xq, eps=1.0)
+
+    def test_IndexTransform(self):
+        d = 32
+        nb = 1000
+        nt = 1500
+        nq = 200
+
+        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
+
+        index = faiss.index_factory(d, "L2norm,PCA8,IVF32,PQ8np")
+        faiss.ParameterSpace().set_index_parameter(index, "nprobe", 4)
+        index.train(xt)
+        index.add(xb)
+
+        self.run_search_and_reconstruct(index, xb, xq)
+
+
+class TestSearchAndGetCodes(unittest.TestCase):
+
+    def do_test(self, factory_string):
+        ds = SyntheticDataset(32, 1000, 100, 10)
+
+        index = faiss.index_factory(ds.d, factory_string)
+
+        index.train(ds.get_train())
+        index.add(ds.get_database())
+
+        index.nprobe = 10
+        Dref, Iref = index.search(ds.get_queries(), 10)
+
+        D, I, codes = index.search_and_return_codes(
+            ds.get_queries(), 10, include_listnos=True)
+
+        np.testing.assert_array_equal(I, Iref)
+        np.testing.assert_array_equal(D, Dref)
+
+        # verify that we get the same distances when decompressing from
+        # returned codes (the codes are compatible with sa_decode)
+        for qi in range(ds.nq):
+            q = ds.get_queries()[qi]
+            xbi = index.sa_decode(codes[qi])
+            D2 = ((q - xbi) ** 2).sum(1)
+            np.testing.assert_allclose(D2, D[qi], rtol=1e-5)
+
+    def test_ivfpq(self):
+        self.do_test("IVF20,PQ4x4np")
+
+    def test_ivfsq(self):
+        self.do_test("IVF20,SQ8")
+
+    def test_ivfrq(self):
+        self.do_test("IVF20,RQ3x4")
diff --git a/thirdparty/faiss/tests/test_refine.py b/thirdparty/faiss/tests/test_refine.py
index aff285f40..4e85ee11e 100644
--- a/thirdparty/faiss/tests/test_refine.py
+++ b/thirdparty/faiss/tests/test_refine.py
@@ -64,3 +64,58 @@ def test_distance_computer_AQ_LUT(self):

     def test_distance_computer_AQ_LUT_IP(self):
         self.do_test("RQ3x4_Nqint8", faiss.METRIC_INNER_PRODUCT)
+
+
+class TestIndexRefineSearchParams(unittest.TestCase):
+
+    def do_test(self, factory_string):
+        ds = datasets.SyntheticDataset(32, 256, 100, 40)
+
+        index = faiss.index_factory(32, factory_string)
+        index.train(ds.get_train())
+        index.add(ds.get_database())
+        index.nprobe = 4
+        xq = ds.get_queries()
+
+        # do a search with k_factor = 1
+        D1, I1 = index.search(xq, 10)
+        inter1 = faiss.eval_intersection(I1, ds.get_groundtruth(10))
+
+        # do a search with k_factor = 1.1
+        params = faiss.IndexRefineSearchParameters(k_factor=1.1)
+        D2, I2 = index.search(xq, 10, params=params)
+        inter2 = faiss.eval_intersection(I2, ds.get_groundtruth(10))
+
+        # do a search with k_factor = 2
+        params = faiss.IndexRefineSearchParameters(k_factor=2)
+        D3, I3 = index.search(xq, 10, params=params)
+        inter3 = faiss.eval_intersection(I3, ds.get_groundtruth(10))
+
+        # make sure that the recall rate increases with k_factor
+        self.assertGreater(inter2, inter1)
+        self.assertGreater(inter3, inter2)
+
+        # make sure that the baseline k_factor is unchanged
+        self.assertEqual(index.k_factor, 1)
+
+        # try passing params for the baseline index, change nprobe
+        base_params = faiss.IVFSearchParameters(nprobe=10)
+        params = faiss.IndexRefineSearchParameters(k_factor=1, base_index_params=base_params)
+        D4, I4 = index.search(xq, 10, params=params)
+        inter4 = faiss.eval_intersection(I4, ds.get_groundtruth(10))
+
+        base_params = faiss.IVFSearchParameters(nprobe=2)
+        params = faiss.IndexRefineSearchParameters(k_factor=1, base_index_params=base_params)
+        D5, I5 = index.search(xq, 10, params=params)
+        inter5 = faiss.eval_intersection(I5, ds.get_groundtruth(10))
+
+        # make sure that the recall rate changes
+        self.assertNotEqual(inter4, inter5)
+
+    def test_rflat(self):
+        # flat is handled by the IndexRefineFlat class
self.do_test("IVF8,PQ2x4np,RFlat") + + def test_refine_sq8(self): + # this case uses the IndexRefine class + self.do_test("IVF8,PQ2x4np,Refine(SQ8)") diff --git a/thirdparty/faiss/tests/test_residual_quantizer.py b/thirdparty/faiss/tests/test_residual_quantizer.py index 2ca3e4c32..e37ee3efe 100644 --- a/thirdparty/faiss/tests/test_residual_quantizer.py +++ b/thirdparty/faiss/tests/test_residual_quantizer.py @@ -1117,7 +1117,7 @@ def test_precomp(self): K = 1 << rq.nbits.at(m) cpp_table = codebook_cross_prods[ofs:ofs + K * kk].reshape(kk, K) ofs += kk * K - np.testing.assert_allclose(py_table, cpp_table, rtol=2e-4) + np.testing.assert_allclose(py_table, cpp_table, atol=1e-5) cent_norms = faiss.vector_to_array(rq.cent_norms) np.testing.assert_array_almost_equal( diff --git a/thirdparty/faiss/tests/test_standalone_codec.py b/thirdparty/faiss/tests/test_standalone_codec.py index 1e1993bb4..7fdcf6849 100644 --- a/thirdparty/faiss/tests/test_standalone_codec.py +++ b/thirdparty/faiss/tests/test_standalone_codec.py @@ -266,9 +266,9 @@ def test_ZnSphereCodecAlt24(self): class TestBitstring(unittest.TestCase): - """ Low-level bit string tests """ def test_rw(self): + """ Low-level bit string tests """ rs = np.random.RandomState(1234) nbyte = 1000 sz = 0 @@ -311,6 +311,26 @@ def test_rw(self): # print('nbit %d xref %x xnew %x' % (nbit, xref, xnew)) self.assertTrue(xnew == xref) + def test_arrays(self): + nbit = 5 + M = 10 + n = 20 + rs = np.random.RandomState(123) + a = rs.randint(1<