【python】faiss

0.介绍
Faiss 是 Facebook AI Research（现 Meta AI）开发的开源库，用于稠密向量的高效相似度检索与聚类，常用于图像检索等场景。

1.安装

# Install ONE of the following builds: CPU-only, or a GPU build matching the local CUDA toolkit.
conda install -c pytorch faiss-cpu # CPU-only build
conda install -c pytorch faiss-gpu cudatoolkit=10.2 # for CUDA 10.2
conda install -c pytorch faiss-gpu cudatoolkit=11.3 # for CUDA 11.3

2.应用

import numpy as np

d = 64          # vector dimensionality
nb = 100000     # number of database vectors
nq = 10000      # number of query vectors


def _random_vectors(count, dim):
    """Return a (count, dim) float32 array of uniform random samples.

    The first column of row ``i`` is additionally shifted by ``i / 1000``.
    """
    vectors = np.random.random((count, dim)).astype('float32')
    vectors[:, 0] += np.arange(count) / 1000.
    return vectors


np.random.seed(1234)            # fixed seed so the generated data is reproducible
xb = _random_vectors(nb, d)     # database vectors (sampled first)
xq = _random_vectors(nq, d)     # query vectors (sampled second, same RNG stream)

import faiss                        # make faiss available

# Build a flat L2 index over d-dimensional vectors and load the database set.
index = faiss.IndexFlatL2(d)
print(index.is_trained)
index.add(xb)
print(index.ntotal)

k = 4                               # number of nearest neighbours returned per query

# Sanity check: query with the first five database vectors themselves.
sanity_queries = xb[:5]
D, I = index.search(sanity_queries, k)
for result in (I, D):               # neighbour ids first, then distances
    print(result)

# Actual search over the whole query set.
D, I = index.search(xq, k)
for window in (slice(None, 5), slice(-5, None)):
    print(I[window])                # neighbours of the 5 first, then the 5 last queries

# Product-quantization index demo.
# Uses the xb / xq arrays built earlier in this script; the previous names
# x_base / x_query were never defined and raised NameError.
m = 16                               # number of subquantizers
n_bits = 8                           # bits allocated per subquantizer
pq = faiss.IndexPQ(d, m, n_bits)     # create the index
pq.train(xb)                         # training
pq.add(xb)                           # populate the index
D, I = pq.search(xq, k)              # perform a search (k neighbours per query)


faiss 进阶：使用 IndexIDMap 自定义向量 id 并按 id 删除向量

# -*- coding: utf-8 -*-
import faiss
import numpy as np


if __name__ == '__main__':
    n_vectors = 50
    dim = 32
    n_keep = 30  # number of trailing id_map entries left in place by the removal below

    def print_stored_ids(id_index):
        """Print each entry of ``id_index.id_map``, one per line, in storage order."""
        for pos in range(id_index.ntotal):
            print(id_index.id_map.at(pos))

    separator = "-" * 20

    vectors = np.random.rand(n_vectors, dim).astype('float32')
    labels = np.arange(n_vectors).astype(np.int64)

    # Wrap the flat L2 index in an IndexIDMap so vectors are stored under
    # the caller-supplied ids passed to add_with_ids.
    flat = faiss.IndexFlatL2(dim)
    index = faiss.IndexIDMap(flat)
    print(index.is_trained)
    index.add_with_ids(vectors, labels)
    print(index.ntotal)

    print_stored_ids(index)
    print(separator)

    # Remove vectors: collect the ids of every stored entry except the last
    # n_keep ones, then delete them by id.
    ids_del = np.array(
        [index.id_map.at(pos) for pos in range(index.ntotal - n_keep)],
        dtype=np.int64,
    )
    index.remove_ids(ids_del)
    print(index.ntotal)

    print(separator)
    print_stored_ids(index)

faiss hnsw gpu

你可能感兴趣的:(python,python)