import numpy as np
import faiss
# Build a reproducible random dataset: 10000 vectors of dimension 64 (float32).
np.random.seed(42)
data = np.random.random((10000, 64)).astype('float32')

# Product-quantizer parameters.
m = 8      # number of sub-spaces each vector is split into
nbits = 8  # bits used to encode each sub-space

# Create the IndexPQ, train its codebooks on the data, then add the data.
index = faiss.IndexPQ(data.shape[1], m, nbits)
index.train(data)
index.add(data)

# A single random query vector, and the number of neighbors to retrieve.
query_vector = np.random.random((1, 64)).astype('float32')
k = 5

# Run the search.
distances, indices = index.search(query_vector, k)

# Report the query and the search results, one heading followed by one value.
for heading, value in (
    ("Query Vector:", query_vector),
    ("\nIndices of Nearest Neighbors:", indices),
    ("\nDistances to Nearest Neighbors:", distances),
):
    print(heading)
    print(value)
IndexPQ 中包含一个 ProductQuantizer 类型的 pq 对象。
pq 对象中包含质心表(centroids),其大小为 M * ksub * dsub,内存布局为 (M, ksub, dsub);在上面的示例中即 (8, 256, 8)。
| 参数 | 解释 |
|---|---|
| M | number of subquantizers,输入向量被分为的片段的个数 |
| dsub | dimensionality of each subvector,每个子聚类表的长度 |
| ksub = $2^{nbits}$ | number of centroids for each subquantizer,每个子聚类表的宽度 |
可通过 faiss.vector_to_array(index.pq.centroids) 查看质心表中的具体数值(返回的是一维数组)。
如需按 (M, ksub, dsub) 查看,可再调用 .reshape(index.pq.M, index.pq.ksub, index.pq.dsub)。
import numpy as np
import faiss
# Two reproducible random datasets of 10000 x 64 float32 vectors.
np.random.seed(42)
data = np.random.random((10000, 64)).astype('float32')
data2 = np.random.random((10000, 64)).astype('float32')

# Product-quantizer parameters.
m = 8      # number of sub-spaces
nbits = 8  # bits per sub-space

# Train one IndexPQ per dataset so that each learns its own codebook.
index = faiss.IndexPQ(data.shape[1], m, nbits)
index.train(data)
index2 = faiss.IndexPQ(data.shape[1], m, nbits)
index2.train(data2)

# Replace index2's centroid table with the one learned by index.
index2.pq.centroids = index.pq.centroids

# Both indexes store the same vectors.
index.add(data)
index2.add(data)

# A single random query vector, and the number of neighbors to retrieve.
query_vector = np.random.random((1, 64)).astype('float32')
k = 5

# Search each index in turn and print the query followed by its results.
for pq_index in (index, index2):
    distances, indices = pq_index.search(query_vector, k)
    for heading, value in (
        ("Query Vector:", query_vector),
        ("\nIndices of Nearest Neighbors:", indices),
        ("\nDistances to Nearest Neighbors:", distances),
    ):
        print(heading)
        print(value)
# Query Vector:
# [[0.18171448 0.34181556 0.6398858 0.292473 0.44219118 0.63791186
# 0.19401862 0.17734843 0.26126006 0.38929975 0.02442818 0.72467136
# 0.9121011 0.0601452 0.42044804 0.56506294 0.9892394 0.2520515
# 0.12554157 0.3569948 0.7176223 0.6282157 0.53028387 0.19011611
# 0.8374111 0.91366297 0.6300717 0.21906242 0.34832168 0.6042122
# 0.55216706 0.15355448 0.47739747 0.07588766 0.45951515 0.46728414
# 0.8784772 0.2502514 0.8283812 0.77515835 0.7159397 0.6975115
# 0.24739715 0.89320683 0.07678613 0.7589492 0.29475844 0.8860514
# 0.8515612 0.9372315 0.5690415 0.02019571 0.78275704 0.02964665
# 0.36082503 0.22074123 0.4638003 0.3445418 0.8347299 0.3678306
# 0.00145097 0.44658396 0.02120558 0.74333763]]
# Indices of Nearest Neighbors:
# [[1356 3975 2011 5711 4734]]
# Distances to Nearest Neighbors:
# [[5.3155017 5.561659 5.6874743 5.7380037 5.762418 ]]
# Query Vector:
# [[0.18171448 0.34181556 0.6398858 0.292473 0.44219118 0.63791186
# 0.19401862 0.17734843 0.26126006 0.38929975 0.02442818 0.72467136
# 0.9121011 0.0601452 0.42044804 0.56506294 0.9892394 0.2520515
# 0.12554157 0.3569948 0.7176223 0.6282157 0.53028387 0.19011611
# 0.8374111 0.91366297 0.6300717 0.21906242 0.34832168 0.6042122
# 0.55216706 0.15355448 0.47739747 0.07588766 0.45951515 0.46728414
# 0.8784772 0.2502514 0.8283812 0.77515835 0.7159397 0.6975115
# 0.24739715 0.89320683 0.07678613 0.7589492 0.29475844 0.8860514
# 0.8515612 0.9372315 0.5690415 0.02019571 0.78275704 0.02964665
# 0.36082503 0.22074123 0.4638003 0.3445418 0.8347299 0.3678306
# 0.00145097 0.44658396 0.02120558 0.74333763]]
# Indices of Nearest Neighbors:
# [[1356 3975 2011 5711 4734]]
# Distances to Nearest Neighbors:
# [[5.3155017 5.561659 5.6874743 5.7380037 5.762418 ]]
import numpy as np
import faiss
# Product-quantizer parameters.
m, nbits = 8, 8  # m: number of sub-spaces, nbits: bits per sub-space

# Two reproducible random datasets of 10000 x 64 float32 vectors.
np.random.seed(42)
data = np.random.random((10000, 64)).astype('float32')
data2 = np.random.random((10000, 64)).astype('float32')

# index and index2 each learn their own codebook (2**nbits centroids per
# sub-space) from a different dataset.
index = faiss.IndexPQ(data.shape[1], m, nbits)
index.train(data)
index2 = faiss.IndexPQ(data.shape[1], m, nbits)
index2.train(data2)

# index3 is the fusion of index and index2: one extra bit per sub-space gives
# it twice as many centroids per sub-space, enough to hold both codebooks.
# The centroids learned by this train() call are discarded; they are
# overwritten with the merged table below.
index3 = faiss.IndexPQ(data.shape[1], m, nbits + 1)
index3.train(data2)


def _centroid_table(pq):
    """Return the centroids of *pq* as an array of shape (M, ksub, dsub)."""
    return faiss.vector_to_array(pq.centroids).reshape(pq.M, pq.ksub, pq.dsub)


# The centroid table is laid out as (M, ksub, dsub), so the two codebooks must
# be joined per sub-space, i.e. along the ksub axis. Concatenating the flat
# arrays instead would hand centroids trained for one sub-space to another.
merged_centroids = np.concatenate(
    (_centroid_table(index.pq), _centroid_table(index2.pq)),
    axis=1,
)
expected_shape = (index3.pq.M, index3.pq.ksub, index3.pq.dsub)
if merged_centroids.shape != expected_shape:
    raise ValueError(
        f"merged centroid table has shape {merged_centroids.shape}, "
        f"expected {expected_shape}"
    )
faiss.copy_array_to_vector(merged_centroids.ravel(), index3.pq.centroids)

# Add the same vectors to every index.
index.add(data)
index2.add(data)
index3.add(data)

# A single random query vector, and the number of neighbors to retrieve.
query_vector = np.random.random((1, 64)).astype('float32')
k = 5

# Search index, index2 and index3 in turn and print each result.
for pq_index in (index, index2, index3):
    distances, indices = pq_index.search(query_vector, k)
    print("\nIndices of Nearest Neighbors:")
    print(indices)
    print("\nDistances to Nearest Neighbors:")
    print(distances)

# Recorded output for index:
# Indices of Nearest Neighbors:
# [[1356 3975 2011 5711 4734]]
# Distances to Nearest Neighbors:
# [[5.3155017 5.561659 5.6874743 5.7380037 5.762418 ]]
# Recorded output for index2:
# Indices of Nearest Neighbors:
# [[7929 4107 961 2473 4802]]
# Distances to Nearest Neighbors:
# [[5.2938 5.525796 5.57064 5.7225237 5.799486 ]]
# Recorded output for index3.
# NOTE: these numbers were recorded when the two codebooks were joined as flat
# arrays; with the per-sub-space merge above they may differ -- re-run to refresh.
# Indices of Nearest Neighbors:
# [[7929 961 8924 6034 7534]]
# Distances to Nearest Neighbors:
# [[5.272976 5.293335 5.605359 5.696639 5.707428]]
# Print each returned neighbor: its rank, its row index in `data`, the
# distance reported by the search, and the original vector stored in `data`.
# Relies on `indices`, `distances` and `data` from the preceding snippet.
print("\nNearest Neighbors:")
for rank, (neighbor_index, distance) in enumerate(
    zip(indices[0], distances[0]), start=1
):
    neighbor_vector = data[neighbor_index]
    print(f"Neighbor {rank}: Index {neighbor_index}, Distance {distance}, Vector {neighbor_vector}")
Nearest Neighbors:
Neighbor 1: Index 1356, Distance 5.315501689910889, Vector [0.01101539 0.6567009 0.7633245 0.11660998 0.33732712 0.8499721
0.68720007 0.30464375 0.7422429 0.88726753 0.30932006 0.6842837
0.09341944 0.0586829 0.58625734 0.49242404 0.8100883 0.7802833
0.2866956 0.5122624 0.7557766 0.27095273 0.36196133 0.05986348
0.13048859 0.6102204 0.49675122 0.16859066 0.0072812 0.16903314
0.7496399 0.09368231 0.40244937 0.23878902 0.54939663 0.51155233
0.98295355 0.7728801 0.9383296 0.5779583 0.73778135 0.8950766
0.041071 0.91545016 0.21177031 0.7050161 0.7733409 0.109326
0.9530999 0.92655915 0.65455276 0.15532914 0.5660506 0.34414485
0.9307643 0.40665573 0.69374937 0.6370151 0.2710153 0.53549683
0.40998015 0.37462777 0.86400545 0.13975835]
Neighbor 2: Index 3975, Distance 5.56165885925293, Vector [0.6852252 0.79311645 0.3148995 0.42644194 0.43068996 0.21183491
0.05787511 0.9602238 0.29530123 0.68910587 0.15870273 0.708609
0.86639625 0.4510904 0.95853996 0.23694353 0.9699781 0.77007866
0.48550996 0.40872052 0.46613166 0.24974766 0.01244073 0.43974018
0.6752544 0.85017306 0.81168395 0.89650345 0.00525839 0.26145405
0.16250415 0.26849723 0.01632813 0.28710592 0.73261696 0.00488606
0.64295805 0.55107576 0.56322 0.731344 0.98232174 0.511173
0.18898515 0.914521 0.59773636 0.7063284 0.73153925 0.97906655
0.7590872 0.4468203 0.8288643 0.39922148 0.6796608 0.2297831
0.6257001 0.5006799 0.8744495 0.14236866 0.12442626 0.14521043
0.08433475 0.96692973 0.13060258 0.35526052]
Nearest Neighbors:
Neighbor 1: Index 7929, Distance 5.293799877166748, Vector [0.96233946 0.5737502 0.59273595 0.23098944 0.5369705 0.63797593
0.42823425 0.24575251 0.8893288 0.54502964 0.8060116 0.65886575
0.78253627 0.36670887 0.02456753 0.9354817 0.50337505 0.10899781
0.2375323 0.617193 0.43202353 0.2877622 0.23769969 0.46321324
0.54506296 0.92509645 0.6306161 0.29780295 0.4218431 0.03696149
0.3116852 0.390165 0.9549252 0.3775373 0.5620233 0.9112755
0.1394593 0.2466888 0.9241558 0.86005247 0.7937772 0.9627047
0.09679138 0.8644842 0.071664 0.19625679 0.01667842 0.68986166
0.71011275 0.7705593 0.67370415 0.07858868 0.4308906 0.09075476
0.03766147 0.18467574 0.2782387 0.37127924 0.98378307 0.48489136
0.22696696 0.07038712 0.22267212 0.10312359]
Neighbor 2: Index 4107, Distance 5.525795936584473, Vector [0.04460111 0.5836406 0.27762762 0.75389206 0.52659243 0.88937527
0.5552024 0.43461925 0.12575674 0.29606643 0.19991362 0.86584586
0.53224045 0.20149525 0.34396216 0.05069733 0.5733588 0.06891397
0.55476147 0.6457947 0.6288594 0.30873945 0.02107575 0.02294
0.05592747 0.21791738 0.37937504 0.93809557 0.72561693 0.70872927
0.89278466 0.8034361 0.78736126 0.15266728 0.6486509 0.34981716
0.91982204 0.00775846 0.585377 0.775304 0.5465568 0.80789727
0.9480229 0.705922 0.7635816 0.4436006 0.7039021 0.7166679
0.6503457 0.8582911 0.3602512 0.37543017 0.9880262 0.28702474
0.14523816 0.4190667 0.86942685 0.15948081 0.83756304 0.5973361
0.0859841 0.40533915 0.47337615 0.48650718]
Nearest Neighbors:
Neighbor 1: Index 7929, Distance 5.272975921630859, Vector [0.96233946 0.5737502 0.59273595 0.23098944 0.5369705 0.63797593
0.42823425 0.24575251 0.8893288 0.54502964 0.8060116 0.65886575
0.78253627 0.36670887 0.02456753 0.9354817 0.50337505 0.10899781
0.2375323 0.617193 0.43202353 0.2877622 0.23769969 0.46321324
0.54506296 0.92509645 0.6306161 0.29780295 0.4218431 0.03696149
0.3116852 0.390165 0.9549252 0.3775373 0.5620233 0.9112755
0.1394593 0.2466888 0.9241558 0.86005247 0.7937772 0.9627047
0.09679138 0.8644842 0.071664 0.19625679 0.01667842 0.68986166
0.71011275 0.7705593 0.67370415 0.07858868 0.4308906 0.09075476
0.03766147 0.18467574 0.2782387 0.37127924 0.98378307 0.48489136
0.22696696 0.07038712 0.22267212 0.10312359]
Neighbor 2: Index 961, Distance 5.2933349609375, Vector [0.9621167 0.2617852 0.48362496 0.88779247 0.4134914 0.52861816
0.16878773 0.2850794 0.5061142 0.36490148 0.0382557 0.40082905
0.81510574 0.11605944 0.01873139 0.11870275 0.6868702 0.79464465
0.04872655 0.8875509 0.62732536 0.5181314 0.2535919 0.37170032
0.94697326 0.9115464 0.62546456 0.57891124 0.21054466 0.95327854
0.7553917 0.3822597 0.81583154 0.21187466 0.21322866 0.7909612
0.559308 0.5558353 0.5736708 0.12580682 0.34955907 0.57307965
0.24758843 0.50400496 0.55703527 0.9428139 0.2457758 0.43935728
0.98151124 0.18678987 0.78001946 0.17715496 0.8500466 0.48797393
0.9721615 0.17007497 0.68792635 0.69527924 0.7188754 0.10096876
0.288561 0.33801684 0.3242876 0.6750207 ]