Precision of annoy.AnnoyIndex with examples
An example that builds an AnnoyIndex and measures how nearest-neighbour query precision varies with the search limit.
from __future__ import print_function
import random; random.seed(0)
import time
# from annoy import AnnoyIndex
# from scikitplot.annoy import AnnoyIndex
from scikitplot.annoy import Index as AnnoyIndex
try:
    from tqdm.auto import tqdm, trange
except ImportError:
    # Fallback: minimal stand-ins used when tqdm is not installed. They
    # accept the same call shapes but display no progress bar.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged, ignoring all progress-bar options."""
        return iterable

    def trange(n, *args, **kwargs):
        """Return ``range(n, *args)``, ignoring all keyword options.

        Extra positional arguments are forwarded to ``range`` so that
        ``trange(start, stop[, step])`` yields the same values as the
        real ``tqdm.trange``.
        """
        return range(n, *args)
# Problem size: n items, each a vector of dimension f.
# Approximate index sizes noted by the author:
#   n, f = 1_000_000, 100  ->  ~2.5 GB
#   n, f =   100_000, 100  ->  ~0.25 GB   (f = 256 -> ~0.6 GB)
n, f = 100_000, 100

idx = AnnoyIndex(
    f=f,
    metric='angular',
)
idx.set_seed(0)

# Print a progress line at every tenth of the run. max(1, ...) keeps the
# modulus non-zero when n < 10.
report_every = max(1, n // 10)
for i in trange(n):
    if i % report_every == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One vector of f independent standard-normal components per item.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

idx.build(2 * f)
idx.save('test.annoy')
idx.info()
/home/circleci/repo/galleries/examples/annoy/plot_precision_script.py:37: UserWarning:
seed=0 resets to Annoy's default seed
0%| | 0/100000 [00:00<?, ?it/s]0 / 100000 = 0.0
0%| | 497/100000 [00:00<00:20, 4967.08it/s]
1%| | 1134/100000 [00:00<00:17, 5791.28it/s]
2%|▏ | 1714/100000 [00:00<00:17, 5583.07it/s]
2%|▏ | 2274/100000 [00:00<00:17, 5467.23it/s]
3%|▎ | 2880/100000 [00:00<00:17, 5674.73it/s]
3%|▎ | 3449/100000 [00:00<00:21, 4509.57it/s]
4%|▍ | 3934/100000 [00:00<00:21, 4570.12it/s]
4%|▍ | 4430/100000 [00:00<00:20, 4678.64it/s]
5%|▍ | 4916/100000 [00:01<00:20, 4640.34it/s]
5%|▌ | 5392/100000 [00:01<00:21, 4484.01it/s]
6%|▌ | 5849/100000 [00:01<00:25, 3722.36it/s]
6%|▋ | 6356/100000 [00:01<00:23, 4057.92it/s]
7%|▋ | 6787/100000 [00:01<00:23, 3991.76it/s]
7%|▋ | 7335/100000 [00:01<00:21, 4387.84it/s]
8%|▊ | 7791/100000 [00:01<00:21, 4383.13it/s]
8%|▊ | 8386/100000 [00:01<00:18, 4821.87it/s]
9%|▉ | 8880/100000 [00:01<00:19, 4788.69it/s]
9%|▉ | 9367/100000 [00:02<00:23, 3889.81it/s]
10%|▉ | 9811/100000 [00:02<00:22, 4026.62it/s]10000 / 100000 = 0.1
10%|█ | 10320/100000 [00:02<00:20, 4304.69it/s]
11%|█ | 10825/100000 [00:02<00:19, 4506.56it/s]
11%|█▏ | 11293/100000 [00:02<00:19, 4462.92it/s]
12%|█▏ | 11752/100000 [00:02<00:20, 4350.13it/s]
12%|█▏ | 12376/100000 [00:02<00:17, 4878.51it/s]
13%|█▎ | 12923/100000 [00:02<00:17, 5047.00it/s]
13%|█▎ | 13436/100000 [00:02<00:20, 4272.59it/s]
14%|█▍ | 13889/100000 [00:03<00:20, 4285.50it/s]
14%|█▍ | 14448/100000 [00:03<00:18, 4634.16it/s]
15%|█▌ | 15044/100000 [00:03<00:17, 4917.67it/s]
16%|█▌ | 15699/100000 [00:03<00:15, 5373.68it/s]
16%|█▌ | 16249/100000 [00:03<00:16, 5119.84it/s]
17%|█▋ | 16772/100000 [00:03<00:19, 4329.80it/s]
17%|█▋ | 17232/100000 [00:03<00:18, 4386.67it/s]
18%|█▊ | 17690/100000 [00:03<00:19, 4264.91it/s]
18%|█▊ | 18130/100000 [00:03<00:19, 4250.06it/s]
19%|█▊ | 18636/100000 [00:04<00:18, 4470.38it/s]
19%|█▉ | 19092/100000 [00:04<00:18, 4437.26it/s]
20%|█▉ | 19563/100000 [00:04<00:17, 4514.02it/s]20000 / 100000 = 0.2
20%|██ | 20206/100000 [00:04<00:15, 5063.13it/s]
21%|██ | 20718/100000 [00:04<00:16, 4941.99it/s]
21%|██ | 21232/100000 [00:04<00:15, 4998.35it/s]
22%|██▏ | 21809/100000 [00:04<00:14, 5223.07it/s]
22%|██▏ | 22335/100000 [00:04<00:14, 5190.10it/s]
23%|██▎ | 22857/100000 [00:04<00:17, 4382.08it/s]
23%|██▎ | 23318/100000 [00:05<00:18, 4192.32it/s]
24%|██▍ | 23754/100000 [00:05<00:18, 4129.58it/s]
24%|██▍ | 24178/100000 [00:05<00:18, 4135.06it/s]
25%|██▍ | 24599/100000 [00:05<00:18, 4112.38it/s]
25%|██▌ | 25028/100000 [00:05<00:18, 4161.34it/s]
25%|██▌ | 25449/100000 [00:05<00:20, 3574.01it/s]
26%|██▌ | 25878/100000 [00:05<00:19, 3759.08it/s]
27%|██▋ | 26566/100000 [00:05<00:15, 4600.94it/s]
27%|██▋ | 27261/100000 [00:05<00:13, 5254.27it/s]
28%|██▊ | 27806/100000 [00:06<00:14, 5051.95it/s]
28%|██▊ | 28326/100000 [00:06<00:14, 5039.90it/s]
29%|██▉ | 28865/100000 [00:06<00:13, 5138.40it/s]
29%|██▉ | 29394/100000 [00:06<00:13, 5180.65it/s]
30%|██▉ | 29918/100000 [00:06<00:13, 5073.53it/s]30000 / 100000 = 0.3
30%|███ | 30430/100000 [00:06<00:17, 4062.73it/s]
31%|███ | 30871/100000 [00:06<00:16, 4091.86it/s]
31%|███▏ | 31305/100000 [00:06<00:17, 4040.75it/s]
32%|███▏ | 31903/100000 [00:07<00:14, 4553.03it/s]
33%|███▎ | 32592/100000 [00:07<00:12, 5192.09it/s]
34%|███▍ | 34062/100000 [00:07<00:08, 7867.03it/s]
36%|███▌ | 35661/100000 [00:07<00:06, 10193.81it/s]
37%|███▋ | 36845/100000 [00:07<00:05, 10670.88it/s]
38%|███▊ | 37937/100000 [00:07<00:08, 7176.17it/s]
39%|███▉ | 38823/100000 [00:07<00:08, 7282.53it/s]
40%|███▉ | 39672/100000 [00:08<00:10, 6032.53it/s]40000 / 100000 = 0.4
40%|████ | 40388/100000 [00:08<00:11, 5325.58it/s]
41%|████ | 41005/100000 [00:08<00:11, 5295.61it/s]
42%|████▏ | 41593/100000 [00:08<00:12, 4577.78it/s]
42%|████▏ | 42100/100000 [00:08<00:12, 4514.34it/s]
43%|████▎ | 42651/100000 [00:08<00:12, 4735.10it/s]
43%|████▎ | 43156/100000 [00:08<00:11, 4793.54it/s]
44%|████▎ | 43659/100000 [00:08<00:11, 4823.85it/s]
44%|████▍ | 44265/100000 [00:09<00:10, 5152.84it/s]
45%|████▍ | 44796/100000 [00:09<00:12, 4373.95it/s]
45%|████▌ | 45263/100000 [00:09<00:13, 4143.29it/s]
46%|████▌ | 45698/100000 [00:09<00:15, 3594.67it/s]
46%|████▌ | 46081/100000 [00:09<00:14, 3641.34it/s]
46%|████▋ | 46463/100000 [00:09<00:14, 3620.46it/s]
47%|████▋ | 46882/100000 [00:09<00:14, 3767.50it/s]
47%|████▋ | 47312/100000 [00:09<00:13, 3910.97it/s]
48%|████▊ | 48040/100000 [00:09<00:10, 4848.39it/s]
49%|████▊ | 48609/100000 [00:10<00:10, 5087.66it/s]
49%|████▉ | 49129/100000 [00:10<00:10, 4809.32it/s]
50%|████▉ | 49621/100000 [00:10<00:12, 4090.71it/s]50000 / 100000 = 0.5
50%|█████ | 50172/100000 [00:10<00:11, 4448.19it/s]
51%|█████ | 50651/100000 [00:10<00:10, 4538.15it/s]
51%|█████ | 51188/100000 [00:10<00:10, 4766.38it/s]
52%|█████▏ | 51743/100000 [00:10<00:09, 4986.98it/s]
52%|█████▏ | 52254/100000 [00:10<00:09, 4820.91it/s]
53%|█████▎ | 52745/100000 [00:10<00:09, 4825.39it/s]
53%|█████▎ | 53234/100000 [00:11<00:09, 4808.02it/s]
54%|█████▎ | 53720/100000 [00:11<00:09, 4795.16it/s]
54%|█████▍ | 54203/100000 [00:11<00:09, 4732.12it/s]
55%|█████▍ | 54679/100000 [00:11<00:09, 4735.41it/s]
55%|█████▌ | 55231/100000 [00:11<00:09, 4964.80it/s]
56%|█████▌ | 55730/100000 [00:11<00:09, 4817.02it/s]
56%|█████▋ | 56388/100000 [00:11<00:08, 5327.37it/s]
57%|█████▋ | 56959/100000 [00:11<00:07, 5437.76it/s]
58%|█████▊ | 57506/100000 [00:12<00:09, 4334.61it/s]
58%|█████▊ | 57977/100000 [00:12<00:09, 4256.70it/s]
58%|█████▊ | 58428/100000 [00:12<00:09, 4274.61it/s]
59%|█████▉ | 58874/100000 [00:12<00:11, 3623.75it/s]
59%|█████▉ | 59283/100000 [00:12<00:10, 3735.89it/s]
60%|█████▉ | 59690/100000 [00:12<00:10, 3821.19it/s]60000 / 100000 = 0.6
60%|██████ | 60109/100000 [00:12<00:10, 3919.62it/s]
61%|██████ | 60568/100000 [00:12<00:09, 4104.26it/s]
61%|██████ | 60989/100000 [00:12<00:09, 4100.81it/s]
61%|██████▏ | 61407/100000 [00:13<00:09, 4085.79it/s]
62%|██████▏ | 61821/100000 [00:13<00:09, 4090.60it/s]
62%|██████▏ | 62296/100000 [00:13<00:08, 4282.09it/s]
63%|██████▎ | 62728/100000 [00:13<00:08, 4173.32it/s]
63%|██████▎ | 63148/100000 [00:13<00:10, 3518.32it/s]
64%|██████▎ | 63519/100000 [00:13<00:10, 3558.19it/s]
64%|██████▍ | 63889/100000 [00:13<00:10, 3565.58it/s]
64%|██████▍ | 64264/100000 [00:13<00:09, 3616.20it/s]
65%|██████▍ | 64867/100000 [00:13<00:08, 4298.29it/s]
66%|██████▌ | 65791/100000 [00:13<00:06, 5600.00it/s]
66%|██████▋ | 66357/100000 [00:14<00:06, 5424.40it/s]
67%|██████▋ | 66904/100000 [00:14<00:06, 5292.36it/s]
67%|██████▋ | 67456/100000 [00:14<00:06, 5355.86it/s]
68%|██████▊ | 67995/100000 [00:14<00:06, 5295.23it/s]
69%|██████▊ | 68527/100000 [00:14<00:07, 4305.16it/s]
69%|██████▉ | 68988/100000 [00:14<00:07, 4309.41it/s]
69%|██████▉ | 69440/100000 [00:14<00:07, 4256.16it/s]70000 / 100000 = 0.7
70%|███████ | 70064/100000 [00:14<00:06, 4783.99it/s]
71%|███████ | 71101/100000 [00:15<00:04, 6322.42it/s]
72%|███████▏ | 72088/100000 [00:15<00:03, 7323.59it/s]
73%|███████▎ | 73119/100000 [00:15<00:03, 8181.08it/s]
74%|███████▍ | 73958/100000 [00:15<00:04, 5370.55it/s]
75%|███████▍ | 74634/100000 [00:15<00:04, 5470.38it/s]
75%|███████▌ | 75281/100000 [00:15<00:04, 5650.92it/s]
76%|███████▌ | 75921/100000 [00:15<00:05, 4756.67it/s]
76%|███████▋ | 76469/100000 [00:16<00:05, 4230.10it/s]
77%|███████▋ | 76947/100000 [00:16<00:05, 4258.49it/s]
77%|███████▋ | 77413/100000 [00:16<00:05, 4241.81it/s]
78%|███████▊ | 77865/100000 [00:16<00:05, 4270.14it/s]
78%|███████▊ | 78312/100000 [00:16<00:05, 4279.33it/s]
79%|███████▉ | 78754/100000 [00:16<00:06, 3445.40it/s]
79%|███████▉ | 79132/100000 [00:16<00:07, 2977.82it/s]
79%|███████▉ | 79460/100000 [00:17<00:06, 2957.57it/s]
80%|███████▉ | 79777/100000 [00:17<00:06, 2956.03it/s]80000 / 100000 = 0.8
80%|████████ | 80206/100000 [00:17<00:06, 3284.76it/s]
81%|████████ | 80599/100000 [00:17<00:05, 3452.59it/s]
81%|████████ | 80959/100000 [00:17<00:05, 3469.53it/s]
81%|████████▏ | 81338/100000 [00:17<00:05, 3557.60it/s]
82%|████████▏ | 81714/100000 [00:17<00:05, 3614.31it/s]
82%|████████▏ | 82315/100000 [00:17<00:04, 4305.55it/s]
83%|████████▎ | 83106/100000 [00:17<00:03, 5357.00it/s]
84%|████████▎ | 83650/100000 [00:17<00:03, 4465.36it/s]
84%|████████▍ | 84127/100000 [00:18<00:03, 4485.88it/s]
85%|████████▍ | 84597/100000 [00:18<00:03, 4367.33it/s]
85%|████████▌ | 85103/100000 [00:18<00:03, 4552.92it/s]
86%|████████▌ | 85571/100000 [00:18<00:03, 4463.67it/s]
86%|████████▌ | 86067/100000 [00:18<00:03, 4601.58it/s]
87%|████████▋ | 86535/100000 [00:18<00:03, 3850.75it/s]
87%|████████▋ | 86945/100000 [00:18<00:03, 3881.82it/s]
87%|████████▋ | 87401/100000 [00:18<00:03, 4060.22it/s]
88%|████████▊ | 87822/100000 [00:19<00:03, 3930.23it/s]
88%|████████▊ | 88226/100000 [00:19<00:02, 3939.53it/s]
89%|████████▊ | 88676/100000 [00:19<00:02, 4095.61it/s]
89%|████████▉ | 89092/100000 [00:19<00:02, 4082.63it/s]
90%|████████▉ | 89505/100000 [00:19<00:02, 4003.21it/s]90000 / 100000 = 0.9
90%|█████████ | 90069/100000 [00:19<00:02, 4471.49it/s]
91%|█████████ | 90521/100000 [00:19<00:02, 4410.66it/s]
91%|█████████ | 90966/100000 [00:19<00:02, 3737.89it/s]
91%|█████████▏| 91359/100000 [00:19<00:02, 3536.51it/s]
92%|█████████▏| 91727/100000 [00:20<00:02, 3532.49it/s]
92%|█████████▏| 92090/100000 [00:20<00:02, 3514.38it/s]
92%|█████████▏| 92467/100000 [00:20<00:02, 3583.77it/s]
93%|█████████▎| 92929/100000 [00:20<00:01, 3874.03it/s]
93%|█████████▎| 93365/100000 [00:20<00:01, 4012.78it/s]
94%|█████████▍| 93821/100000 [00:20<00:01, 4170.53it/s]
94%|█████████▍| 94242/100000 [00:20<00:01, 3453.97it/s]
95%|█████████▍| 94611/100000 [00:20<00:01, 3481.61it/s]
95%|█████████▌| 95277/100000 [00:20<00:01, 4328.48it/s]
96%|█████████▌| 95803/100000 [00:20<00:00, 4583.97it/s]
96%|█████████▋| 96279/100000 [00:21<00:00, 4438.37it/s]
97%|█████████▋| 96736/100000 [00:21<00:00, 3707.06it/s]
97%|█████████▋| 97367/100000 [00:21<00:00, 4352.26it/s]
98%|█████████▊| 97836/100000 [00:21<00:00, 4361.35it/s]
98%|█████████▊| 98332/100000 [00:21<00:00, 4521.62it/s]
99%|█████████▉| 99040/100000 [00:21<00:00, 5230.82it/s]
100%|█████████▉| 99582/100000 [00:21<00:00, 5214.89it/s]
100%|██████████| 100000/100000 [00:21<00:00, 4567.10it/s]
{'f': 100, 'metric': 'angular', 'n_neighbors': 5, 'on_disk_path': 'test.annoy', 'prefault': False, 'seed': None, 'verbose': None, 'schema_version': 0, 'n_items': 100000, 'n_trees': 200, 'memory_usage_byte': 270573580, 'memory_usage_mib': 258.0390739440918}
def plot(idx, y=None, **kwargs):
    """Draw an Annoy index overview beside its 1-nearest-neighbour edges.

    Creates a two-panel figure: the left panel is produced by
    ``plot_annoy_index`` and the right panel by ``plot_annoy_knn_edges``
    with ``k=1``.

    Parameters
    ----------
    idx : AnnoyIndex
        Index to visualise. All ``idx.f`` dimensions are passed as ``dims``.
    y : optional
        Currently unused; kept so existing callers that pass it still work.
    **kwargs
        Only ``alpha`` (default ``0.8``) is read; it sets the transparency
        of the edge lines. Any other keys are ignored.

    Returns
    -------
    None
    """
    import matplotlib.pyplot as plt
    import scikitplot.cexternals._annoy._plotting as utils

    alpha = kwargs.pop("alpha", 0.8)
    _fig, axes = plt.subplots(ncols=2, figsize=(12, 5))

    # The first element of plot_annoy_index's return value is forwarded to
    # the edge plot (presumably the plotted coordinates -- confirm against
    # the plotting helper).
    y2 = utils.plot_annoy_index(
        idx,
        dims=list(range(idx.f)),
        plot_kwargs={"draw_legend": False},
        ax=axes[0],
    )[0]
    utils.plot_annoy_knn_edges(
        idx,
        y2,
        k=1,
        line_kwargs={"alpha": alpha},
        ax=axes[1],
    )
# Optional: uncomment to rebuild the index with build(10) before plotting.
# idx.unbuild()
# idx.build(10)
plot(idx)

def precision(q):
    """Print query precision and average query time for several limits.

    For ``prec_n`` randomly chosen items, the ``k`` neighbours returned
    with a search limit equal to the number of items in the index are
    taken as the reference set. The same query is then repeated with each
    value in ``limits`` and the fraction of reference neighbours recovered
    is accumulated, together with the wall-clock time of the query.

    Parameters
    ----------
    q : AnnoyIndex
        A built or loaded index providing ``get_n_items`` and
        ``get_nns_by_item``.

    Returns
    -------
    None
        One summary line per limit is printed to stdout.
    """
    limits = [10, 100, 1_000, 10_000]
    k = 10
    prec_n = 10
    # Take the size from the index itself so the function does not depend
    # on a module-level variable.
    n_items = q.get_n_items()
    prec_sum = dict.fromkeys(limits, 0.0)
    time_sum = dict.fromkeys(limits, 0.0)

    for _ in trange(prec_n):
        j = random.randrange(0, n_items)
        # Reference neighbours: search limit set to the full index size.
        closest = set(q.get_nns_by_item(j, k, n_items))
        for limit in limits:
            # perf_counter is monotonic and high-resolution, which suits
            # timing sub-millisecond queries.
            t0 = time.perf_counter()
            toplist = q.get_nns_by_item(j, k, limit)
            elapsed = time.perf_counter() - t0
            found = len(closest.intersection(toplist))
            prec_sum[limit] += 1.0 * found / k
            time_sum[limit] += elapsed

    for limit in limits:
        print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
              % (limit, 100.0 * prec_sum[limit] / prec_n, time_sum[limit] / prec_n))
# Open the index saved earlier in a fresh object and evaluate it.
q = AnnoyIndex(
    f=f,
    metric='angular',
)
q.set_seed(0)
q.load('test.annoy')
precision(q)
/home/circleci/repo/galleries/examples/annoy/plot_precision_script.py:111: UserWarning:
seed=0 resets to Annoy's default seed
0%| | 0/10 [00:00<?, ?it/s]
70%|███████ | 7/10 [00:00<00:00, 68.41it/s]
100%|██████████| 10/10 [00:00<00:00, 69.71it/s]
limit: 10 precision: 13.00% avg time: 0.000145s
limit: 100 precision: 16.00% avg time: 0.000098s
limit: 1000 precision: 25.00% avg time: 0.000295s
limit: 10000 precision: 82.00% avg time: 0.001842s
Total running time of the script: (3 minutes 1.016 seconds)
Related examples