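Precision of annoy.AnnoyIndex with examples
An example showing the AnnoyIndex class: it builds an index of random vectors, saves it to disk, reloads it, and measures the precision of approximate nearest-neighbour queries.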
from __future__ import print_function
import random; random.seed(0)
import time
# from annoy import AnnoyIndex
# from scikitplot.annoy import AnnoyIndex
from scikitplot.annoy import Index as AnnoyIndex
try:
    from tqdm.auto import tqdm, trange
except ImportError:
    # tqdm is optional: fall back to plain iteration without a progress bar.
    # The stand-ins accept and ignore any extra args/kwargs.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged, ignoring progress-bar options."""
        return iterable

    def trange(n, *args, **kwargs):
        """Return ``range(n)``, ignoring progress-bar options."""
        return range(n)
# Problem size: n items, each a vector of dimension f.
# Larger alternative kept for reference (original note: 100~2.5GB):
# n, f = 1_000_000, 100
n, f = 100_000, 100  # 100~0.25GB 256~0.6GB

idx = AnnoyIndex(
    f=f,
    metric='angular',
)
# NOTE(review): the recorded output shows
# "UserWarning: seed=0 resets to Annoy's default seed"; presumably it is
# raised by this call - confirm.
idx.set_seed(0)

# Print a progress line about every 10% of the items. max() keeps the step
# at least 1 so that n < 10 does not make the modulo divide by zero.
report_every = max(1, n // 10)
for i in trange(n):
    if i % report_every == 0:
        print(f"{i} / {n} = {i / n}")
    # One random Gaussian vector of dimension f per item.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

idx.build(2 * f)
idx.save('test.annoy')
idx.info()
/home/circleci/repo/galleries/examples/annoy/plot_precision_script.py:37: UserWarning:
seed=0 resets to Annoy's default seed
0%| | 0/100000 [00:00<?, ?it/s]0 / 100000 = 0.0
1%| | 503/100000 [00:00<00:19, 5027.12it/s]
1%| | 1149/100000 [00:00<00:16, 5869.57it/s]
2%|▏ | 1736/100000 [00:00<00:17, 5641.63it/s]
2%|▏ | 2445/100000 [00:00<00:15, 6198.49it/s]
3%|▎ | 3067/100000 [00:00<00:18, 5321.97it/s]
4%|▎ | 3618/100000 [00:00<00:20, 4663.33it/s]
4%|▍ | 4106/100000 [00:00<00:20, 4653.67it/s]
5%|▍ | 4715/100000 [00:00<00:18, 5051.20it/s]
5%|▌ | 5236/100000 [00:01<00:20, 4534.43it/s]
6%|▌ | 5708/100000 [00:01<00:20, 4504.18it/s]
6%|▌ | 6171/100000 [00:01<00:22, 4183.01it/s]
7%|▋ | 6657/100000 [00:01<00:21, 4360.45it/s]
7%|▋ | 7479/100000 [00:01<00:17, 5413.78it/s]
8%|▊ | 8039/100000 [00:01<00:17, 5350.19it/s]
9%|▊ | 8587/100000 [00:01<00:19, 4697.45it/s]
9%|▉ | 9079/100000 [00:01<00:19, 4670.65it/s]
10%|▉ | 9561/100000 [00:02<00:21, 4214.71it/s]
10%|▉ | 9999/100000 [00:02<00:21, 4139.86it/s]10000 / 100000 = 0.1
10%|█ | 10473/100000 [00:02<00:22, 3960.04it/s]
11%|█ | 11086/100000 [00:02<00:19, 4511.69it/s]
12%|█▏ | 11553/100000 [00:02<00:20, 4389.46it/s]
12%|█▏ | 12003/100000 [00:02<00:21, 4004.09it/s]
12%|█▏ | 12416/100000 [00:02<00:21, 3990.90it/s]
13%|█▎ | 12824/100000 [00:02<00:21, 3995.23it/s]
13%|█▎ | 13230/100000 [00:02<00:21, 3983.62it/s]
14%|█▍ | 13947/100000 [00:03<00:17, 4878.17it/s]
15%|█▍ | 14738/100000 [00:03<00:14, 5745.99it/s]
15%|█▌ | 15323/100000 [00:03<00:18, 4621.49it/s]
16%|█▌ | 15944/100000 [00:03<00:16, 5016.37it/s]
16%|█▋ | 16483/100000 [00:03<00:17, 4911.60it/s]
17%|█▋ | 17000/100000 [00:03<00:16, 4949.81it/s]
18%|█▊ | 17514/100000 [00:03<00:18, 4534.24it/s]
18%|█▊ | 17986/100000 [00:03<00:17, 4560.39it/s]
19%|█▊ | 18549/100000 [00:03<00:16, 4849.02it/s]
19%|█▉ | 19047/100000 [00:04<00:18, 4340.35it/s]
19%|█▉ | 19498/100000 [00:04<00:19, 4202.06it/s]
20%|█▉ | 19930/100000 [00:04<00:22, 3508.01it/s]20000 / 100000 = 0.2
20%|██ | 20305/100000 [00:04<00:23, 3458.09it/s]
21%|██ | 20667/100000 [00:04<00:23, 3423.71it/s]
21%|██ | 21021/100000 [00:04<00:22, 3441.34it/s]
21%|██▏ | 21388/100000 [00:04<00:22, 3502.18it/s]
22%|██▏ | 21891/100000 [00:04<00:21, 3615.56it/s]
23%|██▎ | 22769/100000 [00:05<00:15, 4994.29it/s]
23%|██▎ | 23354/100000 [00:05<00:14, 5230.21it/s]
24%|██▍ | 23893/100000 [00:05<00:16, 4664.14it/s]
25%|██▍ | 24502/100000 [00:05<00:14, 5039.34it/s]
25%|██▌ | 25026/100000 [00:05<00:14, 5085.23it/s]
26%|██▌ | 25565/100000 [00:05<00:14, 5169.34it/s]
26%|██▌ | 26093/100000 [00:05<00:14, 5047.35it/s]
27%|██▋ | 26606/100000 [00:05<00:14, 5022.75it/s]
27%|██▋ | 27114/100000 [00:05<00:14, 4993.78it/s]
28%|██▊ | 27618/100000 [00:06<00:17, 4024.93it/s]
28%|██▊ | 28053/100000 [00:06<00:17, 4082.34it/s]
28%|██▊ | 28485/100000 [00:06<00:17, 4096.02it/s]
29%|██▉ | 28911/100000 [00:06<00:17, 4076.90it/s]
29%|██▉ | 29368/100000 [00:06<00:16, 4211.84it/s]
30%|██▉ | 29841/100000 [00:06<00:16, 4357.48it/s]30000 / 100000 = 0.3
30%|███ | 30285/100000 [00:06<00:17, 3924.44it/s]
31%|███ | 30690/100000 [00:06<00:17, 3931.27it/s]
31%|███ | 31092/100000 [00:06<00:19, 3536.59it/s]
31%|███▏ | 31458/100000 [00:07<00:19, 3567.43it/s]
32%|███▏ | 31893/100000 [00:07<00:18, 3778.69it/s]
32%|███▏ | 32280/100000 [00:07<00:18, 3746.72it/s]
33%|███▎ | 32661/100000 [00:07<00:18, 3697.95it/s]
33%|███▎ | 33035/100000 [00:07<00:18, 3671.56it/s]
34%|███▍ | 34028/100000 [00:07<00:12, 5461.06it/s]
35%|███▍ | 34585/100000 [00:07<00:11, 5490.08it/s]
35%|███▌ | 35142/100000 [00:07<00:14, 4357.69it/s]
36%|███▌ | 35622/100000 [00:08<00:14, 4466.75it/s]
36%|███▌ | 36100/100000 [00:08<00:14, 4498.28it/s]
37%|███▋ | 36613/100000 [00:08<00:13, 4668.89it/s]
37%|███▋ | 37098/100000 [00:08<00:13, 4670.96it/s]
38%|███▊ | 37578/100000 [00:08<00:15, 4136.65it/s]
38%|███▊ | 38011/100000 [00:08<00:16, 3780.40it/s]
38%|███▊ | 38406/100000 [00:08<00:16, 3798.77it/s]
39%|███▉ | 38853/100000 [00:08<00:15, 3974.93it/s]
39%|███▉ | 39468/100000 [00:08<00:13, 4569.05it/s]40000 / 100000 = 0.4
41%|████ | 40830/100000 [00:09<00:08, 7111.59it/s]
43%|████▎ | 42519/100000 [00:09<00:05, 9910.69it/s]
44%|████▎ | 43542/100000 [00:09<00:06, 8808.77it/s]
44%|████▍ | 44468/100000 [00:09<00:08, 6353.51it/s]
45%|████▌ | 45226/100000 [00:09<00:10, 5421.05it/s]
46%|████▌ | 45869/100000 [00:09<00:10, 5160.34it/s]
46%|████▋ | 46453/100000 [00:10<00:11, 4759.15it/s]
47%|████▋ | 46975/100000 [00:10<00:12, 4391.43it/s]
47%|████▋ | 47445/100000 [00:10<00:13, 4040.47it/s]
48%|████▊ | 47869/100000 [00:10<00:12, 4069.59it/s]
49%|████▊ | 48646/100000 [00:10<00:10, 4941.32it/s]
49%|████▉ | 49175/100000 [00:10<00:11, 4458.98it/s]
50%|████▉ | 49650/100000 [00:10<00:12, 4117.93it/s]50000 / 100000 = 0.5
50%|█████ | 50084/100000 [00:10<00:12, 4080.24it/s]
51%|█████ | 50507/100000 [00:11<00:12, 4087.37it/s]
51%|█████ | 50926/100000 [00:11<00:13, 3706.27it/s]
51%|█████▏ | 51308/100000 [00:11<00:13, 3686.60it/s]
52%|█████▏ | 51849/100000 [00:11<00:11, 4138.23it/s]
52%|█████▏ | 52288/100000 [00:11<00:11, 4206.26it/s]
53%|█████▎ | 52721/100000 [00:11<00:11, 4240.65it/s]
53%|█████▎ | 53152/100000 [00:11<00:12, 3757.05it/s]
54%|█████▎ | 53686/100000 [00:11<00:11, 4175.90it/s]
54%|█████▍ | 54119/100000 [00:11<00:10, 4206.18it/s]
55%|█████▍ | 54551/100000 [00:12<00:11, 4096.38it/s]
55%|█████▍ | 54969/100000 [00:12<00:12, 3693.25it/s]
55%|█████▌ | 55382/100000 [00:12<00:11, 3807.59it/s]
56%|█████▌ | 55784/100000 [00:12<00:11, 3865.01it/s]
56%|█████▌ | 56178/100000 [00:12<00:12, 3496.48it/s]
57%|█████▋ | 56569/100000 [00:12<00:12, 3605.36it/s]
57%|█████▋ | 56939/100000 [00:12<00:12, 3560.42it/s]
57%|█████▋ | 57343/100000 [00:12<00:11, 3693.63it/s]
58%|█████▊ | 57744/100000 [00:12<00:11, 3782.87it/s]
58%|█████▊ | 58347/100000 [00:13<00:09, 4431.03it/s]
59%|█████▉ | 58932/100000 [00:13<00:08, 4845.04it/s]
59%|█████▉ | 59422/100000 [00:13<00:09, 4280.54it/s]
60%|█████▉ | 59866/100000 [00:13<00:09, 4257.97it/s]60000 / 100000 = 0.6
60%|██████ | 60303/100000 [00:13<00:09, 4209.88it/s]
61%|██████ | 60871/100000 [00:13<00:09, 4265.99it/s]
61%|██████▏ | 61303/100000 [00:13<00:09, 4255.74it/s]
62%|██████▏ | 61732/100000 [00:13<00:09, 4203.73it/s]
62%|██████▏ | 62155/100000 [00:13<00:09, 4194.10it/s]
63%|██████▎ | 62576/100000 [00:14<00:08, 4190.77it/s]
63%|██████▎ | 62996/100000 [00:14<00:09, 4095.48it/s]
63%|██████▎ | 63407/100000 [00:14<00:09, 3697.93it/s]
64%|██████▍ | 63806/100000 [00:14<00:09, 3776.16it/s]
64%|██████▍ | 64190/100000 [00:14<00:09, 3731.75it/s]
65%|██████▍ | 64568/100000 [00:14<00:09, 3715.75it/s]
65%|██████▍ | 64943/100000 [00:14<00:10, 3341.19it/s]
65%|██████▌ | 65411/100000 [00:14<00:09, 3698.96it/s]
66%|██████▌ | 65873/100000 [00:14<00:08, 3952.58it/s]
66%|██████▋ | 66278/100000 [00:15<00:08, 3959.34it/s]
67%|██████▋ | 66681/100000 [00:15<00:08, 3967.76it/s]
67%|██████▋ | 67158/100000 [00:15<00:07, 4199.78it/s]
68%|██████▊ | 67647/100000 [00:15<00:07, 4401.30it/s]
68%|██████▊ | 68091/100000 [00:15<00:07, 4356.84it/s]
69%|██████▊ | 68630/100000 [00:15<00:06, 4660.02it/s]
69%|██████▉ | 69199/100000 [00:15<00:06, 4963.64it/s]70000 / 100000 = 0.7
70%|███████ | 70317/100000 [00:15<00:04, 6808.32it/s]
71%|███████ | 71002/100000 [00:15<00:04, 6616.43it/s]
72%|███████▏ | 71669/100000 [00:15<00:04, 6067.57it/s]
72%|███████▏ | 72329/100000 [00:16<00:04, 6214.50it/s]
73%|███████▎ | 73036/100000 [00:16<00:04, 6455.21it/s]
74%|███████▎ | 73694/100000 [00:16<00:04, 6490.97it/s]
74%|███████▍ | 74385/100000 [00:16<00:03, 6612.96it/s]
75%|███████▌ | 75144/100000 [00:16<00:03, 6899.78it/s]
76%|███████▌ | 76097/100000 [00:16<00:03, 7483.65it/s]
78%|███████▊ | 77811/100000 [00:16<00:02, 10299.13it/s]
79%|███████▉ | 78876/100000 [00:16<00:02, 10401.46it/s]
80%|███████▉ | 79922/100000 [00:16<00:02, 9224.59it/s] 80000 / 100000 = 0.8
81%|████████ | 80873/100000 [00:17<00:02, 9219.64it/s]
82%|████████▏ | 81815/100000 [00:17<00:02, 8087.70it/s]
83%|████████▎ | 82660/100000 [00:17<00:02, 7972.63it/s]
83%|████████▎ | 83481/100000 [00:17<00:02, 7331.44it/s]
84%|████████▍ | 84237/100000 [00:17<00:02, 6624.72it/s]
85%|████████▍ | 84922/100000 [00:17<00:02, 6594.71it/s]
86%|████████▌ | 85597/100000 [00:17<00:02, 6526.32it/s]
86%|████████▋ | 86361/100000 [00:17<00:01, 6822.54it/s]
87%|████████▋ | 87054/100000 [00:17<00:02, 6230.19it/s]
88%|████████▊ | 87692/100000 [00:18<00:01, 6218.65it/s]
88%|████████▊ | 88324/100000 [00:18<00:01, 6112.17it/s]
89%|████████▉ | 88942/100000 [00:18<00:02, 5317.80it/s]
89%|████████▉ | 89494/100000 [00:18<00:02, 4882.48it/s]
90%|█████████ | 90000/100000 [00:18<00:02, 4871.48it/s]90000 / 100000 = 0.9
91%|█████████ | 90594/100000 [00:18<00:01, 5148.85it/s]
91%|█████████ | 91125/100000 [00:18<00:01, 4671.47it/s]
92%|█████████▏| 91843/100000 [00:18<00:01, 5312.08it/s]
93%|█████████▎| 92752/100000 [00:19<00:01, 6327.93it/s]
93%|█████████▎| 93411/100000 [00:19<00:01, 6383.66it/s]
94%|█████████▍| 94068/100000 [00:19<00:00, 6305.94it/s]
95%|█████████▍| 94712/100000 [00:19<00:00, 6186.43it/s]
95%|█████████▌| 95340/100000 [00:19<00:00, 5011.57it/s]
96%|█████████▌| 95883/100000 [00:19<00:00, 5114.46it/s]
96%|█████████▋| 96425/100000 [00:19<00:00, 4987.85it/s]
97%|█████████▋| 96945/100000 [00:19<00:00, 4094.37it/s]
97%|█████████▋| 97483/100000 [00:20<00:00, 4394.88it/s]
98%|█████████▊| 98062/100000 [00:20<00:00, 4743.37it/s]
99%|█████████▊| 98568/100000 [00:20<00:00, 4810.63it/s]
99%|█████████▉| 99083/100000 [00:20<00:00, 4902.56it/s]
100%|█████████▉| 99619/100000 [00:20<00:00, 5030.14it/s]
100%|██████████| 100000/100000 [00:20<00:00, 4865.89it/s]
{'f': 100, 'metric': 'angular', 'n_neighbors': 5, 'on_disk_path': 'test.annoy', 'prefault': False, 'seed': None, 'verbose': None, 'schema_version': 0, 'n_items': 100000, 'n_trees': 200, 'memory_usage_byte': 270573580, 'memory_usage_mib': 258.0390739440918}
def plot(idx, y=None, **kwargs):
    """Draw a two-panel figure for ``idx`` using the Annoy plotting helpers.

    The left axes receive ``plot_annoy_index`` called with every dimension
    of the index; the first element of its return value is then passed to
    ``plot_annoy_knn_edges`` (``k=1``) on the right axes.

    Parameters
    ----------
    idx
        Index object; this function reads ``idx.get_n_items()`` and
        ``idx.f`` and hands ``idx`` to both plotting helpers.
    y
        Only compared against ``None``; it is not forwarded to the plots.
    **kwargs
        Only ``alpha`` is read (default ``0.8``); it becomes the line alpha
        of the k-NN edges plot.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    import scikitplot.cexternals._annoy._plotting as utils

    # NOTE(review): ``single`` and ``double`` are never read afterwards.
    # They are kept because the uniform draw advances NumPy's global RNG.
    single = np.zeros(idx.get_n_items(), dtype=int)
    # NOTE(review): indentation was lost in this copy of the script; the
    # ``if`` is assumed to guard only this assignment - confirm.
    if y is None:
        double = np.random.uniform(0, 1, idx.get_n_items()).round()

    # single vs double
    fig, ax = plt.subplots(ncols=2, figsize=(12, 5))
    alpha = kwargs.pop("alpha", 0.8)
    y2 = utils.plot_annoy_index(
        idx,
        dims=list(range(idx.f)),
        plot_kwargs={"draw_legend": False},
        ax=ax[0],
    )[0]
    utils.plot_annoy_knn_edges(
        idx,
        y2,
        k=1,
        line_kwargs={"alpha": alpha},
        ax=ax[1],
    )
# Optional: uncomment the next two lines to rebuild the index with
# build(10) instead of the build(2 * f) used above before plotting.
# idx.unbuild()
# idx.build(10)
plot(idx)

def precision(q, limits=(10, 100, 1_000, 10_000), k=10, prec_n=10):
    """Print precision and average query time of ``q`` per search limit.

    For ``prec_n`` randomly chosen items, the ``k`` neighbours returned
    with the third ``get_nns_by_item`` argument set to the number of items
    serve as the reference set. Each value in ``limits`` is then used as
    that third argument, and the fraction of reference neighbours found
    (the hit rate) and the wall-clock query time are accumulated.

    Parameters
    ----------
    q
        Index object providing ``get_n_items()`` and
        ``get_nns_by_item(item, k, limit)``.
    limits
        Values passed as the third argument of ``get_nns_by_item``.
    k
        Number of neighbours requested per query.
    prec_n
        Number of random query items to average over.

    Raises
    ------
    ValueError
        If ``prec_n`` is not positive (the averages would be undefined).
    """
    if prec_n <= 0:
        raise ValueError("prec_n must be a positive integer")

    # Take the size from the index itself instead of a module-level global,
    # so the function also works on an index loaded from elsewhere.
    # Assumes item ids are 0 .. n_items - 1 - TODO confirm.
    n_items = q.get_n_items()

    prec_sum = dict.fromkeys(limits, 0.0)
    time_sum = dict.fromkeys(limits, 0.0)

    for _ in trange(prec_n):
        j = random.randrange(0, n_items)
        # Reference answer for this query item.
        closest = set(q.get_nns_by_item(j, k, n_items))
        for limit in limits:
            # perf_counter is the high-resolution clock for short intervals.
            t0 = time.perf_counter()
            toplist = q.get_nns_by_item(j, k, limit)
            elapsed = time.perf_counter() - t0

            found = len(closest.intersection(toplist))
            prec_sum[limit] += found / k
            time_sum[limit] += elapsed

    for limit in limits:
        print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
              % (limit, 100.0 * prec_sum[limit] / prec_n, time_sum[limit] / prec_n))
# Load the index saved earlier into a fresh object with the same dimension
# and metric, then report query precision on it.
q = AnnoyIndex(
    f=f,
    metric='angular',
)
q.set_seed(0)
q.load('test.annoy')
precision(q)
/home/circleci/repo/galleries/examples/annoy/plot_precision_script.py:111: UserWarning:
seed=0 resets to Annoy's default seed
0%| | 0/10 [00:00<?, ?it/s]
70%|███████ | 7/10 [00:00<00:00, 67.03it/s]
100%|██████████| 10/10 [00:00<00:00, 68.22it/s]
limit: 10 precision: 13.00% avg time: 0.000159s
limit: 100 precision: 16.00% avg time: 0.000101s
limit: 1000 precision: 25.00% avg time: 0.000302s
limit: 10000 precision: 82.00% avg time: 0.001917s
Total running time of the script: (2 minutes 55.349 seconds)
Related examples