Precision annoy.AnnoyIndex with examples
An example showing the AnnoyIndex class.
from __future__ import print_function
import random; random.seed(0)
import time
# from annoy import AnnoyIndex
# from scikitplot.annoy import AnnoyIndex
from scikitplot.annoy import Index as AnnoyIndex
# tqdm is optional: use its progress bars when it is installed, otherwise
# fall back to silent stand-ins that accept the same call shapes.
try:
    from tqdm.auto import tqdm, trange
except ImportError:

    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged; all other tqdm options are ignored."""
        return iterable

    def trange(n, *args, **kwargs):
        """Return ``range(n, *args)``; keyword (tqdm) options are ignored.

        Extra positional arguments are forwarded to ``range`` so that
        ``trange(start, stop[, step])`` iterates the same values whether
        or not tqdm is installed.
        """
        return range(n, *args)
# Problem size: n items, each a vector of dimension f.
# Rough size notes carried over from the original comments:
#   n, f = 1_000_000, 100   # 100~2.5GB
#   n, f = 100_000, 100     # 100~0.25GB 256~0.6GB
n, f = 100_000, 100

t = AnnoyIndex(
    f=f,
    metric='angular',
)
t.set_seed(0)  # seed the index so repeated runs are comparable

# Print a progress line about every 10% of the items.  max() keeps the
# step at least 1 so a small n (< 10) cannot cause a modulo-by-zero.
progress_step = max(1, n // 10)
for i in trange(n):
    if i % progress_step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One vector of f independent standard-normal samples per item.
    v = [random.gauss(0, 1) for _ in range(f)]
    t.add_item(i, v)

# Build the index (the argument is the tree count in annoy's API), then
# persist it so it can be reloaded from 'test.annoy' later.
t.build(2 * f)
t.save('test.annoy')
0%| | 0/100000 [00:00<?, ?it/s]0 / 100000 = 0.0
2%|▏ | 1655/100000 [00:00<00:05, 16548.48it/s]
3%|▎ | 3398/100000 [00:00<00:05, 17065.69it/s]
5%|▌ | 5157/100000 [00:00<00:05, 17304.17it/s]
7%|▋ | 6916/100000 [00:00<00:05, 17413.36it/s]
9%|▊ | 8658/100000 [00:00<00:05, 17273.16it/s]10000 / 100000 = 0.1
10%|█ | 10407/100000 [00:00<00:05, 17343.80it/s]
12%|█▏ | 12146/100000 [00:00<00:05, 17358.32it/s]
14%|█▍ | 13906/100000 [00:00<00:04, 17433.68it/s]
16%|█▌ | 15668/100000 [00:00<00:04, 17491.25it/s]
17%|█▋ | 17418/100000 [00:01<00:04, 17450.21it/s]
19%|█▉ | 19164/100000 [00:01<00:04, 17360.80it/s]20000 / 100000 = 0.2
21%|██ | 20901/100000 [00:01<00:04, 16993.81it/s]
23%|██▎ | 22602/100000 [00:01<00:04, 16927.74it/s]
24%|██▍ | 24298/100000 [00:01<00:04, 16936.77it/s]
26%|██▌ | 25993/100000 [00:01<00:04, 16732.66it/s]
28%|██▊ | 27668/100000 [00:01<00:04, 16546.62it/s]
29%|██▉ | 29347/100000 [00:01<00:04, 16618.09it/s]30000 / 100000 = 0.3
31%|███ | 31010/100000 [00:01<00:04, 16551.85it/s]
33%|███▎ | 32754/100000 [00:01<00:03, 16813.80it/s]
34%|███▍ | 34437/100000 [00:02<00:03, 16620.09it/s]
36%|███▌ | 36130/100000 [00:02<00:03, 16711.33it/s]
38%|███▊ | 37876/100000 [00:02<00:03, 16933.12it/s]
40%|███▉ | 39622/100000 [00:02<00:03, 17088.20it/s]40000 / 100000 = 0.4
41%|████▏ | 41363/100000 [00:02<00:03, 17182.08it/s]
43%|████▎ | 43101/100000 [00:02<00:03, 17237.51it/s]
45%|████▍ | 44839/100000 [00:02<00:03, 17278.80it/s]
47%|████▋ | 46568/100000 [00:02<00:03, 17057.82it/s]
48%|████▊ | 48302/100000 [00:02<00:03, 17140.55it/s]50000 / 100000 = 0.5
50%|█████ | 50059/100000 [00:02<00:02, 17267.80it/s]
52%|█████▏ | 51796/100000 [00:03<00:02, 17297.66it/s]
54%|█████▎ | 53535/100000 [00:03<00:02, 17324.94it/s]
55%|█████▌ | 55268/100000 [00:03<00:02, 16683.53it/s]
57%|█████▋ | 57026/100000 [00:03<00:02, 16944.61it/s]
59%|█████▊ | 58725/100000 [00:03<00:02, 16859.67it/s]60000 / 100000 = 0.6
60%|██████ | 60466/100000 [00:03<00:02, 17019.80it/s]
62%|██████▏ | 62235/100000 [00:03<00:02, 17216.44it/s]
64%|██████▍ | 63997/100000 [00:03<00:02, 17333.79it/s]
66%|██████▌ | 65732/100000 [00:03<00:01, 17338.23it/s]
67%|██████▋ | 67467/100000 [00:03<00:02, 16164.14it/s]
69%|██████▉ | 69100/100000 [00:04<00:01, 16064.19it/s]70000 / 100000 = 0.7
71%|███████ | 70857/100000 [00:04<00:01, 16495.32it/s]
73%|███████▎ | 72517/100000 [00:04<00:01, 15959.40it/s]
74%|███████▍ | 74123/100000 [00:04<00:01, 15612.09it/s]
76%|███████▌ | 75695/100000 [00:04<00:01, 15641.02it/s]
77%|███████▋ | 77265/100000 [00:04<00:01, 15178.40it/s]
79%|███████▉ | 78789/100000 [00:04<00:01, 15138.24it/s]80000 / 100000 = 0.8
80%|████████ | 80443/100000 [00:04<00:01, 15543.42it/s]
82%|████████▏ | 82178/100000 [00:04<00:01, 16069.59it/s]
84%|████████▍ | 83938/100000 [00:05<00:00, 16519.81it/s]
86%|████████▌ | 85594/100000 [00:05<00:00, 16384.68it/s]
87%|████████▋ | 87349/100000 [00:05<00:00, 16727.26it/s]
89%|████████▉ | 89085/100000 [00:05<00:00, 16913.53it/s]90000 / 100000 = 0.9
91%|█████████ | 90847/100000 [00:05<00:00, 17123.09it/s]
93%|█████████▎| 92606/100000 [00:05<00:00, 17261.34it/s]
94%|█████████▍| 94369/100000 [00:05<00:00, 17370.27it/s]
96%|█████████▌| 96124/100000 [00:05<00:00, 17421.30it/s]
98%|█████████▊| 97867/100000 [00:05<00:00, 17300.86it/s]
100%|█████████▉| 99598/100000 [00:05<00:00, 16710.91it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16812.66it/s]
True
def precision(q, *, limits=(10, 100, 1_000, 10_000), k=10, prec_n=10):
    """Print the hit rate and average query time of *q* per search limit.

    For ``prec_n`` randomly chosen items, the ``k`` neighbours returned
    with the module-level item count ``n`` as the third argument of
    ``get_nns_by_item`` are taken as the reference set.  The same query is
    then repeated with each value in ``limits`` as the third argument, and
    the share of reference neighbours recovered plus the wall-clock time of
    that call are accumulated.  One summary line per limit is printed at
    the end.

    Parameters
    ----------
    q : index object exposing ``get_nns_by_item(item, k, limit)``.
    limits : iterable of int, values passed as the third query argument.
    k : int, number of neighbours requested per query (must be >= 1).
    prec_n : int, number of random query items to average over (>= 1).

    Raises
    ------
    ValueError
        If ``k`` or ``prec_n`` is smaller than 1.

    Notes
    -----
    Reads the module-level names ``n``, ``random``, ``time`` and ``trange``.
    """
    if k < 1 or prec_n < 1:
        raise ValueError("k and prec_n must both be at least 1")

    limits = tuple(limits)  # allow any iterable, iterate it more than once
    prec_sum = dict.fromkeys(limits, 0.0)
    time_sum = dict.fromkeys(limits, 0.0)

    for _ in trange(prec_n):
        j = random.randrange(0, n)
        # Reference neighbours: query with the largest budget (n).
        closest = set(q.get_nns_by_item(j, k, n))
        for limit in limits:
            # perf_counter is the high-resolution clock meant for timing
            # short intervals; only the query itself is timed.
            t0 = time.perf_counter()
            toplist = q.get_nns_by_item(j, k, limit)
            elapsed = time.perf_counter() - t0

            found = len(closest.intersection(toplist))
            prec_sum[limit] += found / k
            time_sum[limit] += elapsed

    # Average over the number of sampled queries rather than relying on
    # the loop variable surviving the loop.
    for limit in limits:
        print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
              % (limit, 100.0 * prec_sum[limit] / prec_n, time_sum[limit] / prec_n))
# Open 'test.annoy' in a fresh index object (same dimension and metric
# as the index that was built) and report its precision.
q = AnnoyIndex(f=f, metric='angular')
q.set_seed(0)
q.load('test.annoy')
precision(q)
0%| | 0/10 [00:00<?, ?it/s]
60%|██████ | 6/10 [00:00<00:00, 54.79it/s]
100%|██████████| 10/10 [00:00<00:00, 60.33it/s]
limit: 10 precision: 12.00% avg time: 0.000138s
limit: 100 precision: 12.00% avg time: 0.000113s
limit: 1000 precision: 23.00% avg time: 0.000332s
limit: 10000 precision: 75.00% avg time: 0.001995s
Total running time of the script: (0 minutes 15.577 seconds)
Related examples