Precision annoy.AnnoyIndex with examples

An example showing how the precision and average query time of AnnoyIndex nearest-neighbour lookups change with the search limit.

from __future__ import print_function

import random; random.seed(0)
import time

# from annoy import AnnoyIndex
# from scikitplot.annoy import AnnoyIndex
from scikitplot.annoy import Index as AnnoyIndex

try:
    from tqdm.auto import tqdm, trange
except ImportError:
    # tqdm is optional: fall back to no-op stand-ins that accept the same
    # call shape but ignore every extra positional/keyword argument.
    def tqdm(iterable, *args, **kwargs):
        """Return ``iterable`` unchanged (no progress bar is shown)."""
        return iterable

    def trange(n, *args, **kwargs):
        """Return ``range(n)`` (no progress bar is shown)."""
        return range(n)

# Problem size: n random vectors of dimension f.
# Size notes kept from the original example:
#   n, f = 1_000_000, 100  -> 100~2.5GB
#   n, f = 100_000, 100    -> 100~0.25GB 256~0.6GB
n, f = 100_000, 100


t = AnnoyIndex(
    f=f,
    metric='angular',
)
t.set_seed(0)

# Print a progress line about every 10% of the items; max(1, ...) keeps the
# modulo valid when n is smaller than 10.
report_every = max(1, n // 10)
for i in trange(n):
    if i % report_every == 0:
        print(f"{i} / {n} = {i / n}")
    # One random Gaussian vector of length f per item.
    v = [random.gauss(0, 1) for _ in range(f)]
    t.add_item(i, v)

# NOTE(review): in the upstream Annoy API the argument to build() is the
# number of trees -- confirm the same holds for scikitplot.annoy.
t.build(2 * f)
t.save('test.annoy')
  0%|          | 0/100000 [00:00<?, ?it/s]0 / 100000 = 0.0

  2%|▏         | 1655/100000 [00:00<00:05, 16548.48it/s]
  3%|▎         | 3398/100000 [00:00<00:05, 17065.69it/s]
  5%|▌         | 5157/100000 [00:00<00:05, 17304.17it/s]
  7%|▋         | 6916/100000 [00:00<00:05, 17413.36it/s]
  9%|▊         | 8658/100000 [00:00<00:05, 17273.16it/s]10000 / 100000 = 0.1

 10%|█         | 10407/100000 [00:00<00:05, 17343.80it/s]
 12%|█▏        | 12146/100000 [00:00<00:05, 17358.32it/s]
 14%|█▍        | 13906/100000 [00:00<00:04, 17433.68it/s]
 16%|█▌        | 15668/100000 [00:00<00:04, 17491.25it/s]
 17%|█▋        | 17418/100000 [00:01<00:04, 17450.21it/s]
 19%|█▉        | 19164/100000 [00:01<00:04, 17360.80it/s]20000 / 100000 = 0.2

 21%|██        | 20901/100000 [00:01<00:04, 16993.81it/s]
 23%|██▎       | 22602/100000 [00:01<00:04, 16927.74it/s]
 24%|██▍       | 24298/100000 [00:01<00:04, 16936.77it/s]
 26%|██▌       | 25993/100000 [00:01<00:04, 16732.66it/s]
 28%|██▊       | 27668/100000 [00:01<00:04, 16546.62it/s]
 29%|██▉       | 29347/100000 [00:01<00:04, 16618.09it/s]30000 / 100000 = 0.3

 31%|███       | 31010/100000 [00:01<00:04, 16551.85it/s]
 33%|███▎      | 32754/100000 [00:01<00:03, 16813.80it/s]
 34%|███▍      | 34437/100000 [00:02<00:03, 16620.09it/s]
 36%|███▌      | 36130/100000 [00:02<00:03, 16711.33it/s]
 38%|███▊      | 37876/100000 [00:02<00:03, 16933.12it/s]
 40%|███▉      | 39622/100000 [00:02<00:03, 17088.20it/s]40000 / 100000 = 0.4

 41%|████▏     | 41363/100000 [00:02<00:03, 17182.08it/s]
 43%|████▎     | 43101/100000 [00:02<00:03, 17237.51it/s]
 45%|████▍     | 44839/100000 [00:02<00:03, 17278.80it/s]
 47%|████▋     | 46568/100000 [00:02<00:03, 17057.82it/s]
 48%|████▊     | 48302/100000 [00:02<00:03, 17140.55it/s]50000 / 100000 = 0.5

 50%|█████     | 50059/100000 [00:02<00:02, 17267.80it/s]
 52%|█████▏    | 51796/100000 [00:03<00:02, 17297.66it/s]
 54%|█████▎    | 53535/100000 [00:03<00:02, 17324.94it/s]
 55%|█████▌    | 55268/100000 [00:03<00:02, 16683.53it/s]
 57%|█████▋    | 57026/100000 [00:03<00:02, 16944.61it/s]
 59%|█████▊    | 58725/100000 [00:03<00:02, 16859.67it/s]60000 / 100000 = 0.6

 60%|██████    | 60466/100000 [00:03<00:02, 17019.80it/s]
 62%|██████▏   | 62235/100000 [00:03<00:02, 17216.44it/s]
 64%|██████▍   | 63997/100000 [00:03<00:02, 17333.79it/s]
 66%|██████▌   | 65732/100000 [00:03<00:01, 17338.23it/s]
 67%|██████▋   | 67467/100000 [00:03<00:02, 16164.14it/s]
 69%|██████▉   | 69100/100000 [00:04<00:01, 16064.19it/s]70000 / 100000 = 0.7

 71%|███████   | 70857/100000 [00:04<00:01, 16495.32it/s]
 73%|███████▎  | 72517/100000 [00:04<00:01, 15959.40it/s]
 74%|███████▍  | 74123/100000 [00:04<00:01, 15612.09it/s]
 76%|███████▌  | 75695/100000 [00:04<00:01, 15641.02it/s]
 77%|███████▋  | 77265/100000 [00:04<00:01, 15178.40it/s]
 79%|███████▉  | 78789/100000 [00:04<00:01, 15138.24it/s]80000 / 100000 = 0.8

 80%|████████  | 80443/100000 [00:04<00:01, 15543.42it/s]
 82%|████████▏ | 82178/100000 [00:04<00:01, 16069.59it/s]
 84%|████████▍ | 83938/100000 [00:05<00:00, 16519.81it/s]
 86%|████████▌ | 85594/100000 [00:05<00:00, 16384.68it/s]
 87%|████████▋ | 87349/100000 [00:05<00:00, 16727.26it/s]
 89%|████████▉ | 89085/100000 [00:05<00:00, 16913.53it/s]90000 / 100000 = 0.9

 91%|█████████ | 90847/100000 [00:05<00:00, 17123.09it/s]
 93%|█████████▎| 92606/100000 [00:05<00:00, 17261.34it/s]
 94%|█████████▍| 94369/100000 [00:05<00:00, 17370.27it/s]
 96%|█████████▌| 96124/100000 [00:05<00:00, 17421.30it/s]
 98%|█████████▊| 97867/100000 [00:05<00:00, 17300.86it/s]
100%|█████████▉| 99598/100000 [00:05<00:00, 16710.91it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16812.66it/s]

True
def precision(q, limits=(10, 100, 1_000, 10_000), k=10, prec_n=10):
    """Print precision and average query time of ``q`` per search limit.

    For ``prec_n`` randomly chosen items, the ``k`` neighbours returned by
    ``q.get_nns_by_item(j, k, n)`` (third argument set to the module-level
    item count ``n``) are taken as the reference set. The same query is then
    repeated with each value in ``limits`` as the third argument, and the
    fraction of reference neighbours recovered is accumulated together with
    the wall-clock time of the call.

    Parameters
    ----------
    q : index object
        Must provide ``get_nns_by_item(item, k, limit)``.
    limits : sequence of int, optional
        Values passed as the third argument of ``get_nns_by_item``.
    k : int, optional
        Number of neighbours requested per query.
    prec_n : int, optional
        Number of random query items to average over.

    Raises
    ------
    ValueError
        If ``prec_n`` is smaller than 1 (the averages would be undefined).
    """
    if prec_n < 1:
        raise ValueError("prec_n must be at least 1")

    prec_sum = dict.fromkeys(limits, 0.0)
    time_sum = dict.fromkeys(limits, 0.0)

    for _ in trange(prec_n):
        j = random.randrange(0, n)
        closest = set(q.get_nns_by_item(j, k, n))
        for limit in limits:
            # perf_counter is a monotonic, high-resolution clock, which suits
            # these sub-millisecond calls better than time.time().
            t0 = time.perf_counter()
            toplist = q.get_nns_by_item(j, k, limit)
            elapsed = time.perf_counter() - t0

            found = len(closest.intersection(toplist))
            prec_sum[limit] += found / k
            time_sum[limit] += elapsed

    # Average over the number of sampled items, not the leaked loop index.
    for limit in limits:
        print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
              % (limit, 100.0 * prec_sum[limit] / prec_n, time_sum[limit] / prec_n))
# Open a second index object with the same dimension and metric, load the
# file written by the build step, and report precision per search limit.
q = AnnoyIndex(
    f=f,
    metric='angular',
)
q.set_seed(0)
q.load('test.annoy')
precision(q)
  0%|          | 0/10 [00:00<?, ?it/s]
 60%|██████    | 6/10 [00:00<00:00, 54.79it/s]
100%|██████████| 10/10 [00:00<00:00, 60.33it/s]
limit: 10        precision:  12.00% avg time: 0.000138s
limit: 100       precision:  12.00% avg time: 0.000113s
limit: 1000      precision:  23.00% avg time: 0.000332s
limit: 10000     precision:  75.00% avg time: 0.001995s

Tags: model-type: classification | model-workflow: impute | plot-type: bar | level: beginner | purpose: showcase

Total running time of the script: (0 minutes 15.577 seconds)

Related examples

annoy.Index python-api with examples

annoy.Index python-api with examples

annoy impute with examples

annoy impute with examples

annoy.Annoy legacy c-api with examples

annoy.Annoy legacy c-api with examples

Mmap annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Gallery generated by Sphinx-Gallery