Precision annoy.AnnoyIndex with examples

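An example that builds an AnnoyIndex from random vectors, saves it to disk, reloads it, and measures query precision and average query time at several search limits.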

from __future__ import print_function

import random; random.seed(0)
import time

# from annoy import AnnoyIndex
# from scikitplot.cexternals.annoy import AnnoyIndex
from scikitplot.cexternals.annoy import Index as AnnoyIndex

try:
    from tqdm.auto import tqdm, trange
except ImportError:
    # tqdm is optional: without it, fall back to silent stand-ins so the
    # example still runs, just without progress bars.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged, ignoring all progress-bar options."""
        return iterable

    def trange(*args, **kwargs):
        """Return ``range(*args)``, ignoring all progress-bar options."""
        return range(*args)

# Problem size: n random vectors of dimension f.
# Author's size notes: n=1_000_000 with f=100 ~2.5GB;
# n=100_000 with f=100 ~0.25GB, with f=256 ~0.6GB.
n, f = 100_000, 100

t = AnnoyIndex(
    f=f,
    metric='angular',
)
t.set_seed(0)

# Report progress about every 10% of the items. max() keeps the step
# non-zero so the modulo below cannot divide by zero when n < 10.
report_every = max(1, n // 10)
for i in trange(n):
    if i % report_every == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One f-dimensional vector of standard-normal samples per item.
    v = [random.gauss(0, 1) for _ in range(f)]
    t.add_item(i, v)

# NOTE(review): in upstream Annoy the argument to build() is the number of
# trees; confirm the same holds for this Index implementation.
t.build(2 * f)
t.save('test.annoy')
  0%|          | 0/100000 [00:00<?, ?it/s]0 / 100000 = 0.0

  2%|▏         | 1691/100000 [00:00<00:05, 16901.84it/s]
  3%|▎         | 3382/100000 [00:00<00:05, 16604.97it/s]
  5%|▌         | 5043/100000 [00:00<00:06, 15266.09it/s]
  7%|▋         | 6751/100000 [00:00<00:05, 15940.52it/s]
  8%|▊         | 8394/100000 [00:00<00:05, 16108.72it/s]10000 / 100000 = 0.1

 10%|█         | 10068/100000 [00:00<00:05, 16315.00it/s]
 12%|█▏        | 11776/100000 [00:00<00:05, 16558.14it/s]
 13%|█▎        | 13463/100000 [00:00<00:05, 16655.52it/s]
 15%|█▌        | 15132/100000 [00:01<00:06, 13141.17it/s]
 17%|█▋        | 16737/100000 [00:01<00:05, 13891.83it/s]
 18%|█▊        | 18462/100000 [00:01<00:05, 14796.96it/s]20000 / 100000 = 0.2

 20%|██        | 20180/100000 [00:01<00:05, 15459.07it/s]
 22%|██▏       | 21843/100000 [00:01<00:04, 15789.95it/s]
 24%|██▎       | 23560/100000 [00:01<00:04, 16187.57it/s]
 25%|██▌       | 25281/100000 [00:01<00:04, 16483.53it/s]
 27%|██▋       | 26958/100000 [00:01<00:04, 16565.35it/s]
 29%|██▊       | 28677/100000 [00:01<00:04, 16747.84it/s]30000 / 100000 = 0.3

 30%|███       | 30364/100000 [00:01<00:04, 15157.76it/s]
 32%|███▏      | 32069/100000 [00:02<00:04, 15680.44it/s]
 34%|███▍      | 33772/100000 [00:02<00:04, 16062.56it/s]
 35%|███▌      | 35431/100000 [00:02<00:03, 16212.38it/s]
 37%|███▋      | 37131/100000 [00:02<00:03, 16440.27it/s]
 39%|███▉      | 38788/100000 [00:02<00:03, 16474.02it/s]40000 / 100000 = 0.4

 40%|████      | 40445/100000 [00:02<00:04, 14712.15it/s]
 42%|████▏     | 41957/100000 [00:02<00:04, 14424.85it/s]
 44%|████▎     | 43670/100000 [00:02<00:03, 15169.69it/s]
 45%|████▌     | 45327/100000 [00:02<00:03, 15565.01it/s]
 47%|████▋     | 47039/100000 [00:02<00:03, 16009.97it/s]
 49%|████▉     | 48758/100000 [00:03<00:03, 16350.71it/s]50000 / 100000 = 0.5

 50%|█████     | 50472/100000 [00:03<00:02, 16580.82it/s]
 52%|█████▏    | 52183/100000 [00:03<00:02, 16735.14it/s]
 54%|█████▍    | 53909/100000 [00:03<00:02, 16890.61it/s]
 56%|█████▌    | 55635/100000 [00:03<00:02, 16999.09it/s]
 57%|█████▋    | 57358/100000 [00:03<00:02, 17067.11it/s]
 59%|█████▉    | 59068/100000 [00:03<00:02, 16830.76it/s]60000 / 100000 = 0.6

 61%|██████    | 60786/100000 [00:03<00:02, 16932.84it/s]
 63%|██████▎   | 62514/100000 [00:03<00:02, 17034.57it/s]
 64%|██████▍   | 64242/100000 [00:04<00:02, 17105.33it/s]
 66%|██████▌   | 65971/100000 [00:04<00:01, 17158.26it/s]
 68%|██████▊   | 67693/100000 [00:04<00:01, 17175.07it/s]
 69%|██████▉   | 69420/100000 [00:04<00:01, 17202.97it/s]70000 / 100000 = 0.7

 71%|███████   | 71145/100000 [00:04<00:01, 17215.09it/s]
 73%|███████▎  | 72867/100000 [00:04<00:01, 17195.07it/s]
 75%|███████▍  | 74587/100000 [00:04<00:01, 17162.93it/s]
 76%|███████▋  | 76304/100000 [00:04<00:01, 16786.38it/s]
 78%|███████▊  | 78025/100000 [00:04<00:01, 16909.77it/s]
 80%|███████▉  | 79734/100000 [00:04<00:01, 16960.90it/s]80000 / 100000 = 0.8

 81%|████████▏ | 81445/100000 [00:05<00:01, 17005.06it/s]
 83%|████████▎ | 83170/100000 [00:05<00:00, 17076.19it/s]
 85%|████████▍ | 84879/100000 [00:05<00:00, 17058.23it/s]
 87%|████████▋ | 86586/100000 [00:05<00:00, 17049.61it/s]
 88%|████████▊ | 88314/100000 [00:05<00:00, 17116.69it/s]90000 / 100000 = 0.9

 90%|█████████ | 90026/100000 [00:05<00:00, 17026.11it/s]
 92%|█████████▏| 91729/100000 [00:05<00:00, 16882.98it/s]
 93%|█████████▎| 93453/100000 [00:05<00:00, 16986.69it/s]
 95%|█████████▌| 95181/100000 [00:05<00:00, 17072.75it/s]
 97%|█████████▋| 96889/100000 [00:05<00:00, 16357.12it/s]
 99%|█████████▊| 98532/100000 [00:06<00:00, 14686.86it/s]
100%|██████████| 100000/100000 [00:06<00:00, 16220.28it/s]

True
def precision(q):
    """Print precision and average query time of *q* for several limits.

    For ``prec_n`` randomly chosen items, the ``k`` neighbours returned by
    ``q.get_nns_by_item(j, k, n)`` (``n`` being the module-level item count)
    form the reference set.  The same query is then repeated with each value
    in ``limits`` as the third argument; the fraction of reference neighbours
    recovered and the elapsed time are accumulated per limit.

    Parameters
    ----------
    q : index object
        Must provide ``get_nns_by_item(item, k, limit)`` returning an
        iterable of item ids.

    Returns
    -------
    None
        One summary line per limit is printed to stdout.
    """
    limits = [10, 100, 1_000, 10_000]
    k = 10
    prec_n = 10
    prec_sum = dict.fromkeys(limits, 0.0)
    time_sum = dict.fromkeys(limits, 0.0)

    for _ in trange(prec_n):
        j = random.randrange(0, n)
        closest = set(q.get_nns_by_item(j, k, n))
        for limit in limits:
            # perf_counter() is monotonic and high-resolution; time.time()
            # can be too coarse to time sub-millisecond queries reliably.
            t0 = time.perf_counter()
            toplist = q.get_nns_by_item(j, k, limit)
            elapsed = time.perf_counter() - t0

            found = len(closest.intersection(toplist))
            prec_sum[limit] += found / k
            time_sum[limit] += elapsed

    # Average over the number of sampled queries (prec_n), not over a loop
    # variable that happens to still be in scope.
    for limit in limits:
        print(
            'limit: %-9d precision: %6.2f%% avg time: %.6fs'
            % (limit, 100.0 * prec_sum[limit] / prec_n, time_sum[limit] / prec_n)
        )
# Open the index written to 'test.annoy' in a fresh object with the same
# dimensionality and metric, then report its precision.
q = AnnoyIndex(
    f=f,
    metric='angular',
)
q.set_seed(0)
q.load('test.annoy')
precision(q)
  0%|          | 0/10 [00:00<?, ?it/s]
 70%|███████   | 7/10 [00:00<00:00, 63.92it/s]
100%|██████████| 10/10 [00:00<00:00, 64.27it/s]
limit: 10        precision:  12.00% avg time: 0.000142s
limit: 100       precision:  12.00% avg time: 0.000101s
limit: 1000      precision:  23.00% avg time: 0.000305s
limit: 10000     precision:  75.00% avg time: 0.001956s

Tags: model-type: classification model-workflow: impute plot-type: bar level: beginner purpose: showcase

Total running time of the script: (0 minutes 16.309 seconds)

Related examples

Mmap annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

plot_aucplot_script with examples

plot_aucplot_script with examples

sphx_glr_auto_examples_annoy_plot_s_compile_cpp.py

Compile and run the C++ Annoy precision example.

Gallery generated by Sphinx-Gallery