Precision annoy.AnnoyIndex with examples

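An example that builds an AnnoyIndex, saves and reloads it, and measures the precision and query time of its approximate nearest-neighbour searches.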

from __future__ import print_function

import random; random.seed(0)
import time

# from annoy import AnnoyIndex
# from scikitplot.annoy import AnnoyIndex
from scikitplot.annoy import Index as AnnoyIndex

try:
    from tqdm.auto import tqdm, trange
except ImportError:
    # tqdm is optional: without it, use stand-ins that accept and ignore any
    # extra progress-bar arguments.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged (no progress bar)."""
        return iterable

    def trange(n, *args, **kwargs):
        """Return ``range(n)`` (no progress bar)."""
        return range(n)

# Problem size: n random vectors of dimension f.
# Rough size estimates: n=1_000_000, f=100 -> ~2.5GB;
# n=100_000 -> ~0.25GB at f=100, ~0.6GB at f=256.
# To run the large configuration, use: n, f = 1_000_000, 100
n, f = 100_000, 100


idx = AnnoyIndex(
    f=f,
    metric='angular',
)
# NOTE: a seed of 0 makes the library emit the warning
# "seed=0 resets to Annoy's default seed".
idx.set_seed(0)

# Report progress about every 10% of the insertions. max() keeps the step
# non-zero so the modulo below stays valid when n < 10.
progress_step = max(1, n // 10)
for i in trange(n):
    if i % progress_step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One vector of f independent standard-normal components.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

idx.build(2 * f)  # request 2 * f trees
idx.save('test.annoy')  # reloaded later in this script
idx.info()
/home/circleci/repo/galleries/examples/annoy/plot_precision_script.py:37: UserWarning:

seed=0 resets to Annoy's default seed


  0%|          | 0/100000 [00:00<?, ?it/s]0 / 100000 = 0.0

  0%|          | 497/100000 [00:00<00:20, 4967.08it/s]
  1%|          | 1134/100000 [00:00<00:17, 5791.28it/s]
  2%|▏         | 1714/100000 [00:00<00:17, 5583.07it/s]
  2%|▏         | 2274/100000 [00:00<00:17, 5467.23it/s]
  3%|▎         | 2880/100000 [00:00<00:17, 5674.73it/s]
  3%|▎         | 3449/100000 [00:00<00:21, 4509.57it/s]
  4%|▍         | 3934/100000 [00:00<00:21, 4570.12it/s]
  4%|▍         | 4430/100000 [00:00<00:20, 4678.64it/s]
  5%|▍         | 4916/100000 [00:01<00:20, 4640.34it/s]
  5%|▌         | 5392/100000 [00:01<00:21, 4484.01it/s]
  6%|▌         | 5849/100000 [00:01<00:25, 3722.36it/s]
  6%|▋         | 6356/100000 [00:01<00:23, 4057.92it/s]
  7%|▋         | 6787/100000 [00:01<00:23, 3991.76it/s]
  7%|▋         | 7335/100000 [00:01<00:21, 4387.84it/s]
  8%|▊         | 7791/100000 [00:01<00:21, 4383.13it/s]
  8%|▊         | 8386/100000 [00:01<00:18, 4821.87it/s]
  9%|▉         | 8880/100000 [00:01<00:19, 4788.69it/s]
  9%|▉         | 9367/100000 [00:02<00:23, 3889.81it/s]
 10%|▉         | 9811/100000 [00:02<00:22, 4026.62it/s]10000 / 100000 = 0.1

 10%|█         | 10320/100000 [00:02<00:20, 4304.69it/s]
 11%|█         | 10825/100000 [00:02<00:19, 4506.56it/s]
 11%|█▏        | 11293/100000 [00:02<00:19, 4462.92it/s]
 12%|█▏        | 11752/100000 [00:02<00:20, 4350.13it/s]
 12%|█▏        | 12376/100000 [00:02<00:17, 4878.51it/s]
 13%|█▎        | 12923/100000 [00:02<00:17, 5047.00it/s]
 13%|█▎        | 13436/100000 [00:02<00:20, 4272.59it/s]
 14%|█▍        | 13889/100000 [00:03<00:20, 4285.50it/s]
 14%|█▍        | 14448/100000 [00:03<00:18, 4634.16it/s]
 15%|█▌        | 15044/100000 [00:03<00:17, 4917.67it/s]
 16%|█▌        | 15699/100000 [00:03<00:15, 5373.68it/s]
 16%|█▌        | 16249/100000 [00:03<00:16, 5119.84it/s]
 17%|█▋        | 16772/100000 [00:03<00:19, 4329.80it/s]
 17%|█▋        | 17232/100000 [00:03<00:18, 4386.67it/s]
 18%|█▊        | 17690/100000 [00:03<00:19, 4264.91it/s]
 18%|█▊        | 18130/100000 [00:03<00:19, 4250.06it/s]
 19%|█▊        | 18636/100000 [00:04<00:18, 4470.38it/s]
 19%|█▉        | 19092/100000 [00:04<00:18, 4437.26it/s]
 20%|█▉        | 19563/100000 [00:04<00:17, 4514.02it/s]20000 / 100000 = 0.2

 20%|██        | 20206/100000 [00:04<00:15, 5063.13it/s]
 21%|██        | 20718/100000 [00:04<00:16, 4941.99it/s]
 21%|██        | 21232/100000 [00:04<00:15, 4998.35it/s]
 22%|██▏       | 21809/100000 [00:04<00:14, 5223.07it/s]
 22%|██▏       | 22335/100000 [00:04<00:14, 5190.10it/s]
 23%|██▎       | 22857/100000 [00:04<00:17, 4382.08it/s]
 23%|██▎       | 23318/100000 [00:05<00:18, 4192.32it/s]
 24%|██▍       | 23754/100000 [00:05<00:18, 4129.58it/s]
 24%|██▍       | 24178/100000 [00:05<00:18, 4135.06it/s]
 25%|██▍       | 24599/100000 [00:05<00:18, 4112.38it/s]
 25%|██▌       | 25028/100000 [00:05<00:18, 4161.34it/s]
 25%|██▌       | 25449/100000 [00:05<00:20, 3574.01it/s]
 26%|██▌       | 25878/100000 [00:05<00:19, 3759.08it/s]
 27%|██▋       | 26566/100000 [00:05<00:15, 4600.94it/s]
 27%|██▋       | 27261/100000 [00:05<00:13, 5254.27it/s]
 28%|██▊       | 27806/100000 [00:06<00:14, 5051.95it/s]
 28%|██▊       | 28326/100000 [00:06<00:14, 5039.90it/s]
 29%|██▉       | 28865/100000 [00:06<00:13, 5138.40it/s]
 29%|██▉       | 29394/100000 [00:06<00:13, 5180.65it/s]
 30%|██▉       | 29918/100000 [00:06<00:13, 5073.53it/s]30000 / 100000 = 0.3

 30%|███       | 30430/100000 [00:06<00:17, 4062.73it/s]
 31%|███       | 30871/100000 [00:06<00:16, 4091.86it/s]
 31%|███▏      | 31305/100000 [00:06<00:17, 4040.75it/s]
 32%|███▏      | 31903/100000 [00:07<00:14, 4553.03it/s]
 33%|███▎      | 32592/100000 [00:07<00:12, 5192.09it/s]
 34%|███▍      | 34062/100000 [00:07<00:08, 7867.03it/s]
 36%|███▌      | 35661/100000 [00:07<00:06, 10193.81it/s]
 37%|███▋      | 36845/100000 [00:07<00:05, 10670.88it/s]
 38%|███▊      | 37937/100000 [00:07<00:08, 7176.17it/s]
 39%|███▉      | 38823/100000 [00:07<00:08, 7282.53it/s]
 40%|███▉      | 39672/100000 [00:08<00:10, 6032.53it/s]40000 / 100000 = 0.4

 40%|████      | 40388/100000 [00:08<00:11, 5325.58it/s]
 41%|████      | 41005/100000 [00:08<00:11, 5295.61it/s]
 42%|████▏     | 41593/100000 [00:08<00:12, 4577.78it/s]
 42%|████▏     | 42100/100000 [00:08<00:12, 4514.34it/s]
 43%|████▎     | 42651/100000 [00:08<00:12, 4735.10it/s]
 43%|████▎     | 43156/100000 [00:08<00:11, 4793.54it/s]
 44%|████▎     | 43659/100000 [00:08<00:11, 4823.85it/s]
 44%|████▍     | 44265/100000 [00:09<00:10, 5152.84it/s]
 45%|████▍     | 44796/100000 [00:09<00:12, 4373.95it/s]
 45%|████▌     | 45263/100000 [00:09<00:13, 4143.29it/s]
 46%|████▌     | 45698/100000 [00:09<00:15, 3594.67it/s]
 46%|████▌     | 46081/100000 [00:09<00:14, 3641.34it/s]
 46%|████▋     | 46463/100000 [00:09<00:14, 3620.46it/s]
 47%|████▋     | 46882/100000 [00:09<00:14, 3767.50it/s]
 47%|████▋     | 47312/100000 [00:09<00:13, 3910.97it/s]
 48%|████▊     | 48040/100000 [00:09<00:10, 4848.39it/s]
 49%|████▊     | 48609/100000 [00:10<00:10, 5087.66it/s]
 49%|████▉     | 49129/100000 [00:10<00:10, 4809.32it/s]
 50%|████▉     | 49621/100000 [00:10<00:12, 4090.71it/s]50000 / 100000 = 0.5

 50%|█████     | 50172/100000 [00:10<00:11, 4448.19it/s]
 51%|█████     | 50651/100000 [00:10<00:10, 4538.15it/s]
 51%|█████     | 51188/100000 [00:10<00:10, 4766.38it/s]
 52%|█████▏    | 51743/100000 [00:10<00:09, 4986.98it/s]
 52%|█████▏    | 52254/100000 [00:10<00:09, 4820.91it/s]
 53%|█████▎    | 52745/100000 [00:10<00:09, 4825.39it/s]
 53%|█████▎    | 53234/100000 [00:11<00:09, 4808.02it/s]
 54%|█████▎    | 53720/100000 [00:11<00:09, 4795.16it/s]
 54%|█████▍    | 54203/100000 [00:11<00:09, 4732.12it/s]
 55%|█████▍    | 54679/100000 [00:11<00:09, 4735.41it/s]
 55%|█████▌    | 55231/100000 [00:11<00:09, 4964.80it/s]
 56%|█████▌    | 55730/100000 [00:11<00:09, 4817.02it/s]
 56%|█████▋    | 56388/100000 [00:11<00:08, 5327.37it/s]
 57%|█████▋    | 56959/100000 [00:11<00:07, 5437.76it/s]
 58%|█████▊    | 57506/100000 [00:12<00:09, 4334.61it/s]
 58%|█████▊    | 57977/100000 [00:12<00:09, 4256.70it/s]
 58%|█████▊    | 58428/100000 [00:12<00:09, 4274.61it/s]
 59%|█████▉    | 58874/100000 [00:12<00:11, 3623.75it/s]
 59%|█████▉    | 59283/100000 [00:12<00:10, 3735.89it/s]
 60%|█████▉    | 59690/100000 [00:12<00:10, 3821.19it/s]60000 / 100000 = 0.6

 60%|██████    | 60109/100000 [00:12<00:10, 3919.62it/s]
 61%|██████    | 60568/100000 [00:12<00:09, 4104.26it/s]
 61%|██████    | 60989/100000 [00:12<00:09, 4100.81it/s]
 61%|██████▏   | 61407/100000 [00:13<00:09, 4085.79it/s]
 62%|██████▏   | 61821/100000 [00:13<00:09, 4090.60it/s]
 62%|██████▏   | 62296/100000 [00:13<00:08, 4282.09it/s]
 63%|██████▎   | 62728/100000 [00:13<00:08, 4173.32it/s]
 63%|██████▎   | 63148/100000 [00:13<00:10, 3518.32it/s]
 64%|██████▎   | 63519/100000 [00:13<00:10, 3558.19it/s]
 64%|██████▍   | 63889/100000 [00:13<00:10, 3565.58it/s]
 64%|██████▍   | 64264/100000 [00:13<00:09, 3616.20it/s]
 65%|██████▍   | 64867/100000 [00:13<00:08, 4298.29it/s]
 66%|██████▌   | 65791/100000 [00:13<00:06, 5600.00it/s]
 66%|██████▋   | 66357/100000 [00:14<00:06, 5424.40it/s]
 67%|██████▋   | 66904/100000 [00:14<00:06, 5292.36it/s]
 67%|██████▋   | 67456/100000 [00:14<00:06, 5355.86it/s]
 68%|██████▊   | 67995/100000 [00:14<00:06, 5295.23it/s]
 69%|██████▊   | 68527/100000 [00:14<00:07, 4305.16it/s]
 69%|██████▉   | 68988/100000 [00:14<00:07, 4309.41it/s]
 69%|██████▉   | 69440/100000 [00:14<00:07, 4256.16it/s]70000 / 100000 = 0.7

 70%|███████   | 70064/100000 [00:14<00:06, 4783.99it/s]
 71%|███████   | 71101/100000 [00:15<00:04, 6322.42it/s]
 72%|███████▏  | 72088/100000 [00:15<00:03, 7323.59it/s]
 73%|███████▎  | 73119/100000 [00:15<00:03, 8181.08it/s]
 74%|███████▍  | 73958/100000 [00:15<00:04, 5370.55it/s]
 75%|███████▍  | 74634/100000 [00:15<00:04, 5470.38it/s]
 75%|███████▌  | 75281/100000 [00:15<00:04, 5650.92it/s]
 76%|███████▌  | 75921/100000 [00:15<00:05, 4756.67it/s]
 76%|███████▋  | 76469/100000 [00:16<00:05, 4230.10it/s]
 77%|███████▋  | 76947/100000 [00:16<00:05, 4258.49it/s]
 77%|███████▋  | 77413/100000 [00:16<00:05, 4241.81it/s]
 78%|███████▊  | 77865/100000 [00:16<00:05, 4270.14it/s]
 78%|███████▊  | 78312/100000 [00:16<00:05, 4279.33it/s]
 79%|███████▉  | 78754/100000 [00:16<00:06, 3445.40it/s]
 79%|███████▉  | 79132/100000 [00:16<00:07, 2977.82it/s]
 79%|███████▉  | 79460/100000 [00:17<00:06, 2957.57it/s]
 80%|███████▉  | 79777/100000 [00:17<00:06, 2956.03it/s]80000 / 100000 = 0.8

 80%|████████  | 80206/100000 [00:17<00:06, 3284.76it/s]
 81%|████████  | 80599/100000 [00:17<00:05, 3452.59it/s]
 81%|████████  | 80959/100000 [00:17<00:05, 3469.53it/s]
 81%|████████▏ | 81338/100000 [00:17<00:05, 3557.60it/s]
 82%|████████▏ | 81714/100000 [00:17<00:05, 3614.31it/s]
 82%|████████▏ | 82315/100000 [00:17<00:04, 4305.55it/s]
 83%|████████▎ | 83106/100000 [00:17<00:03, 5357.00it/s]
 84%|████████▎ | 83650/100000 [00:17<00:03, 4465.36it/s]
 84%|████████▍ | 84127/100000 [00:18<00:03, 4485.88it/s]
 85%|████████▍ | 84597/100000 [00:18<00:03, 4367.33it/s]
 85%|████████▌ | 85103/100000 [00:18<00:03, 4552.92it/s]
 86%|████████▌ | 85571/100000 [00:18<00:03, 4463.67it/s]
 86%|████████▌ | 86067/100000 [00:18<00:03, 4601.58it/s]
 87%|████████▋ | 86535/100000 [00:18<00:03, 3850.75it/s]
 87%|████████▋ | 86945/100000 [00:18<00:03, 3881.82it/s]
 87%|████████▋ | 87401/100000 [00:18<00:03, 4060.22it/s]
 88%|████████▊ | 87822/100000 [00:19<00:03, 3930.23it/s]
 88%|████████▊ | 88226/100000 [00:19<00:02, 3939.53it/s]
 89%|████████▊ | 88676/100000 [00:19<00:02, 4095.61it/s]
 89%|████████▉ | 89092/100000 [00:19<00:02, 4082.63it/s]
 90%|████████▉ | 89505/100000 [00:19<00:02, 4003.21it/s]90000 / 100000 = 0.9

 90%|█████████ | 90069/100000 [00:19<00:02, 4471.49it/s]
 91%|█████████ | 90521/100000 [00:19<00:02, 4410.66it/s]
 91%|█████████ | 90966/100000 [00:19<00:02, 3737.89it/s]
 91%|█████████▏| 91359/100000 [00:19<00:02, 3536.51it/s]
 92%|█████████▏| 91727/100000 [00:20<00:02, 3532.49it/s]
 92%|█████████▏| 92090/100000 [00:20<00:02, 3514.38it/s]
 92%|█████████▏| 92467/100000 [00:20<00:02, 3583.77it/s]
 93%|█████████▎| 92929/100000 [00:20<00:01, 3874.03it/s]
 93%|█████████▎| 93365/100000 [00:20<00:01, 4012.78it/s]
 94%|█████████▍| 93821/100000 [00:20<00:01, 4170.53it/s]
 94%|█████████▍| 94242/100000 [00:20<00:01, 3453.97it/s]
 95%|█████████▍| 94611/100000 [00:20<00:01, 3481.61it/s]
 95%|█████████▌| 95277/100000 [00:20<00:01, 4328.48it/s]
 96%|█████████▌| 95803/100000 [00:20<00:00, 4583.97it/s]
 96%|█████████▋| 96279/100000 [00:21<00:00, 4438.37it/s]
 97%|█████████▋| 96736/100000 [00:21<00:00, 3707.06it/s]
 97%|█████████▋| 97367/100000 [00:21<00:00, 4352.26it/s]
 98%|█████████▊| 97836/100000 [00:21<00:00, 4361.35it/s]
 98%|█████████▊| 98332/100000 [00:21<00:00, 4521.62it/s]
 99%|█████████▉| 99040/100000 [00:21<00:00, 5230.82it/s]
100%|█████████▉| 99582/100000 [00:21<00:00, 5214.89it/s]
100%|██████████| 100000/100000 [00:21<00:00, 4567.10it/s]

{'f': 100, 'metric': 'angular', 'n_neighbors': 5, 'on_disk_path': 'test.annoy', 'prefault': False, 'seed': None, 'verbose': None, 'schema_version': 0, 'n_items': 100000, 'n_trees': 200, 'memory_usage_byte': 270573580, 'memory_usage_mib': 258.0390739440918}
def plot(idx, y=None, **kwargs):
    """Draw two side-by-side views of an index on one figure.

    The left axes receive the output of ``plot_annoy_index`` and the right
    axes receive the ``k=1`` neighbour edges from ``plot_annoy_knn_edges``.

    Parameters
    ----------
    idx : AnnoyIndex
        Index to visualise. All of its ``idx.f`` dimensions are passed to the
        plotting helper as ``dims``.
    y : optional
        Currently unused; kept so existing calls that pass it keep working.
    **kwargs
        Only ``alpha`` (default ``0.8``) is consumed; it is forwarded as the
        line alpha of the neighbour edges. Any other keyword is ignored.

    Returns
    -------
    None
        The figure is created through ``matplotlib.pyplot`` and not returned.
    """
    import matplotlib.pyplot as plt
    import scikitplot.cexternals._annoy._plotting as utils

    alpha = kwargs.pop("alpha", 0.8)

    # One row, two panels: index view (left) and nearest-neighbour edges (right).
    _, axes = plt.subplots(ncols=2, figsize=(12, 5))

    # The first element of the helper's return value is reused as the second
    # positional argument of the edge plot (presumably the plotted
    # coordinates -- TODO confirm against the helper's documentation).
    y2 = utils.plot_annoy_index(
        idx,
        dims=list(range(idx.f)),
        plot_kwargs={"draw_legend": False},
        ax=axes[0],
    )[0]
    utils.plot_annoy_knn_edges(
        idx,
        y2,
        k=1,
        line_kwargs={"alpha": alpha},
        ax=axes[1],
    )

# Optional: uncomment the next two lines to call unbuild() and then rebuild
# the index with build(10) before plotting.
# idx.unbuild()
# idx.build(10)
plot(idx)
plot precision script
def precision(q):
    """Print precision and average query time of ``q`` for several search limits.

    For ``prec_n`` randomly chosen items, the ``k`` neighbours returned with
    a search budget equal to the number of items in the index are taken as
    the reference set. The same query is then repeated with each budget in
    ``limits`` (third argument of ``get_nns_by_item``, Annoy's ``search_k``).
    The fraction of reference neighbours recovered and the wall-clock time
    are averaged over the sampled items.

    Parameters
    ----------
    q : AnnoyIndex
        A built or loaded index providing ``get_n_items`` and
        ``get_nns_by_item``.

    Returns
    -------
    None
        One line per limit is printed to stdout.
    """
    limits = [10, 100, 1_000, 10_000]
    k = 10
    prec_n = 10
    # Use the index's own size instead of a module-level global, so the
    # sampled ids are always valid for the index that was passed in.
    n_items = q.get_n_items()
    prec_sum = dict.fromkeys(limits, 0.0)
    time_sum = dict.fromkeys(limits, 0.0)

    for _ in trange(prec_n):
        j = random.randrange(0, n_items)
        # Reference answer: same query with the largest search budget.
        closest = set(q.get_nns_by_item(j, k, n_items))
        for limit in limits:
            # perf_counter is monotonic and high-resolution, which matters
            # for the sub-millisecond queries timed here.
            t0 = time.perf_counter()
            toplist = q.get_nns_by_item(j, k, limit)
            elapsed = time.perf_counter() - t0

            found = len(closest.intersection(toplist))
            prec_sum[limit] += found / k
            time_sum[limit] += elapsed

    # Average over the number of sampled queries (not a leaked loop index).
    for limit in limits:
        print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
              % (limit, 100.0 * prec_sum[limit] / prec_n, time_sum[limit] / prec_n))
# Open the index file written earlier in a fresh object and benchmark it.
q = AnnoyIndex(
    f=f,
    metric='angular',
)
q.set_seed(0)
q.load('test.annoy')
precision(q)
/home/circleci/repo/galleries/examples/annoy/plot_precision_script.py:111: UserWarning:

seed=0 resets to Annoy's default seed


  0%|          | 0/10 [00:00<?, ?it/s]
 70%|███████   | 7/10 [00:00<00:00, 68.41it/s]
100%|██████████| 10/10 [00:00<00:00, 69.71it/s]
limit: 10        precision:  13.00% avg time: 0.000145s
limit: 100       precision:  16.00% avg time: 0.000098s
limit: 1000      precision:  25.00% avg time: 0.000295s
limit: 10000     precision:  82.00% avg time: 0.001842s

Tags: model-type: classification; model-workflow: impute; plot-type: bar; level: beginner; purpose: showcase

Total running time of the script: (3 minutes 1.016 seconds)

Related examples

annoy.Index to NPY or CSV with examples

annoy.Index to NPY or CSV with examples

Mmap annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

annoy.Annoy legacy c-api with examples

annoy.Annoy legacy c-api with examples

Gallery generated by Sphinx-Gallery