annoy.Annoy legacy c-api with examples

An example showing the Annoy class.

import random
# Seed the stdlib RNG so the random.gauss() vectors used further down are reproducible.
random.seed(0)

# Alternative import named in the class docstring: from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import Annoy as AnnoyIndex

# Print the docstring of the aliased Annoy class.
print(AnnoyIndex.__doc__)
Compiled with GCC/Clang. Using 512-bit AVX instructions.

High-performance approximate nearest neighbours (Annoy) C++ core.

This module is a low-level backend (``annoylib``). It exposes the
C++-powered :class:`Annoy` type. For day-to-day work, prefer the
high-level Python API in the :mod:`annoy` package:

    from annoy import Annoy, AnnoyIndex
# =============================================================
# 1. Construction (dimension 0, no metric given)
# =============================================================
# Only the dimension is passed (positionally) and it is 0; no metric is given.
# NOTE(review): in the output below the unset metric shows up three different
# ways: None (idx.metric), 'unknown' (repr) and '' (info()) — confirm intended.
idx = AnnoyIndex(0)
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)  # repr of the index
print(idx.info())  # summary dict of the index state

# help(idx.info)
Index dimension: 0
Metric         : None
Annoy(f=0, metric='unknown', n_items=0, n_trees=0, on_disk_path=None)
{'dimension': 0, 'metric': '', 'n_items': 0, 'n_trees': 0, 'memory_usage_byte': 0, 'memory_usage_mib': 0.0, 'on_disk_path': None}
# =============================================================
# 1. Construction (f=3, metric omitted)
# =============================================================
# Omitting `metric` emits the FutureWarning shown in the output below;
# the index then reports metric 'angular'.
idx = AnnoyIndex(f=3)
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
/home/circleci/repo/galleries/examples/annoy/plot_Annoy_legacy_c_api.py:43: FutureWarning:

The default argument for metric will be removed in a future version of Annoy. Please pass metric='angular' explicitly.

Index dimension: 3
Metric         : angular
Annoy(f=3, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
# =============================================================
# 1. Construction (f=3, explicit metric)
# =============================================================
# Passing metric="angular" explicitly is what the FutureWarning text asks for;
# no warning appears in this cell's output.
idx = AnnoyIndex(f=3, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
Index dimension: 3
Metric         : angular
# =============================================================
# 2. Add items
# =============================================================
# add_item(item_id, vector): ids 0..2, each with a 3-element vector
# (the three standard basis vectors), matching the index dimension f=3.
idx.add_item(0, [1, 0, 0])
idx.add_item(1, [0, 1, 0])
idx.add_item(2, [0, 0, 1])

print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
Number of items: 3
Index dimension: 3
Metric         : angular
# =============================================================
# 1. Construction (f=10, on-disk build)
# =============================================================
idx = AnnoyIndex(10, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
# Last expression of the cell: its value (True) is echoed in the output below.
# The same file name later appears as on_disk_path in the index repr.
idx.on_disk_build("annoy_test.annoy")
# help(idx.on_disk_build)
Index dimension: 10
Metric         : angular

True
# =============================================================
# 2. Add items
# =============================================================
f = 10  # length of each random vector (the index above was created with 10)
n = 10  # number of items to add

# Print progress roughly ten times over the run. max(1, ...) keeps the
# modulus non-zero: with n < 10, `n // 10` is 0 and `i % 0` would raise
# ZeroDivisionError.
step = max(1, n // 10)
for i in range(n):
    if i % step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One Gaussian sample per dimension.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric         : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)
# =============================================================
# 3. Build index
# =============================================================
# build(10): the output below reports 10 trees afterwards.
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())  # same numbers as above, gathered in one dict
# help(idx.build)
Trees: 10
Memory usage: 1620 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1620, 'memory_usage_mib': 0.001544952392578125, 'on_disk_path': 'annoy_test.annoy'}
# Drop the trees again: the repr below shows n_trees=0 while n_items stays 10.
idx.unbuild()
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)
# Build again after unbuild(): the repr below is back to n_trees=10.
idx.build(10)
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
# =============================================================
# 1. Construction (dimension 0, explicit metric)
# =============================================================
# Dimension is given as 0 here; the metric is set up front.
idx = AnnoyIndex(0, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
Index dimension: 0
Metric         : angular
# =============================================================
# 2. Add items
# =============================================================
f = 10  # length of each random vector
n = 10  # number of items to add

# Print progress roughly ten times over the run. max(1, ...) keeps the
# modulus non-zero: with n < 10, `n // 10` is 0 and `i % 0` would raise
# ZeroDivisionError.
step = max(1, n // 10)
for i in range(n):
    if i % step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One Gaussian sample per dimension.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

# The index was constructed with dimension 0; the output below reports
# dimension 10 once the 10-element vectors have been added.
print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric         : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=None)
# =============================================================
# 3. Build index
# =============================================================
# build(10): the output below reports 10 trees and on_disk_path=None.
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())  # same numbers as above, gathered in one dict
# help(idx.get_n_trees)
Trees: 10
Memory usage: 1880 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1880, 'memory_usage_mib': 0.00179290771484375, 'on_disk_path': None}
# =============================================================
# 4. Query by item — ids together with distances
# =============================================================
# Ask for the 5 nearest neighbours of item 0. With include_distances=True the
# printed result is a plain (ids, distances) tuple of two lists.
# search_k is not passed here.
res = idx.get_nns_by_item(0, 5, include_distances=True)

print(res)
([0, 2, 4, 5, 6], [0.0, 0.8915294408798218, 0.9434009790420532, 1.050995111465454, 1.2712162733078003])
# =============================================================
# 8. Query using vector
# =============================================================
# Draw a fresh random query vector of length f, then ask for its 5 nearest
# neighbours together with their distances.
query_vec = [random.gauss(0, 1) for _ in range(f)]
res2 = idx.get_nns_by_vector(query_vec, 5, include_distances=True)
print("\nQuery by vector:", res2)
Query by vector: ([4, 9, 0, 6, 8], [0.8781132102012634, 0.9961007237434387, 1.0966964960098267, 1.2096866369247437, 1.2793666124343872])
# =============================================================
# 9. Low-level (non-result) mode
# =============================================================
# include_distances=False: the output below is a bare list of item ids.
items = idx.get_nns_by_item(0, 2, include_distances=False)
print("\nLow-level items only:", items)

# include_distances=True: a 2-tuple that unpacks into (ids, distances).
items_low, d_low = idx.get_nns_by_item(0, 2, include_distances=True)
print("Low-level tuple return:", items_low, d_low)
Low-level items only: [0, 2]
Low-level tuple return: [0, 2] [0.0, 0.8915294408798218]
# =============================================================
# 10. Persistence
# =============================================================
print("\n=== Saving with binary annoy ===")
print(idx)
# NOTE(review): in the output below the repr after save() reports n_trees=19
# although build(10) was requested — confirm whether that is expected.
idx.save("annoy_test.annoy")
print(idx)

print("Loading...")
# load() returns a status flag (the output below prints True), not the index.
# Chaining it onto the constructor would bind `idx2` to that flag, so the
# index and the flag are kept in separate names.
idx2 = AnnoyIndex(10, metric='angular')
loaded = idx2.load("annoy_test.annoy")
print("Loaded index:", loaded)
=== Saving with binary annoy ===
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Loading...
Loaded index: True
# =============================================================
# 11. Raw serialize / deserialize
# =============================================================
print("\n=== Raw serialize ===")
# Round-trip: dump the index into `buf`, then feed it to a fresh index created
# with the same dimension and metric.
buf = idx.serialize()
new_idx = AnnoyIndex(10, metric='angular')
new_idx.deserialize(buf)
print("Deserialized index n_items:", new_idx.get_n_items())
# Compare the two reprs: in the output below they match except for
# on_disk_path, which is None on the deserialized copy.
print(idx)
print(new_idx)
=== Raw serialize ===
Deserialized index n_items: 10
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=None)
# After unload() the repr below shows n_items=0, n_trees=0 and on_disk_path=None.
idx.unload()
print(idx)
Annoy(f=10, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
# Load the saved file back instead of rebuilding (idx.build(10) stays disabled);
# the repr below shows items, trees and on_disk_path restored.
idx.load("annoy_test.annoy")
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)

Tags: level: beginner purpose: showcase

Total running time of the script: (0 minutes 0.022 seconds)

Related examples

annoy.Index python-api with examples

annoy.Index python-api with examples

Precision annoy.AnnoyIndex with examples

Precision annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Gallery generated by Sphinx-Gallery