annoy.Index python-api with examples

An example showing the Index class.

See also

import random; random.seed(0)

# from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import AnnoyBase

print(AnnoyBase.__doc__)

Compiled with GCC/Clang. Using 512-bit AVX instructions.

High-performance approximate nearest neighbours (Annoy) C++ core.

This module is a low-level backend (``annoylib``). It exposes the
C++-powered :class:`Annoy` type. For day-to-day work, prefer the
high-level Python API in the :mod:`annoy` package:

    from annoy import Annoy, AnnoyIndex

# from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import Annoy, AnnoyIndex, Index

print(AnnoyIndex.__doc__)

High-level Pythonic Annoy wrapper with picklable (or pickle-able).

Minimal modify spotify/annoy low-level C-API to extend Python API.

.. seealso::
    * :py:obj:`~scikitplot.annoy.Index.from_low_level`
    * https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled

# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(0)
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
print(idx.info())

# help(idx.info)

Index dimension: 0
Metric         : angular
Annoy(f=0, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
{'dimension': 0, 'metric': 'angular', 'n_items': 0, 'n_trees': 0, 'memory_usage_byte': 0, 'memory_usage_mib': 0.0, 'on_disk_path': None}

from scikitplot import annoy as a

print(a.AnnoyBase)   # should show the extension type
print(a.Annoy)       # same
print(a.AnnoyIndex)  # should show <class '..._base.Index'>
print(a.Index)       # should show <class '..._base.Index'>

print(isinstance(idx, a.Index))
print(isinstance(idx, a.AnnoyBase))

print(type(idx))
print(idx.__class__.__module__)
print(idx.__class__.__mro__)

<class 'annoy.Annoy'>
<class 'annoy.Annoy'>
<class 'scikitplot.annoy._base.Index'>
<class 'scikitplot.annoy._base.Index'>
True
True
<class 'scikitplot.annoy._base.Index'>
scikitplot.annoy._base
(<class 'scikitplot.annoy._base.Index'>, <class 'scikitplot.annoy._mixins._vectors.VectorOpsMixin'>, <class 'scikitplot.annoy._mixins._ndarray.NDArrayExportMixin'>, <class 'scikitplot.annoy._mixins._io.ObjectIOMixin'>, <class 'scikitplot.annoy._mixins._manifest.ManifestMixin'>, <class 'scikitplot.annoy._mixins._pickle.PickleMixin'>, <class 'scikitplot.annoy._mixins._pickle.PathAwareAnnoy'>, <class 'annoy.Annoy'>, <class 'object'>)

# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(f=3)
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)

Index dimension: 3
Metric         : angular
Annoy(f=3, metric='angular', n_items=0, n_trees=0, on_disk_path=None)

# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(f=3, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)

Index dimension: 3
Metric         : angular

# =============================================================
# 2. Add items
# =============================================================
idx.add_item(0, [1, 0, 0])
idx.add_item(1, [0, 1, 0])
idx.add_item(2, [0, 0, 1])

print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)

Number of items: 3
Index dimension: 3
Metric         : angular

# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(10, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
idx.on_disk_build("annoy_test.annoy")
# help(idx.on_disk_build)

Index dimension: 10
Metric         : angular

True

# =============================================================
# 2. Add items
# =============================================================
f=10
n=10
for i in range(n):
    if(i % (n//10) == 0): print(f"{i} / {n} = {1.0 * i / n}")
    # v = []
    # for z in range(f):
    #     v.append(random.gauss(0, 1))
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)

0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric         : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)

# =============================================================
# 3. Build index
# =============================================================
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())
# help(idx.build)

Trees: 10
Memory usage: 1620 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1620, 'memory_usage_mib': 0.001544952392578125, 'on_disk_path': 'annoy_test.annoy'}

idx.unbuild()
print(idx)

Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)

idx.build(10)
print(idx)

Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)

# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(0, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)

Index dimension: 0
Metric         : angular

# =============================================================
# 2. Add items
# =============================================================
f=10
n=10
for i in range(n):
    if(i % (n//10) == 0): print(f"{i} / {n} = {1.0 * i / n}")
    # v = []
    # for z in range(f):
    #     v.append(random.gauss(0, 1))
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)

0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric         : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=None)

# =============================================================
# 3. Build index
# =============================================================
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())
# help(idx.get_n_trees)

Trees: 10
Memory usage: 1880 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1880, 'memory_usage_mib': 0.00179290771484375, 'on_disk_path': None}

# =============================================================
# 4. Query by item — returns (ids, distances) when include_distances=True
# =============================================================
res = idx.get_nns_by_item(
    0,
    5,
    # search_k = -1,
    include_distances=True,
)

print(res)

([0, 2, 4, 5, 6], [0.0, 0.8915294408798218, 0.9434009790420532, 1.050995111465454, 1.2712162733078003])

# =============================================================
# 8. Query using vector
# =============================================================
res2 = idx.get_nns_by_vector(
    [random.gauss(0, 1) for _ in range(f)],
    5,
    include_distances=True
)
print("\nQuery by vector:", res2)

Query by vector: ([4, 9, 0, 6, 8], [0.8781132102012634, 0.9961007237434387, 1.0966964960098267, 1.2096866369247437, 1.2793666124343872])

# =============================================================
# 9. Low-level (non-result) mode
# =============================================================
items = idx.get_nns_by_item(0, 2, include_distances=False)
print("\nLow-level items only:", items)

items_low, d_low = idx.get_nns_by_item(0, 2, include_distances=True)
print("Low-level tuple return:", items_low, d_low)

Low-level items only: [0, 2]
Low-level tuple return: [0, 2] [0.0, 0.8915294408798218]

# =============================================================
# 10. Persistence
# =============================================================
print("\n=== Saving with binary annoy ===")
print(idx)
idx.save("annoy_test.annoy")
print(idx)

print("Loading...")
idx2 = AnnoyIndex(10, metric='angular').load("annoy_test.annoy")
print("Loaded index:", idx2)

=== Saving with binary annoy ===
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Loading...
Loaded index: True

# =============================================================
# 11. Raw serialize / deserialize
# =============================================================
print("\n=== Raw serialize ===")
buf = idx.serialize()
new_idx = AnnoyIndex(10, metric='angular')
new_idx.deserialize(buf)
print("Deserialized index n_items:", new_idx.get_n_items())
print(idx)
print(new_idx)

=== Raw serialize ===
Deserialized index n_items: 10
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=None)

idx.unload()
print(idx)

Annoy(f=10, metric='angular', n_items=0, n_trees=0, on_disk_path=None)

# idx.build(10)
idx.load("annoy_test.annoy")
print(idx)

Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)

# joblib
import joblib

joblib.dump(idx, "test.joblib"), joblib.load("test.joblib")

(['test.joblib'], Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy))

from scikitplot import annoy as a

f = 10
idx = a.AnnoyBase(f, "angular")

# Distinct non-zero content so we can see mismatches clearly
for i in range(20):
    idx.add_item(i, [float(i)] * f)
idx.build(10)

True

from scikitplot import annoy as a

# Legacy Support
idx = a.Index.from_low_level(idx)

import joblib
joblib.dump(idx, "test.joblib")

['test.joblib']

Tags: level: beginner purpose: showcase

Total running time of the script: (0 minutes 0.025 seconds)

Related examples

annoy.Annoy legacy c-api with examples

Precision annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Gallery generated by Sphinx-Gallery

annoy.Index python-api with examples

This Page