annoy.Index python-api with examples

An example showing the Index class.

# Seed the RNG once so the random vectors generated in later cells are
# reproducible from run to run.
import random; random.seed(0)

# from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import AnnoyBase

# Show the docstring attached to the low-level class.
print(AnnoyBase.__doc__)
Compiled with GCC/Clang. Using 512-bit AVX instructions.

High-performance approximate nearest neighbours (Annoy) C++ core.

This module is a low-level backend (``annoylib``). It exposes the
C++-powered :class:`Annoy` type. For day-to-day work, prefer the
high-level Python API in the :mod:`annoy` package:

    from annoy import Annoy, AnnoyIndex
# from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import Annoy, AnnoyIndex, Index

# Show the docstring attached to the high-level class.
print(AnnoyIndex.__doc__)
High-level Pythonic Annoy wrapper with picklable (or pickle-able).

Minimal modify spotify/annoy low-level C-API to extend Python API.

.. seealso::
    * :py:obj:`~scikitplot.annoy.Index.from_low_level`
    * https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled
# =============================================================
# 1. Construction (dimension 0, metric not specified)
# =============================================================
# Only a dimension of 0 is passed here; no metric argument is given.
idx = AnnoyIndex(0)
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)  # compact repr of the current index state
print(idx.info())  # the same state as a dict, plus memory-usage fields

# help(idx.info)
Index dimension: 0
Metric         : angular
Annoy(f=0, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
{'dimension': 0, 'metric': 'angular', 'n_items': 0, 'n_trees': 0, 'memory_usage_byte': 0, 'memory_usage_mib': 0.0, 'on_disk_path': None}
from scikitplot import annoy as a

# Inspect the class objects exposed under each public name.
print(a.AnnoyBase)   # should show the extension type
print(a.Annoy)       # prints the same class repr as AnnoyBase
print(a.AnnoyIndex)  # should show <class '..._base.Index'>
print(a.Index)       # should show <class '..._base.Index'>

# `idx` was created through AnnoyIndex in the previous cell; check it
# against both the high-level and the low-level class.
print(isinstance(idx, a.Index))
print(isinstance(idx, a.AnnoyBase))

# Concrete type, defining module, and full method resolution order.
print(type(idx))
print(idx.__class__.__module__)
print(idx.__class__.__mro__)
<class 'annoy.Annoy'>
<class 'annoy.Annoy'>
<class 'scikitplot.annoy._base.Index'>
<class 'scikitplot.annoy._base.Index'>
True
True
<class 'scikitplot.annoy._base.Index'>
scikitplot.annoy._base
(<class 'scikitplot.annoy._base.Index'>, <class 'scikitplot.annoy._mixins._vectors.VectorOpsMixin'>, <class 'scikitplot.annoy._mixins._ndarray.NDArrayExportMixin'>, <class 'scikitplot.annoy._mixins._io.ObjectIOMixin'>, <class 'scikitplot.annoy._mixins._manifest.ManifestMixin'>, <class 'scikitplot.annoy._mixins._pickle.PickleMixin'>, <class 'scikitplot.annoy._mixins._pickle.PathAwareAnnoy'>, <class 'annoy.Annoy'>, <class 'object'>)
# =============================================================
# 1. Construction (explicit dimension, metric not specified)
# =============================================================
# The dimension is passed by keyword; no metric argument is given.
idx = AnnoyIndex(f=3)
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
Index dimension: 3
Metric         : angular
Annoy(f=3, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
# =============================================================
# 1. Construction (explicit dimension and metric)
# =============================================================
# Both the dimension and the metric are spelled out as keywords.
idx = AnnoyIndex(f=3, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
Index dimension: 3
Metric         : angular
# =============================================================
# 2. Add items
# =============================================================
# Store three 3-element vectors under ids 0, 1 and 2; each one has a
# single 1 in a different coordinate.
idx.add_item(0, [1, 0, 0])
idx.add_item(1, [0, 1, 0])
idx.add_item(2, [0, 0, 1])

# Report the item count alongside the (unchanged) dimension and metric.
print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
Number of items: 3
Index dimension: 3
Metric         : angular
# =============================================================
# 1. Construction (with an on-disk build file)
# =============================================================
idx = AnnoyIndex(10, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
# Called before any item is added.  As the last expression of the cell its
# return value is displayed (True in the recorded output), and later reprs
# of this index report on_disk_path=annoy_test.annoy.
idx.on_disk_build("annoy_test.annoy")
# help(idx.on_disk_build)
Index dimension: 10
Metric         : angular

True
# =============================================================
# 2. Add items
# =============================================================
f = 10  # vector length; the index above was created with dimension 10
n = 10  # number of random vectors to insert

# Print progress roughly every 10% of the way.  The step is clamped to at
# least 1 so that choosing n < 10 does not make `n // 10` zero and turn the
# modulo below into a ZeroDivisionError.
progress_step = max(1, n // 10)
for i in range(n):
    if i % progress_step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One vector of f samples drawn from a standard normal distribution.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric         : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)
# =============================================================
# 3. Build index
# =============================================================
# The argument is the number of trees: get_n_trees() reports 10 afterwards
# in the recorded output.
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())  # dict form; includes the memory usage in bytes and MiB
# help(idx.build)
Trees: 10
Memory usage: 1620 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1620, 'memory_usage_mib': 0.001544952392578125, 'on_disk_path': 'annoy_test.annoy'}
# Undo the build: the recorded repr afterwards shows n_trees=0 while
# n_items stays at 10.
idx.unbuild()
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)
# Build again with 10 trees on the same items.
idx.build(10)
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
# =============================================================
# 1. Construction (dimension 0, explicit metric)
# =============================================================
# Start again with a fresh in-memory index whose dimension is given as 0;
# the next cell adds 10-element vectors to it.
idx = AnnoyIndex(0, metric="angular")
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
Index dimension: 0
Metric         : angular
# =============================================================
# 2. Add items
# =============================================================
f = 10  # length of every vector added below
n = 10  # number of random vectors to insert

# Print progress roughly every 10% of the way.  The step is clamped to at
# least 1 so that choosing n < 10 does not make `n // 10` zero and turn the
# modulo below into a ZeroDivisionError.
progress_step = max(1, n // 10)
for i in range(n):
    if i % progress_step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    # One vector of f samples drawn from a standard normal distribution.
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

# The index was created with dimension 0; the recorded output reports
# dimension 10 once these vectors have been added.
print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric         :", idx.metric)
print(idx)
0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric         : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=None)
# =============================================================
# 3. Build index
# =============================================================
# The argument is the number of trees: get_n_trees() reports 10 afterwards
# in the recorded output.
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())  # dict form; includes the memory usage in bytes and MiB
# help(idx.get_n_trees)
Trees: 10
Memory usage: 1880 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1880, 'memory_usage_mib': 0.00179290771484375, 'on_disk_path': None}
# =============================================================
# 4. Query by item — returns an (ids, distances) tuple
# =============================================================
# Ask for the 5 nearest neighbours of stored item 0.  With
# include_distances=True the recorded output is a 2-tuple of parallel
# lists: the neighbour ids and their distances.
res = idx.get_nns_by_item(
    0,
    5,
    # search_k = -1,
    include_distances=True,
)

print(res)
([0, 2, 4, 5, 6], [0.0, 0.8915294408798218, 0.9434009790420532, 1.050995111465454, 1.2712162733078003])
# =============================================================
# 8. Query using vector
# =============================================================
# Same kind of query, but against a freshly generated random vector of
# length f instead of an item already stored in the index.
res2 = idx.get_nns_by_vector(
    [random.gauss(0, 1) for _ in range(f)],
    5,
    include_distances=True
)
print("\nQuery by vector:", res2)
Query by vector: ([4, 9, 0, 6, 8], [0.8781132102012634, 0.9961007237434387, 1.0966964960098267, 1.2096866369247437, 1.2793666124343872])
# =============================================================
# 9. Low-level (non-result) mode
# =============================================================
# Without distances the call yields just the list of neighbour ids.
items = idx.get_nns_by_item(0, 2, include_distances=False)
print("\nLow-level items only:", items)

# With distances the result unpacks into two lists: ids and distances.
items_low, d_low = idx.get_nns_by_item(0, 2, include_distances=True)
print("Low-level tuple return:", items_low, d_low)
Low-level items only: [0, 2]
Low-level tuple return: [0, 2] [0.0, 0.8915294408798218]
# =============================================================
# 10. Persistence
# =============================================================
print("\n=== Saving with binary annoy ===")
print(idx)  # state before saving
idx.save("annoy_test.annoy")
print(idx)  # state after saving

print("Loading...")
# load() hands back a status flag, not the index: chaining it onto the
# constructor bound that flag to idx2 (the recorded output for the chained
# form is "Loaded index: True").  Create the index first so idx2 really is
# the loaded index, and print the load status separately.
idx2 = AnnoyIndex(10, metric='angular')
loaded_ok = idx2.load("annoy_test.annoy")
print("Loaded index:", loaded_ok)
=== Saving with binary annoy ===
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Loading...
Loaded index: True
# =============================================================
# 11. Raw serialize / deserialize
# =============================================================
print("\n=== Raw serialize ===")
# Take the serialized form of the current index ...
buf = idx.serialize()
# ... and restore it into a fresh index created with the same dimension
# and metric.
new_idx = AnnoyIndex(10, metric='angular')
new_idx.deserialize(buf)
print("Deserialized index n_items:", new_idx.get_n_items())
# Print both for comparison; in the recorded output they differ only in
# on_disk_path.
print(idx)
print(new_idx)
=== Raw serialize ===
Deserialized index n_items: 10
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=None)
# Unload the index; the recorded repr afterwards shows n_items=0,
# n_trees=0 and on_disk_path=None.
idx.unload()
print(idx)
Annoy(f=10, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
# idx.build(10)
# Load the saved file back into the same object; no rebuild is done here.
idx.load("annoy_test.annoy")
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
# joblib
import joblib

# Dump the index and load it straight back.  Writing both calls as one
# tuple expression makes the cell display both results together.
joblib.dump(idx, "test.joblib"), joblib.load("test.joblib")
(['test.joblib'], Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy))
from scikitplot import annoy as a

f = 10
# Use the low-level class directly; the metric is passed positionally.
idx = a.AnnoyBase(f, "angular")

# Distinct per-item content so we can see mismatches clearly: item i is the
# constant vector [i, i, ..., i].
# NOTE(review): item 0 is therefore the all-zero vector, so the content is
# not non-zero for every item.
for i in range(20):
    idx.add_item(i, [float(i)] * f)
# Last expression of the cell, so its return value is displayed (True in
# the recorded output).
idx.build(10)
True
from scikitplot import annoy as a

# Legacy Support
# Pass the low-level AnnoyBase instance built in the previous cell through
# Index.from_low_level and rebind `idx` to the result.
idx = a.Index.from_low_level(idx)

import joblib
# Dump the converted object with joblib.
joblib.dump(idx, "test.joblib")
['test.joblib']

Tags: level: beginner purpose: showcase

Total running time of the script: (0 minutes 0.025 seconds)

Related examples

annoy.Annoy legacy c-api with examples

annoy.Annoy legacy c-api with examples

Precision annoy.AnnoyIndex with examples

Precision annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Mmap annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Simple annoy.AnnoyIndex with examples

Gallery generated by Sphinx-Gallery