annoy.Index Python API with examples
An example showing the Index class.
See also
import random; random.seed(0)
# from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import AnnoyBase
print(AnnoyBase.__doc__)
Compiled with GCC/Clang. Using 512-bit AVX instructions.
High-performance approximate nearest neighbours (Annoy) C++ core.
This module is a low-level backend (``annoylib``). It exposes the
C++-powered :class:`Annoy` type. For day-to-day work, prefer the
high-level Python API in the :mod:`annoy` package:
from annoy import Annoy, AnnoyIndex
# from annoy import Annoy, AnnoyIndex
from scikitplot.annoy import Annoy, AnnoyIndex, Index
print(AnnoyIndex.__doc__)
High-level Pythonic Annoy wrapper with picklable (or pickle-able).
Minimal modify spotify/annoy low-level C-API to extend Python API.
.. seealso::
* :py:obj:`~scikitplot.annoy.Index.from_low_level`
* https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled
# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(0)
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
print(idx)
print(idx.info())
# help(idx.info)
Index dimension: 0
Metric : angular
Annoy(f=0, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
{'dimension': 0, 'metric': 'angular', 'n_items': 0, 'n_trees': 0, 'memory_usage_byte': 0, 'memory_usage_mib': 0.0, 'on_disk_path': None}
from scikitplot import annoy as a
print(a.AnnoyBase) # should show the extension type
print(a.Annoy) # same
print(a.AnnoyIndex) # should show <class '..._base.Index'>
print(a.Index) # should show <class '..._base.Index'>
print(isinstance(idx, a.Index))
print(isinstance(idx, a.AnnoyBase))
print(type(idx))
print(idx.__class__.__module__)
print(idx.__class__.__mro__)
<class 'annoy.Annoy'>
<class 'annoy.Annoy'>
<class 'scikitplot.annoy._base.Index'>
<class 'scikitplot.annoy._base.Index'>
True
True
<class 'scikitplot.annoy._base.Index'>
scikitplot.annoy._base
(<class 'scikitplot.annoy._base.Index'>, <class 'scikitplot.annoy._mixins._vectors.VectorOpsMixin'>, <class 'scikitplot.annoy._mixins._ndarray.NDArrayExportMixin'>, <class 'scikitplot.annoy._mixins._io.ObjectIOMixin'>, <class 'scikitplot.annoy._mixins._manifest.ManifestMixin'>, <class 'scikitplot.annoy._mixins._pickle.PickleMixin'>, <class 'scikitplot.annoy._mixins._pickle.PathAwareAnnoy'>, <class 'annoy.Annoy'>, <class 'object'>)
# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(f=3)
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
print(idx)
Index dimension: 3
Metric : angular
Annoy(f=3, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(f=3, metric="angular")
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
Index dimension: 3
Metric : angular
# =============================================================
# 2. Add items
# =============================================================
idx.add_item(0, [1, 0, 0])
idx.add_item(1, [0, 1, 0])
idx.add_item(2, [0, 0, 1])
print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
Number of items: 3
Index dimension: 3
Metric : angular
# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(10, metric="angular")
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
idx.on_disk_build("annoy_test.annoy")
# help(idx.on_disk_build)
Index dimension: 10
Metric : angular
True
# =============================================================
# 2. Add items
# =============================================================
f=10
n=10
for i in range(n):
if(i % (n//10) == 0): print(f"{i} / {n} = {1.0 * i / n}")
# v = []
# for z in range(f):
# v.append(random.gauss(0, 1))
v = [random.gauss(0, 1) for _ in range(f)]
idx.add_item(i, v)
print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
print(idx)
0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)
# =============================================================
# 3. Build index
# =============================================================
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())
# help(idx.build)
Trees: 10
Memory usage: 1620 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1620, 'memory_usage_mib': 0.001544952392578125, 'on_disk_path': 'annoy_test.annoy'}
idx.unbuild()
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=annoy_test.annoy)
idx.build(10)
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=annoy_test.annoy)
# =============================================================
# 1. Construction
# =============================================================
idx = AnnoyIndex(0, metric="angular")
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
Index dimension: 0
Metric : angular
# =============================================================
# 2. Add items
# =============================================================
f=10
n=10
for i in range(n):
if(i % (n//10) == 0): print(f"{i} / {n} = {1.0 * i / n}")
# v = []
# for z in range(f):
# v.append(random.gauss(0, 1))
v = [random.gauss(0, 1) for _ in range(f)]
idx.add_item(i, v)
print("Number of items:", idx.get_n_items())
print("Index dimension:", idx.f)
print("Metric :", idx.metric)
print(idx)
0 / 10 = 0.0
1 / 10 = 0.1
2 / 10 = 0.2
3 / 10 = 0.3
4 / 10 = 0.4
5 / 10 = 0.5
6 / 10 = 0.6
7 / 10 = 0.7
8 / 10 = 0.8
9 / 10 = 0.9
Number of items: 10
Index dimension: 10
Metric : angular
Annoy(f=10, metric='angular', n_items=10, n_trees=0, on_disk_path=None)
# =============================================================
# 3. Build index
# =============================================================
idx.build(10)
print("Trees:", idx.get_n_trees())
print("Memory usage:", idx.memory_usage(), "bytes")
print(idx)
print(idx.info())
# help(idx.get_n_trees)
Trees: 10
Memory usage: 1880 bytes
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
{'dimension': 10, 'metric': 'angular', 'n_items': 10, 'n_trees': 10, 'memory_usage_byte': 1880, 'memory_usage_mib': 0.00179290771484375, 'on_disk_path': None}
# =============================================================
# 4. Query by item — returns an (ids, distances) tuple when include_distances=True
# =============================================================
res = idx.get_nns_by_item(
0,
5,
# search_k = -1,
include_distances=True,
)
print(res)
([0, 2, 4, 5, 6], [0.0, 0.8915294408798218, 0.9434009790420532, 1.050995111465454, 1.2712162733078003])
# =============================================================
# 8. Query using vector
# =============================================================
res2 = idx.get_nns_by_vector(
[random.gauss(0, 1) for _ in range(f)],
5,
include_distances=True
)
print("\nQuery by vector:", res2)
Query by vector: ([4, 9, 0, 6, 8], [0.8781132102012634, 0.9961007237434387, 1.0966964960098267, 1.2096866369247437, 1.2793666124343872])
# =============================================================
# 9. Low-level mode — plain id list, or (ids, distances) tuple with include_distances=True
# =============================================================
items = idx.get_nns_by_item(0, 2, include_distances=False)
print("\nLow-level items only:", items)
items_low, d_low = idx.get_nns_by_item(0, 2, include_distances=True)
print("Low-level tuple return:", items_low, d_low)
Low-level items only: [0, 2]
Low-level tuple return: [0, 2] [0.0, 0.8915294408798218]
# =============================================================
# 10. Persistence
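# =============================================================
# NOTE: ``load`` returns ``True`` here rather than the index object (the output
# below prints "Loaded index: True"), so ``idx2`` is a bool, not a usable index.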
print("\n=== Saving with binary annoy ===")
print(idx)
idx.save("annoy_test.annoy")
print(idx)
print("Loading...")
idx2 = AnnoyIndex(10, metric='angular').load("annoy_test.annoy")
print("Loaded index:", idx2)
=== Saving with binary annoy ===
Annoy(f=10, metric='angular', n_items=10, n_trees=10, on_disk_path=None)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Loading...
Loaded index: True
# =============================================================
# 11. Raw serialize / deserialize
# =============================================================
print("\n=== Raw serialize ===")
buf = idx.serialize()
new_idx = AnnoyIndex(10, metric='angular')
new_idx.deserialize(buf)
print("Deserialized index n_items:", new_idx.get_n_items())
print(idx)
print(new_idx)
=== Raw serialize ===
Deserialized index n_items: 10
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=None)
idx.unload()
print(idx)
Annoy(f=10, metric='angular', n_items=0, n_trees=0, on_disk_path=None)
# idx.build(10)
idx.load("annoy_test.annoy")
print(idx)
Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy)
# Round-trip the index through joblib: dump it to disk, then load it back
import joblib
joblib.dump(idx, "test.joblib"), joblib.load("test.joblib")
(['test.joblib'], Annoy(f=10, metric='angular', n_items=10, n_trees=19, on_disk_path=annoy_test.annoy))
from scikitplot import annoy as a
f = 10
idx = a.AnnoyBase(f, "angular")
# Distinct per-item content (item i is the constant vector [i] * f; note that
# item 0 is therefore all zeros) so we can see mismatches clearly
for i in range(20):
idx.add_item(i, [float(i)] * f)
idx.build(10)
True
from scikitplot import annoy as a
# Legacy support: wrap the existing low-level AnnoyBase index in the high-level Index
idx = a.Index.from_low_level(idx)
import joblib
joblib.dump(idx, "test.joblib")
['test.joblib']
Total running time of the script: (0 minutes 0.025 seconds)
Related examples