Index (cython) python-api with examples

An example showing the Index class.

Important

Some parameters are placeholders only and are not processed:

* dtype
* index_dtype
* wrapper_dtype
* random_dtype
import numpy as np
import random; random.seed(0)
from pprint import pprint

# from annoy import Annoy, AnnoyIndex
# from scikitplot.cexternals._annoy import Annoy, AnnoyIndex
# from scikitplot.annoy import Annoy, AnnoyIndex, Index
from scikitplot.annoy._annoy import Index

print(Index.__doc__)
Index(f: Optional[int] = None, metric: Optional[str] = None, int n_neighbors: int = 5, *, on_disk_path: Optional[str] = None, bool prefault: bool = False, seed: Optional[int] = None, verbose: Optional[int] = None, int schema_version: int = 0, str dtype: str = 'float32', str index_dtype: str = 'int32', str wrapper_dtype: str = 'uint64', str random_dtype: str = 'uint64', **kwargs)

Annoy Approximate Nearest Neighbors Index.

This is a Cython-powered Python wrapper around the Annoy C++ library.

Parameters
----------
f : int or None, default=None
    Embedding dimension. If 0 or None, dimension is inferred from first
    vector added. Must be positive for immediate index construction.
metric : str or None, default=None
    Distance metric. Supported values:
    * "angular", "cosine" → cosine-like distance
    * "euclidean", "l2", "lstsq" → L2 distance
    * "manhattan", "l1", "cityblock", "taxicab" → L1 distance
    * "dot", "@", ".", "dotproduct", "inner", "innerproduct" → negative dot product
    * "hamming" → bitwise Hamming distance
    If None and f > 0, defaults to "angular" with FutureWarning.
n_neighbors : int, default=5
    Default number of neighbors for queries (estimator parameter).
on_disk_path : str or None, default=None
    Path for on-disk building. If provided, enables memory-efficient
    building for large indices.
prefault : bool, default=False
    Whether to prefault pages when loading (may improve query latency).
seed : int or None, default=None
    Random seed for tree construction. If None, uses Annoy's default.
    Value 0 is treated as "use default" and emits a UserWarning.
verbose : int or None, default=None
    Verbosity level (clamped to [-2, 2]). Level >= 1 enables logging.
schema_version : int, default=0
    Pickle schema version marker (does not affect on-disk format).
dtype : str, default='float32'
    Data type: float16, float32, float64, float80, float128
index_dtype : str, default='int32'
    Index type: int32, int64
wrapper_dtype : str, default='uint64'
    Wrapper type (for Hamming): uint32, uint64
random_dtype : str, default='uint64'
    Random seed type
**kwargs
    Future extensibility

Attributes
----------
f : int
    Embedding dimension (0 means "unset / lazy").
metric : str or None
    Canonical metric name, or None if not configured.
ptr : AnnoyIndexInterface*
    Pointer to C++ index (NULL if not constructed).

# State Indicators (Internal)
_f_valid : bool
    True if f has been set (> 0)
_metric_valid : bool
    True if metric has been configured
_index_constructed : bool
    True if C++ index exists (ptr != NULL)

Examples
--------
>>> index = Index(f=128, metric='angular', seed=42)
>>> index.add_item(0, [0.1] * 128)
>>> index.add_item(1, [0.2] * 128)
>>> index.build(n_trees=10)
>>> neighbors, distances = index.get_nns_by_item(0, n=5, include_distances=True)

set dtype:

>>> # Standard usage (float32)
>>> index = Index(f=128, metric='angular', dtype='float32')
>>>
>>> # High precision (float64)
>>> index = Index(f=128, metric='euclidean', dtype='float64')
>>>
>>> # Half precision (float16) - future
>>> # index = Index(f=128, metric='angular', dtype='float16')
index = Index()
index
Annoy
Parameters
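| Parameter | Value |
| --- | --- |
| f | 0 |
| metric | None |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |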


index.set_params(**index.get_params())
Annoy
Parameters
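| Parameter | Value |
| --- | --- |
| f | 0 |
| metric | None |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |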


a = index.clone()
a
Annoy
Parameters
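| Parameter | Value |
| --- | --- |
| f | 0 |
| metric | None |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |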


Index(10, metric= '.')
Annoy
Parameters
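| Parameter | Value |
| --- | --- |
| f | 10 |
| metric | '.' |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |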


Index(10, metric= 'l1')
Annoy
Parameters
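| Parameter | Value |
| --- | --- |
| f | 10 |
| metric | 'l1' |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |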


Index(10, metric= 'l2')
Annoy
Parameters
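| Parameter | Value |
| --- | --- |
| f | 10 |
| metric | 'l2' |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |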


Index(10, metric= 'hamming')
Annoy
Parameters
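| Parameter | Value |
| --- | --- |
| f | 10 |
| metric | 'hamming' |
| n_neighbors | 5 |
| seed | None |
| verbose | None |
| on_disk_path | None |
| prefault | False |
| schema_version | 0 |
| dtype | 'float32' |
| index_dtype | 'int32' |
| wrapper_dtype | 'uint64' |
| random_dtype | 'uint64' |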


import numpy as np

# Create index
index = Index(128)

# Add normalized vectors
for i in range(1000):
    v = np.random.randn(128)
    v = v / np.linalg.norm(v)  # Normalize
    index.add_item(i, v)

# Build and query
index.build(10)
neighbors, distances = index.get_nns_by_item(0, 10, include_distances=True)
neighbors, distances
/home/circleci/.pyenv/versions/3.11.14/lib/python3.11/site-packages/sphinx_gallery/gen_rst.py:801: FutureWarning:

The default metric will be removed in a future version. Please pass metric='angular' explicitly.


([0, 409, 627, 786, 228, 780, 636, 536, 325, 127], [0.0003452669770922512, 1.2368390560150146, 1.2540233135223389, 1.2635881900787354, 1.2723208665847778, 1.2764437198638916, 1.2816716432571411, 1.2914178371429443, 1.2927879095077515, 1.301866054534912])
index.get_params()
{'f': 128, 'metric': 'angular', 'n_neighbors': 5, 'seed': None, 'verbose': None, 'on_disk_path': None, 'prefault': False, 'schema_version': 0, 'dtype': 'float32', 'index_dtype': 'int32', 'wrapper_dtype': 'uint64', 'random_dtype': 'uint64'}
with index.clone() as idx:
    pprint(idx.get_state(), sort_dicts=False)
{'__version__': '1.0',
 'params': {'f': 128,
            'metric': 'angular',
            'n_neighbors': 5,
            'seed': None,
            'verbose': None,
            'on_disk_path': None,
            'prefault': False,
            'schema_version': 0,
            'dtype': 'float32',
            'index_dtype': 'int32',
            'wrapper_dtype': 'uint64',
            'random_dtype': 'uint64'},
 'constructed': True,
 'n_items': 0,
 'n_trees': 0,
 'index_data': None}

Tags: level: beginner, purpose: showcase

Total running time of the script: (0 minutes 0.169 seconds)

Related examples

* annoy.Index to NPY or CSV with examples
* annoy.Annoy legacy c-api with examples
* annoy.Index python-api with examples
* C++ mode basics: cppclass and libcpp containers

Gallery generated by Sphinx-Gallery