Index (Cython) Python API with examples
An example showing the Index class.
Important
The following parameters are placeholders only; they are accepted but not yet processed:
* dtype
* index_dtype
* wrapper_dtype
* random_dtype
import numpy as np
import random; random.seed(0)
from pprint import pprint
# from annoy import Annoy, AnnoyIndex
# from scikitplot.cexternals._annoy import Annoy, AnnoyIndex
# from scikitplot.annoy import Annoy, AnnoyIndex, Index
from scikitplot.annoy._annoy import Index
# Print the class docstring: constructor signature, parameters, attributes and usage examples.
print(Index.__doc__)
Index(f: Optional[int] = None, metric: Optional[str] = None, int n_neighbors: int = 5, *, on_disk_path: Optional[str] = None, bool prefault: bool = False, seed: Optional[int] = None, verbose: Optional[int] = None, int schema_version: int = 0, str dtype: str = 'float32', str index_dtype: str = 'int32', str wrapper_dtype: str = 'uint64', str random_dtype: str = 'uint64', **kwargs)
Annoy Approximate Nearest Neighbors Index.
This is a Cython-powered Python wrapper around the Annoy C++ library.
Parameters
----------
f : int or None, default=None
Embedding dimension. If 0 or None, dimension is inferred from first
vector added. Must be positive for immediate index construction.
metric : str or None, default=None
Distance metric. Supported values:
* "angular", "cosine" → cosine-like distance
* "euclidean", "l2", "lstsq" → L2 distance
* "manhattan", "l1", "cityblock", "taxicab" → L1 distance
* "dot", "@", ".", "dotproduct", "inner", "innerproduct" → negative dot product
* "hamming" → bitwise Hamming distance
If None and f > 0, defaults to "angular" with FutureWarning.
n_neighbors : int, default=5
Default number of neighbors for queries (estimator parameter).
on_disk_path : str or None, default=None
Path for on-disk building. If provided, enables memory-efficient
building for large indices.
prefault : bool, default=False
Whether to prefault pages when loading (may improve query latency).
seed : int or None, default=None
Random seed for tree construction. If None, uses Annoy's default.
Value 0 is treated as "use default" and emits a UserWarning.
verbose : int or None, default=None
Verbosity level (clamped to [-2, 2]). Level >= 1 enables logging.
schema_version : int, default=0
Pickle schema version marker (does not affect on-disk format).
dtype : str, default='float32'
Data type: float16, float32, float64, float80, float128
index_dtype : str, default='int32'
Index type: int32, int64
wrapper_dtype : str, default='uint64'
Wrapper type (for Hamming): uint32, uint64
random_dtype : str, default='uint64'
Random seed type
**kwargs
Future extensibility
Attributes
----------
f : int
Embedding dimension (0 means "unset / lazy").
metric : str or None
Canonical metric name, or None if not configured.
ptr : AnnoyIndexInterface*
Pointer to C++ index (NULL if not constructed).
# State Indicators (Internal)
_f_valid : bool
True if f has been set (> 0)
_metric_valid : bool
True if metric has been configured
_index_constructed : bool
True if C++ index exists (ptr != NULL)
Examples
--------
>>> index = Index(f=128, metric='angular', seed=42)
>>> index.add_item(0, [0.1] * 128)
>>> index.add_item(1, [0.2] * 128)
>>> index.build(n_trees=10)
>>> neighbors, distances = index.get_nns_by_item(0, n=5, include_distances=True)
set dtype:
>>> # Standard usage (float32)
>>> index = Index(f=128, metric='angular', dtype='float32')
>>>
>>> # High precision (float64)
>>> index = Index(f=128, metric='euclidean', dtype='float64')
>>>
>>> # Half precision (float16) - future
>>> # index = Index(f=128, metric='angular', dtype='float16')
# Construct with all defaults: neither the dimension ``f`` nor the metric is set yet
# (per the docstring, ``f`` is then inferred from the first vector added).
index = Index()
# Bare expression: shows the object's repr when run as a notebook/gallery cell.
index
# Round-trip the parameters: feed the current params straight back into set_params.
index.set_params(**index.get_params())
# NOTE(review): clone() presumably returns a new Index with the same parameters - confirm.
a = index.clone()
a
# Metric aliases accepted by the constructor (see the "metric" parameter in the docstring):
# '.' -> dot product, 'l1' -> manhattan, 'l2' -> euclidean, 'hamming' -> bitwise Hamming.
Index(10, metric= '.')
Index(10, metric= 'l1')
Index(10, metric= 'l2')
Index(10, metric= 'hamming')
import numpy as np
# Create a 128-dimensional index.
# No metric is passed, so it falls back to "angular" and emits a FutureWarning.
index = Index(128)

# Add 1000 random vectors, each scaled to unit length.
# NOTE: NumPy's global RNG is not seeded here (``random.seed(0)`` only seeds the
# stdlib ``random`` module), so the exact neighbors/distances differ between runs.
for i in range(1000):
    v = np.random.randn(128)
    v = v / np.linalg.norm(v)  # Normalize
    index.add_item(i, v)

# Build the index with 10 trees, then ask for the 10 nearest neighbors of item 0.
index.build(10)
neighbors, distances = index.get_nns_by_item(0, 10, include_distances=True)
# Bare expression: displays the (neighbors, distances) pair as the cell output.
neighbors, distances
/home/circleci/.pyenv/versions/3.11.14/lib/python3.11/site-packages/sphinx_gallery/gen_rst.py:801: FutureWarning:
The default metric will be removed in a future version. Please pass metric='angular' explicitly.
([0, 409, 627, 786, 228, 780, 636, 536, 325, 127], [0.0003452669770922512, 1.2368390560150146, 1.2540233135223389, 1.2635881900787354, 1.2723208665847778, 1.2764437198638916, 1.2816716432571411, 1.2914178371429443, 1.2927879095077515, 1.301866054534912])
# Inspect the current parameters; ``metric`` now reports the "angular" default.
index.get_params()
{'f': 128, 'metric': 'angular', 'n_neighbors': 5, 'seed': None, 'verbose': None, 'on_disk_path': None, 'prefault': False, 'schema_version': 0, 'dtype': 'float32', 'index_dtype': 'int32', 'wrapper_dtype': 'uint64', 'random_dtype': 'uint64'}
# Use a clone as a context manager and pretty-print its state dict
# (version, params, and constructed / n_items / n_trees / index_data fields),
# keeping the keys in their original order.
with index.clone() as idx:
    pprint(idx.get_state(), sort_dicts=False)
{'__version__': '1.0',
'params': {'f': 128,
'metric': 'angular',
'n_neighbors': 5,
'seed': None,
'verbose': None,
'on_disk_path': None,
'prefault': False,
'schema_version': 0,
'dtype': 'float32',
'index_dtype': 'int32',
'wrapper_dtype': 'uint64',
'random_dtype': 'uint64'},
'constructed': True,
'n_items': 0,
'n_trees': 0,
'index_data': None}
Total running time of the script: (0 minutes 0.169 seconds)
Related examples