"""Functions to examine data and vectors produced by SPA."""
from itertools import combinations
import nengo.utils.numpy as npext
import numpy as np
from nengo.exceptions import ValidationError
from nengo_spa.semantic_pointer import SemanticPointer
from nengo_spa.typechecks import is_iterable
from nengo_spa.vocabulary import Vocabulary
def similarity(data, vocab, normalize=False):
    """Return the similarity between simulation data and Semantic Pointers.

    Computes the dot products between all Semantic Pointers in the Vocabulary
    and the simulation data for each timestep. If ``normalize=True``,
    normalizes all vectors to compute the cosine similarity.

    Parameters
    ----------
    data: (D,) or (T, D) array_like
        The *D*-dimensional data for *T* timesteps used for comparison.
    vocab: Vocabulary or array_like
        Vocabulary (or list of vectors) used to calculate the similarity
        values.
    normalize : bool, optional
        Whether to normalize all vectors, to compute the cosine similarity.

    Returns
    -------
    ndarray
        Similarity values; shape ``(V,)`` for 1-D *data* or ``(T, V)`` for
        2-D *data*, where *V* is the number of vectors in *vocab*.

    Raises
    ------
    ValidationError
        If *vocab* is neither a `Vocabulary` nor an iterable of vectors.
    """
    if isinstance(data, SemanticPointer):
        data = data.v

    if isinstance(vocab, Vocabulary):
        vectors = vocab.vectors
    elif is_iterable(vocab):
        if isinstance(next(iter(vocab)), SemanticPointer):
            vocab = [p.v for p in vocab]
        # asarray + atleast_2d copies only when needed; the previous
        # np.array(..., copy=False) raises under NumPy >= 2.0 whenever a
        # copy is actually required (e.g. for plain lists).
        vectors = np.atleast_2d(np.asarray(vocab))
    else:
        raise ValidationError(
            # !r conversion (repr); the former ':r' format spec raises
            # "Unknown format code 'r'" at runtime.
            f"{type(vocab).__name__!r} object is not a valid vocabulary",
            attr="vocab",
        )

    dots = np.dot(vectors, data.T)

    if normalize:
        # Zero-norm vectors should return zero, so avoid divide-by-zero error
        eps = np.nextafter(0, 1)  # smallest float above zero
        dnorm = np.maximum(npext.norm(data.T, axis=0, keepdims=True), eps)
        vnorm = np.maximum(npext.norm(vectors, axis=1, keepdims=True), eps)

        if len(dots.shape) == 1:
            vnorm = np.squeeze(vnorm)

        dots /= dnorm
        dots /= vnorm

    return dots.T
def pairs(vocab):
    """Return expressions for all possible combinations to bind *vocab*'s keys.

    Parameters
    ----------
    vocab : Vocabulary
        Vocabulary whose keys are combined; any object with a ``keys()``
        method works.

    Returns
    -------
    set of str
        One ``'X*Y'`` binding expression per unordered pair of keys.

    Examples
    --------
    >>> vocab = nengo_spa.Vocabulary(32)
    >>> vocab.populate('A; B; C')
    >>> sorted(nengo_spa.pairs(vocab))
    ['A*B', 'A*C', 'B*C']
    """
    return {f"{x}*{y}" for x, y in combinations(vocab.keys(), 2)}
def text(
    v,
    vocab,
    minimum_count=1,
    maximum_count=None,
    threshold=0.1,
    join=";",
    terms=None,
    normalize=False,
):
    """Return a human-readable text version of the provided vector.

    This is meant to give a quick text version of a vector for display
    purposes. To do this, compute the dot product between the vector
    and all the terms in the vocabulary. The top few vectors are
    chosen for inclusion in the text. It will try to only return
    terms with a match above the *threshold*, but will always return
    at least *minimum_count* and at most *maximum_count* terms. Terms
    are sorted from most to least similar.

    Parameters
    ----------
    v : SemanticPointer or array_like
        The vector to convert into text.
    vocab : Vocabulary
        Vocabulary providing the terms (and their vectors) to compare
        against.
    minimum_count : int, optional
        Always return at least this many terms in the text.
    maximum_count : int, optional
        Never return more than this many terms in the text.
        If None, all terms will be returned.
    threshold : float, optional
        How small a similarity for a term to be ignored.
    join : str, optional
        The text separator to use between terms.
    terms : list, optional
        Only consider terms in this list of strings.
    normalize : bool
        Whether to normalize the vector before computing similarity.

    Returns
    -------
    str
        Entries of the form ``'0.32KEY'`` joined by *join*, ordered from
        most to least similar.
    """
    if not isinstance(v, SemanticPointer):
        v = SemanticPointer(v)
    if normalize:
        v = v.normalized()

    if terms is None:
        terms = vocab.keys()
        vectors = vocab.vectors
    else:
        # parse_n yields one SemanticPointer per term; similarity()
        # accepts such an iterable directly.
        vectors = vocab.parse_n(*terms)

    matches = list(zip(similarity(v, vectors), terms))
    # Descending by similarity (ties broken by reversed term order,
    # preserved from the original sort-then-reverse behavior).
    matches.sort()
    matches.reverse()

    r = []
    for m in matches:
        if minimum_count is not None and len(r) < minimum_count:
            # Below the floor: always include, regardless of threshold.
            r.append(m)
        elif maximum_count is not None and len(r) == maximum_count:
            break
        elif threshold is None or m[0] > threshold:
            r.append(m)
        else:
            # matches is sorted, so everything after this is below
            # threshold too.
            break

    return join.join([f"{sim:0.2f}{key}" for (sim, key) in r])