Skip to content

CouchbaseSearchEmbeddingRetriever¤

Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.

The similarity depends on the vector_search_index used in the CouchbaseSearchDocumentStore and on the metric chosen when the index was created (e.g. dot product or L2 norm). See CouchbaseSearchDocumentStore for more information.

Usage example:

import numpy as np
from couchbase_haystack import CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever, CouchbasePasswordAuthenticator
from haystack.utils import Secret

# Configure the document store. The connection string and the credentials are
# supplied as Secret objects built with Secret.from_env_var.
store = CouchbaseSearchDocumentStore(
    cluster_connection_string=Secret.from_env_var("CB_CONNECTION_STRING"),
    authenticator=CouchbasePasswordAuthenticator(
        username=Secret.from_env_var("CB_USERNAME"),
        password=Secret.from_env_var("CB_PASSWORD")
    ),
    bucket="haystack_test_bucket",
    scope="scope_name",
    collection="collection_name",
    vector_search_index="vector_index"
)
# Wrap the store in the retriever; top_k is not passed, so its default of 10 applies.
retriever = CouchbaseSearchEmbeddingRetriever(document_store=store)

# Query with a random 768-element vector. The length must match the dimensions
# of the embeddings stored in the document store.
results = retriever.run(query_embedding=np.random.random(768).tolist())
print(results["documents"])

The example above retrieves the 10 most similar documents to a random query embedding from the CouchbaseSearchDocumentStore. Note that the number of dimensions of the query_embedding must match the number of dimensions of the embeddings stored in the CouchbaseSearchDocumentStore.

Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
@component
class CouchbaseSearchEmbeddingRetriever:
    """
    Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.

    The similarity depends on the vector_search_index used in the CouchbaseSearchDocumentStore and on the metric
    chosen when the index was created (e.g. dot product or L2 norm). See CouchbaseSearchDocumentStore for more
    information.

    Usage example:

    ```python
    import numpy as np
    from couchbase_haystack import CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever, CouchbasePasswordAuthenticator
    from haystack.utils import Secret

    store = CouchbaseSearchDocumentStore(
        cluster_connection_string=Secret.from_env_var("CB_CONNECTION_STRING"),
        authenticator=CouchbasePasswordAuthenticator(
            username=Secret.from_env_var("CB_USERNAME"),
            password=Secret.from_env_var("CB_PASSWORD")
        ),
        bucket="haystack_test_bucket",
        scope="scope_name",
        collection="collection_name",
        vector_search_index="vector_index"
    )
    retriever = CouchbaseSearchEmbeddingRetriever(document_store=store)

    results = retriever.run(query_embedding=np.random.random(768).tolist())
    print(results["documents"])
    ```

    The example above retrieves the 10 most similar documents to a random query embedding from the
    CouchbaseSearchDocumentStore. Note that the number of dimensions of the query_embedding must match the number
    of dimensions of the embeddings stored in the CouchbaseSearchDocumentStore.
    """

    def __init__(
        self,
        *,
        document_store: CouchbaseSearchDocumentStore,
        top_k: int = 10,
    ):
        """
        Create the CouchbaseSearchEmbeddingRetriever component.

        Note: Currently, the filter option is not supported with embedding queries.
        Instead, you can provide a couchbase search query while running the embedding query.
        The embedding query and search query are combined using an OR operation.

        Args:
            document_store: An instance of CouchbaseSearchDocumentStore.
            top_k: Maximum number of Documents to return.

        Raises:
            ValueError: If document_store is not an instance of CouchbaseSearchDocumentStore.
        """
        if not isinstance(document_store, CouchbaseSearchDocumentStore):
            msg = "document_store must be an instance of CouchbaseSearchDocumentStore"
            raise ValueError(msg)

        self.document_store = document_store
        self.top_k = top_k

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        The document store is serialized through its own to_dict method and stored under the
        "document_store" init parameter.

        Returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            top_k=self.top_k,
            document_store=self.document_store.to_dict(),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CouchbaseSearchEmbeddingRetriever":
        """
        Deserializes the component from a dictionary.

        Args:
            data: Dictionary to deserialize from. Its "init_parameters" entry must contain a serialized
                "document_store". This method does not reassign any entry of `data` or of its
                "init_parameters" mapping, so the same dictionary can be deserialized again.

        Returns:
            Deserialized component.

        Raises:
            KeyError: If "init_parameters" or its "document_store" entry is missing.
        """
        # Work on shallow copies so the caller's serialized dictionary is left as it was passed in.
        init_parameters = dict(data["init_parameters"])
        init_parameters["document_store"] = CouchbaseSearchDocumentStore.from_dict(
            init_parameters["document_store"]
        )
        return default_from_dict(cls, {**data, "init_parameters": init_parameters})

    @component.output_types(documents=List[Document])
    def run(
        self,
        query_embedding: List[float],
        top_k: Optional[int] = None,
        search_query: Optional[SearchQuery] = None,
        limit: Optional[int] = None,
    ) -> Dict[str, List[Document]]:
        """
        Retrieve documents from the CouchbaseSearchDocumentStore, based on the provided embedding similarity.

        Args:
            query_embedding: Embedding of the query.
            top_k: Maximum number of Documents to be returned from the vector query.
                Overrides the value specified at initialization. A falsy value (None or 0) falls back to
                the value specified at initialization.
            search_query: Couchbase search query that is forwarded to the document store together with the
                vector query. The vector query and the search query are combined using an OR operation.
            limit: Maximum number of Documents to be returned by the Couchbase FTS search request.
                Forwarded unchanged to the document store; the default value is top_k.

        Returns:
            A dictionary with the following keys:
            - documents: List of Documents most similar to the given query_embedding
        """
        # `or` (rather than an `is None` check) is kept on purpose: top_k=0 also selects the default.
        top_k = top_k or self.top_k

        docs = self.document_store._embedding_retrieval(
            query_embedding=query_embedding, top_k=top_k, search_query=search_query, limit=limit
        )
        return {"documents": docs}

__init__ ¤

__init__(*, document_store: CouchbaseSearchDocumentStore, top_k: int = 10)

Note: Currently, the filter option is not supported with embedding queries. Instead, you can provide a couchbase search query while running the embedding query. The embedding query and search query are combined using an OR operation.

Parameters:

  • document_store (CouchbaseSearchDocumentStore) –

    An instance of CouchbaseSearchDocumentStore.

  • top_k (int, default: 10 ) –

    Maximum number of Documents to return.

Raises:

  • ValueError

    If document_store is not an instance of CouchbaseSearchDocumentStore.

Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
def __init__(
    self,
    *,
    document_store: CouchbaseSearchDocumentStore,
    top_k: int = 10,
):
    """
    Create the CouchbaseSearchEmbeddingRetriever component.

    Note: Filters are currently not supported with embedding queries. A couchbase search query
    can be supplied when running the embedding query instead; the two queries are combined
    using an OR operation.

    Args:
        document_store: The CouchbaseSearchDocumentStore instance to retrieve from.
        top_k: Maximum number of Documents to return.

    Raises:
        ValueError: If document_store is not an instance of CouchbaseSearchDocumentStore.
    """
    # Reject anything that is not the expected store type before touching instance state.
    store_is_valid = isinstance(document_store, CouchbaseSearchDocumentStore)
    if not store_is_valid:
        raise ValueError("document_store must be an instance of CouchbaseSearchDocumentStore")

    self.top_k = top_k
    self.document_store = document_store

to_dict ¤

to_dict() -> Dict[str, Any]

Serializes the component to a dictionary.

Returns: Dictionary with serialized data.

Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
def to_dict(self) -> Dict[str, Any]:
    """
    Serializes the component to a dictionary.

    The document store is serialized with its own to_dict method and passed on as the
    "document_store" init parameter next to "top_k".

    Returns:
        Dictionary with serialized data.
    """
    init_parameters = {
        "top_k": self.top_k,
        "document_store": self.document_store.to_dict(),
    }
    return default_to_dict(self, **init_parameters)

from_dict classmethod ¤

from_dict(data: Dict[str, Any]) -> CouchbaseSearchEmbeddingRetriever

Deserializes the component from a dictionary.

Parameters: data – Dictionary to deserialize from.

Returns: Deserialized component.

Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "CouchbaseSearchEmbeddingRetriever":
    """
    Deserializes the component from a dictionary.

    Args:
        data: Dictionary to deserialize from. Its "init_parameters" entry must contain a serialized
            "document_store". This method does not reassign any entry of `data` or of its
            "init_parameters" mapping, so the same dictionary can be deserialized again.

    Returns:
        Deserialized component.

    Raises:
        KeyError: If "init_parameters" or its "document_store" entry is missing.
    """
    # Work on shallow copies so the caller's serialized dictionary is left as it was passed in.
    init_parameters = dict(data["init_parameters"])
    init_parameters["document_store"] = CouchbaseSearchDocumentStore.from_dict(
        init_parameters["document_store"]
    )
    return default_from_dict(cls, {**data, "init_parameters": init_parameters})

run ¤

run(
    query_embedding: List[float],
    top_k: Optional[int] = None,
    search_query: Optional[SearchQuery] = None,
    limit: Optional[int] = None,
) -> Dict[str, List[Document]]

Retrieve documents from the CouchbaseSearchDocumentStore, based on the provided embedding similarity.

Parameters:

  • query_embedding (List[float]) –

    Embedding of the query.

  • top_k (Optional[int], default: None ) –

    Maximum number of Documents to be returned from vector query. Overrides the value specified at initialization.

  • search_query (Optional[SearchQuery], default: None ) –

    Search filter parameter that is passed to the Couchbase search query. The vector query and the search query are combined using an OR operation.

  • limit (Optional[int], default: None ) –

    Maximum number of Documents to be returned by the Couchbase FTS search request. Default value is top_k.

Returns:

  • Dict[str, List[Document]]

    A dictionary with the following keys:

    • documents: List of Documents most similar to the given query_embedding
Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
@component.output_types(documents=List[Document])
def run(
    self,
    query_embedding: List[float],
    top_k: Optional[int] = None,
    search_query: Optional[SearchQuery] = None,
    limit: Optional[int] = None,
) -> Dict[str, List[Document]]:
    """
    Retrieve documents from the CouchbaseSearchDocumentStore, based on the provided embedding similarity.

    Args:
        query_embedding: Embedding of the query.
        top_k: Maximum number of Documents to be returned from the vector query.
            Overrides the value specified at initialization; a falsy value (None or 0)
            falls back to that initial value.
        search_query: Couchbase search query forwarded to the document store along with the
            vector query. The vector query and the search query are combined using an OR operation.
        limit: Maximum number of Documents to be returned by the Couchbase FTS search request.
            Forwarded unchanged to the document store; the default value is top_k.

    Returns:
        A dictionary with the following keys:
        - documents: List of Documents most similar to the given query_embedding
    """
    # Use the per-call value when it is truthy, otherwise the instance-level default.
    effective_top_k = top_k if top_k else self.top_k

    retrieval_kwargs = {
        "query_embedding": query_embedding,
        "top_k": effective_top_k,
        "search_query": search_query,
        "limit": limit,
    }
    matches = self.document_store._embedding_retrieval(**retrieval_kwargs)
    return {"documents": matches}