Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.
The similarity depends on the vector_search_index used in the CouchbaseSearchDocumentStore and on the metric chosen
when the index was created (e.g. dot product or L2 norm). See CouchbaseSearchDocumentStore for more
information.
Usage example:
import numpy as np

from couchbase_haystack import (
    CouchbaseSearchDocumentStore,
    CouchbaseSearchEmbeddingRetriever,
    CouchbasePasswordAuthenticator,
)
from haystack.utils import Secret

# Connection string and credentials are read from environment variables.
store = CouchbaseSearchDocumentStore(
    cluster_connection_string=Secret.from_env_var("CB_CONNECTION_STRING"),
    authenticator=CouchbasePasswordAuthenticator(
        username=Secret.from_env_var("CB_USERNAME"),
        password=Secret.from_env_var("CB_PASSWORD"),
    ),
    bucket="haystack_test_bucket",
    scope="scope_name",
    collection="collection_name",
    vector_search_index="vector_index",
)
retriever = CouchbaseSearchEmbeddingRetriever(document_store=store)

# A random 768-dimensional vector stands in for a real query embedding.
query_embedding = np.random.random(768).tolist()
results = retriever.run(query_embedding=query_embedding)
print(results["documents"])
The example above retrieves the 10 most similar documents to a random query embedding from the
CouchbaseSearchDocumentStore. Note that dimensions of the query_embedding must match the dimensions of the embeddings
stored in the CouchbaseSearchDocumentStore.
Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
| @component
class CouchbaseSearchEmbeddingRetriever:
    """
    Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.

    The similarity depends on the vector_search_index used in the CouchbaseSearchDocumentStore and on the metric
    chosen when the index was created (e.g. dot product or L2 norm). See CouchbaseSearchDocumentStore for more
    information.

    Usage example:
    ```python
    import numpy as np
    from couchbase_haystack import CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever, CouchbasePasswordAuthenticator
    from haystack.utils import Secret

    store = CouchbaseSearchDocumentStore(
        cluster_connection_string=Secret.from_env_var("CB_CONNECTION_STRING"),
        authenticator=CouchbasePasswordAuthenticator(
            username=Secret.from_env_var("CB_USERNAME"),
            password=Secret.from_env_var("CB_PASSWORD")
        ),
        bucket="haystack_test_bucket",
        scope="scope_name",
        collection="collection_name",
        vector_search_index="vector_index"
    )
    retriever = CouchbaseSearchEmbeddingRetriever(document_store=store)

    results = retriever.run(query_embedding=np.random.random(768).tolist())
    print(results["documents"])
    ```

    The example above retrieves the 10 most similar documents to a random query embedding from the
    CouchbaseSearchDocumentStore. Note that the dimensions of the query_embedding must match the dimensions of the
    embeddings stored in the CouchbaseSearchDocumentStore.
    """

    def __init__(
        self,
        *,
        document_store: CouchbaseSearchDocumentStore,
        top_k: int = 10,
    ):
        """
        Create the CouchbaseSearchEmbeddingRetriever component.

        Note: Currently, the filter option is not supported with embedding queries.
        Instead, you can provide a Couchbase search query while running the embedding query.
        The embedding query and search query are combined using an OR operation.

        Args:
            document_store: An instance of CouchbaseSearchDocumentStore.
            top_k: Maximum number of Documents to return.

        Raises:
            ValueError: If document_store is not an instance of CouchbaseSearchDocumentStore.
        """
        # Reject any other store type up front rather than failing later inside run().
        if not isinstance(document_store, CouchbaseSearchDocumentStore):
            msg = "document_store must be an instance of CouchbaseSearchDocumentStore"
            raise ValueError(msg)

        self.document_store = document_store
        self.top_k = top_k

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        Returns:
            Dictionary with serialized data.
        """
        # The document store is serialized through its own to_dict() and nested under "document_store".
        return default_to_dict(
            self,
            top_k=self.top_k,
            document_store=self.document_store.to_dict(),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CouchbaseSearchEmbeddingRetriever":
        """
        Deserializes the component from a dictionary.

        Args:
            data: Dictionary to deserialize from. Its ``init_parameters["document_store"]`` entry is
                replaced in place by the rebuilt CouchbaseSearchDocumentStore.

        Returns:
            Deserialized component.
        """
        # Rebuild the nested document store first so the component receives a store instance, not a dict.
        data["init_parameters"]["document_store"] = CouchbaseSearchDocumentStore.from_dict(
            data["init_parameters"]["document_store"]
        )
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(
        self,
        query_embedding: List[float],
        top_k: Optional[int] = None,
        search_query: Optional[SearchQuery] = None,
        limit: Optional[int] = None,
    ) -> Dict[str, List[Document]]:
        """
        Retrieve documents from the CouchbaseSearchDocumentStore, based on the provided embedding similarity.

        Args:
            query_embedding: Embedding of the query.
            top_k: Maximum number of Documents to be returned from the vector query.
                Overrides the value specified at initialization.
            search_query: Couchbase search query to run alongside the vector query.
                The vector query and the search query are combined using an OR operation.
            limit: Maximum number of Documents to be returned by the Couchbase FTS search request.
                Default value is top_k.

        Returns:
            A dictionary with the following keys:
            - documents: List of Documents most similar to the given query_embedding
        """
        # A falsy top_k (None or 0) falls back to the value configured at initialization.
        top_k = top_k or self.top_k

        docs = self.document_store._embedding_retrieval(
            query_embedding=query_embedding, top_k=top_k, search_query=search_query, limit=limit
        )
        return {"documents": docs}
|
__init__
__init__(*, document_store: CouchbaseSearchDocumentStore, top_k: int = 10)
Note: Currently, the filter option is not supported with embedding queries.
Instead, you can provide a couchbase search query while running the embedding query.
The embedding query and search query are combined using an OR operation.
Parameters:
-
document_store
(CouchbaseSearchDocumentStore
)
–
An instance of CouchbaseSearchDocumentStore.
-
top_k
(int
, default:
10
)
–
Maximum number of Documents to return.
Raises:
-
ValueError
–
If document_store is not an instance of CouchbaseSearchDocumentStore.
Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
| def __init__(
self,
*,
document_store: CouchbaseSearchDocumentStore,
top_k: int = 10,
):
"""
Create the CouchbaseSearchEmbeddingRetriever component.
Note: Currently, the filter option is not supported with embedding queries.
Instead, you can provide a Couchbase search query while running the embedding query.
The embedding query and search query are combined using an OR operation.
Args:
document_store: An instance of CouchbaseSearchDocumentStore.
top_k: Maximum number of Documents to return.
Raises:
ValueError: If document_store is not an instance of CouchbaseSearchDocumentStore.
"""
# Reject any other store type up front rather than failing later inside run().
if not isinstance(document_store, CouchbaseSearchDocumentStore):
msg = "document_store must be an instance of CouchbaseSearchDocumentStore"
raise ValueError(msg)
self.document_store = document_store
self.top_k = top_k
|
to_dict
to_dict() -> Dict[str, Any]
Serializes the component to a dictionary.
Returns:
Dictionary with serialized data.
Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
| def to_dict(self) -> Dict[str, Any]:
"""
Serializes the component to a dictionary.
Returns:
Dictionary with serialized data.
"""
# The document store is serialized through its own to_dict() and nested under "document_store".
return default_to_dict(
self,
top_k=self.top_k,
document_store=self.document_store.to_dict(),
)
|
from_dict
classmethod
from_dict(data: Dict[str, Any]) -> CouchbaseSearchEmbeddingRetriever
Deserializes the component from a dictionary.
Parameters:
- data (Dict[str, Any]) – Dictionary to deserialize from.
Returns:
- Deserialized component.
Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
| @classmethod
def from_dict(cls, data: Dict[str, Any]) -> "CouchbaseSearchEmbeddingRetriever":
"""
Deserializes the component from a dictionary.
Args:
data: Dictionary to deserialize from. Its ``init_parameters["document_store"]`` entry is
replaced in place by the rebuilt CouchbaseSearchDocumentStore.
Returns:
Deserialized component.
"""
# Rebuild the nested document store first so the component receives a store instance, not a dict.
data["init_parameters"]["document_store"] = CouchbaseSearchDocumentStore.from_dict(
data["init_parameters"]["document_store"]
)
return default_from_dict(cls, data)
|
run
run(
query_embedding: List[float],
top_k: Optional[int] = None,
search_query: Optional[SearchQuery] = None,
limit: Optional[int] = None,
) -> Dict[str, List[Document]]
Retrieve documents from the CouchbaseSearchDocumentStore, based on the provided embedding similarity.
Parameters:
-
query_embedding
(List[float]
)
–
Embedding of the query.
-
top_k
(Optional[int]
, default:
None
)
–
Maximum number of Documents to be returned from vector query.
Overrides the value specified at initialization.
-
search_query
(Optional[SearchQuery]
, default:
None
)
–
Search filters param which is parsed to the Couchbase search query.
The vector query and the search query are combined using an OR operation.
-
limit
(Optional[int]
, default:
None
)
–
Maximum number of Documents to be returned by the Couchbase FTS search request.
Default value is top_k.
Returns:
-
Dict[str, List[Document]]
–
A dictionary with the following keys:
-
Dict[str, List[Document]]
–
- documents: List of Documents most similar to the given query_embedding
Source code in src/couchbase_haystack/components/retrievers/embedding_retriever.py
| @component.output_types(documents=List[Document])
def run(
self,
query_embedding: List[float],
top_k: Optional[int] = None,
search_query: Optional[SearchQuery] = None,
limit: Optional[int] = None,
) -> Dict[str, List[Document]]:
"""
Retrieve documents from the CouchbaseSearchDocumentStore, based on the provided embedding similarity.
Args:
query_embedding: Embedding of the query.
top_k: Maximum number of Documents to be returned from the vector query.
Overrides the value specified at initialization.
search_query: Couchbase search query to run alongside the vector query.
The vector query and the search query are combined using an OR operation.
limit: Maximum number of Documents to be returned by the Couchbase FTS search request.
Default value is top_k.
Returns:
A dictionary with the following keys:
- documents: List of Documents most similar to the given query_embedding
"""
# A falsy top_k (None or 0) falls back to the value configured at initialization.
top_k = top_k or self.top_k
docs = self.document_store._embedding_retrieval(
query_embedding=query_embedding, top_k=top_k, search_query=search_query, limit=limit
)
return {"documents": docs}
|