|
13 | 13 | import pytest |
14 | 14 |
|
15 | 15 | from haystack import Document |
| 16 | +from haystack.dataclasses import ByteStream, SparseEmbedding |
16 | 17 | from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError |
17 | 18 | from haystack.document_stores.in_memory import InMemoryDocumentStore |
18 | 19 | from haystack.testing.document_store import ( |
@@ -146,6 +147,26 @@ def test_save_to_disk_and_load_from_disk(self, in_memory_doc_store: InMemoryDocu |
146 | 147 | assert list(document_store_loaded.storage.values()) == docs |
147 | 148 | assert document_store_loaded.to_dict() == in_memory_doc_store.to_dict() |
148 | 149 |
|
| 150 | + def test_save_to_disk_and_load_from_disk_with_blob_and_sparse_embedding( |
| 151 | + self, in_memory_doc_store: InMemoryDocumentStore, tmp_dir: str |
| 152 | + ) -> None: |
| 153 | + doc = Document( |
| 154 | + content="document with binary data", |
| 155 | + blob=ByteStream(data=b"binary data", mime_type="image/png"), |
| 156 | + sparse_embedding=SparseEmbedding(indices=[0, 5], values=[0.1, 0.9]), |
| 157 | + ) |
| 158 | + in_memory_doc_store.write_documents([doc]) |
| 159 | + save_path = tmp_dir + "/in_memory_doc_store.json" |
| 160 | + in_memory_doc_store.save_to_disk(save_path) |
| 161 | + document_store_loaded = InMemoryDocumentStore.load_from_disk(save_path) |
| 162 | + |
| 163 | + loaded_doc = document_store_loaded.filter_documents()[0] |
| 164 | + assert isinstance(loaded_doc.blob, ByteStream) |
| 165 | + assert isinstance(loaded_doc.sparse_embedding, SparseEmbedding) |
| 166 | + assert loaded_doc == doc |
| 167 | + # The loaded store must be savable again |
| 168 | + document_store_loaded.save_to_disk(save_path) |
| 169 | + |
149 | 170 | def test_invalid_bm25_algorithm(self): |
150 | 171 | with pytest.raises(ValueError, match="BM25 algorithm 'invalid' is not supported"): |
151 | 172 | InMemoryDocumentStore(bm25_algorithm="invalid") # type: ignore[arg-type] |
|
0 commit comments