Skip to content

Commit f0fbf2b

Browse files
committed
feature #880 [Store] Add metadata filtering support to InMemoryStore and CacheStore (camilleislasse)
This PR was merged into the main branch.

Discussion
----------

[Store] Add metadata filtering support to `InMemoryStore` and `CacheStore`

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | yes
| Docs?         | yes
| License       | MIT

This PR adds metadata filtering support to local stores (InMemoryStore and CacheStore).

### Changes

- Add optional filter parameter to query() method accepting a callable
- Filter documents by metadata before distance calculation
- Add comprehensive test coverage for filtering scenarios
- Add documentation for local stores

### Usage

```php
$results = $store->query($vector, [
    'filter' => fn(VectorDocument $doc) => $doc->metadata['size'] === 'S',
    'maxItems' => 10,
]);
```

Commits
-------

84b3938 Add metadata filtering support to InMemoryStore and CacheStore
2 parents e6c435d + 84b3938 commit f0fbf2b

File tree

5 files changed

+307
-5
lines changed

5 files changed

+307
-5
lines changed

docs/components/store/local.rst

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
Local Stores (InMemory & Cache)
2+
===============================
3+
4+
The local stores provide vector storage without an external vector database: similarity search runs entirely inside the PHP process.
5+
6+
.. note::
7+
8+
Both ``InMemoryStore`` and ``CacheStore`` load all data into PHP memory during queries.
9+
The dataset must fit within PHP's memory limit.
10+
11+
InMemoryStore
12+
-------------
13+
14+
Stores vectors in a PHP array. Data is not persisted and is lost when the PHP process ends::
15+
16+
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
17+
18+
$store = new InMemoryStore();
19+
$store->add($document1, $document2);
20+
$results = $store->query($vector);
21+
22+
CacheStore
23+
----------
24+
25+
Stores vectors using a PSR-6 cache implementation. Persistence depends on the cache adapter used::
26+
27+
use Symfony\AI\Store\Bridge\Local\CacheStore;
28+
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
29+
30+
$cache = new FilesystemAdapter();
31+
$store = new CacheStore($cache);
32+
$store->add($document1, $document2);
33+
$results = $store->query($vector);
34+
35+
Distance Strategies
36+
-------------------
37+
38+
Both stores support different distance calculation strategies::
39+
40+
use Symfony\AI\Store\Bridge\Local\DistanceCalculator;
41+
use Symfony\AI\Store\Bridge\Local\DistanceStrategy;
42+
43+
$calculator = new DistanceCalculator(DistanceStrategy::COSINE_DISTANCE);
44+
$store = new InMemoryStore($calculator);
45+
46+
Available strategies:
47+
48+
* ``COSINE_DISTANCE`` (default)
49+
* ``EUCLIDEAN_DISTANCE``
50+
* ``MANHATTAN_DISTANCE``
51+
* ``ANGULAR_DISTANCE``
52+
* ``CHEBYSHEV_DISTANCE``
53+
54+
Metadata Filtering
55+
------------------
56+
57+
Both stores support filtering search results based on document metadata using a callable::
58+
59+
use Symfony\AI\Store\Document\VectorDocument;
60+
61+
$results = $store->query($vector, [
62+
'filter' => fn(VectorDocument $doc) => $doc->metadata['category'] === 'products',
63+
]);
64+
65+
You can combine multiple conditions::
66+
67+
$results = $store->query($vector, [
68+
'filter' => fn(VectorDocument $doc) =>
69+
$doc->metadata['price'] <= 100
70+
&& $doc->metadata['stock'] > 0
71+
&& $doc->metadata['enabled'] === true,
72+
'maxItems' => 10,
73+
]);
74+
75+
Filter nested metadata::
76+
77+
$results = $store->query($vector, [
78+
'filter' => fn(VectorDocument $doc) =>
79+
$doc->metadata['options']['size'] === 'S'
80+
&& $doc->metadata['options']['color'] === 'blue',
81+
]);
82+
83+
Use array functions for complex filtering::
84+
85+
$allowedBrands = ['Nike', 'Adidas', 'Puma'];
86+
$results = $store->query($vector, [
87+
'filter' => fn(VectorDocument $doc) =>
88+
\in_array($doc->metadata['brand'] ?? '', $allowedBrands, true),
89+
]);
90+
91+
.. note::
92+
93+
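Filtering is applied before distance calculation, so documents rejected by the filter are never scored and ``maxItems`` limits the filtered results only.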
94+
95+
Query Options
96+
-------------
97+
98+
Both stores support the following query options:
99+
100+
* ``maxItems`` (positive int) - Return only the top N results
101+
* ``filter`` (callable) - Receives each ``VectorDocument`` and returns a boolean; documents for which it returns ``false`` are excluded before distance calculation
102+
103+
Example combining both options::
104+
105+
$results = $store->query($vector, [
106+
'maxItems' => 5,
107+
'filter' => fn(VectorDocument $doc) => $doc->metadata['active'] === true,
108+
]);

src/store/src/Bridge/Local/CacheStore.php

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,10 @@ public function add(VectorDocument ...$documents): void
7272

7373
/**
7474
* @param array{
75-
* maxItems?: positive-int
76-
* } $options If maxItems is provided, only the top N results will be returned
75+
* maxItems?: positive-int,
76+
* filter?: callable(VectorDocument): bool
77+
* } $options If maxItems is provided, only the top N results will be returned.
78+
* If filter is provided, only documents matching the filter will be considered.
7779
*/
7880
public function query(Vector $vector, array $options = []): array
7981
{
@@ -85,6 +87,10 @@ public function query(Vector $vector, array $options = []): array
8587
metadata: new Metadata($document['metadata']),
8688
), $documents);
8789

90+
if (isset($options['filter'])) {
91+
$vectorDocuments = array_values(array_filter($vectorDocuments, $options['filter']));
92+
}
93+
8894
return $this->distanceCalculator->calculate($vectorDocuments, $vector, $options['maxItems'] ?? null);
8995
}
9096

src/store/src/Bridge/Local/InMemoryStore.php

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,20 @@ public function add(VectorDocument ...$documents): void
4848

4949
/**
5050
* @param array{
51-
* maxItems?: positive-int
52-
* } $options If maxItems is provided, only the top N results will be returned
51+
* maxItems?: positive-int,
52+
* filter?: callable(VectorDocument): bool
53+
* } $options If maxItems is provided, only the top N results will be returned.
54+
* If filter is provided, only documents matching the filter will be considered.
5355
*/
5456
public function query(Vector $vector, array $options = []): array
5557
{
56-
return $this->distanceCalculator->calculate($this->documents, $vector, $options['maxItems'] ?? null);
58+
$documents = $this->documents;
59+
60+
if (isset($options['filter'])) {
61+
$documents = array_values(array_filter($documents, $options['filter']));
62+
}
63+
64+
return $this->distanceCalculator->calculate($documents, $vector, $options['maxItems'] ?? null);
5765
}
5866

5967
public function drop(): void

src/store/tests/Bridge/Local/CacheStoreTest.php

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use Symfony\AI\Store\Bridge\Local\CacheStore;
1717
use Symfony\AI\Store\Bridge\Local\DistanceCalculator;
1818
use Symfony\AI\Store\Bridge\Local\DistanceStrategy;
19+
use Symfony\AI\Store\Document\Metadata;
1920
use Symfony\AI\Store\Document\VectorDocument;
2021
use Symfony\Component\Cache\Adapter\ArrayAdapter;
2122
use Symfony\Component\Uid\Uuid;
@@ -162,4 +163,93 @@ public function testStoreCanSearchUsingChebyshevDistance()
162163
$this->assertCount(2, $result);
163164
$this->assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
164165
}
166+
167+
public function testStoreCanSearchWithFilter()
168+
{
169+
$store = new CacheStore(new ArrayAdapter());
170+
$store->add(
171+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['category' => 'products', 'enabled' => true])),
172+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['category' => 'articles', 'enabled' => true])),
173+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['category' => 'products', 'enabled' => false])),
174+
);
175+
176+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
177+
'filter' => fn (VectorDocument $doc) => 'products' === $doc->metadata['category'],
178+
]);
179+
180+
$this->assertCount(2, $result);
181+
$this->assertSame('products', $result[0]->metadata['category']);
182+
$this->assertSame('products', $result[1]->metadata['category']);
183+
}
184+
185+
public function testStoreCanSearchWithFilterAndMaxItems()
186+
{
187+
$store = new CacheStore(new ArrayAdapter());
188+
$store->add(
189+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['category' => 'products'])),
190+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['category' => 'articles'])),
191+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['category' => 'products'])),
192+
new VectorDocument(Uuid::v4(), new Vector([0.0, 0.1, 0.6]), new Metadata(['category' => 'products'])),
193+
);
194+
195+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
196+
'filter' => fn (VectorDocument $doc) => 'products' === $doc->metadata['category'],
197+
'maxItems' => 2,
198+
]);
199+
200+
$this->assertCount(2, $result);
201+
$this->assertSame('products', $result[0]->metadata['category']);
202+
$this->assertSame('products', $result[1]->metadata['category']);
203+
}
204+
205+
public function testStoreCanSearchWithComplexFilter()
206+
{
207+
$store = new CacheStore(new ArrayAdapter());
208+
$store->add(
209+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['price' => 100, 'stock' => 5])),
210+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['price' => 200, 'stock' => 0])),
211+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['price' => 50, 'stock' => 10])),
212+
);
213+
214+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
215+
'filter' => fn (VectorDocument $doc) => $doc->metadata['price'] <= 150 && $doc->metadata['stock'] > 0,
216+
]);
217+
218+
$this->assertCount(2, $result);
219+
}
220+
221+
public function testStoreCanSearchWithNestedMetadataFilter()
222+
{
223+
$store = new CacheStore(new ArrayAdapter());
224+
$store->add(
225+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['options' => ['size' => 'S', 'color' => 'blue']])),
226+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['options' => ['size' => 'M', 'color' => 'blue']])),
227+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['options' => ['size' => 'S', 'color' => 'red']])),
228+
);
229+
230+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
231+
'filter' => fn (VectorDocument $doc) => 'S' === $doc->metadata['options']['size'],
232+
]);
233+
234+
$this->assertCount(2, $result);
235+
$this->assertSame('S', $result[0]->metadata['options']['size']);
236+
$this->assertSame('S', $result[1]->metadata['options']['size']);
237+
}
238+
239+
public function testStoreCanSearchWithInArrayFilter()
240+
{
241+
$store = new CacheStore(new ArrayAdapter());
242+
$store->add(
243+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['brand' => 'Nike'])),
244+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['brand' => 'Adidas'])),
245+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['brand' => 'Generic'])),
246+
);
247+
248+
$allowedBrands = ['Nike', 'Adidas', 'Puma'];
249+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
250+
'filter' => fn (VectorDocument $doc) => \in_array($doc->metadata['brand'] ?? '', $allowedBrands, true),
251+
]);
252+
253+
$this->assertCount(2, $result);
254+
}
165255
}

src/store/tests/Bridge/Local/InMemoryStoreTest.php

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use Symfony\AI\Store\Bridge\Local\DistanceCalculator;
1717
use Symfony\AI\Store\Bridge\Local\DistanceStrategy;
1818
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
19+
use Symfony\AI\Store\Document\Metadata;
1920
use Symfony\AI\Store\Document\VectorDocument;
2021
use Symfony\Component\Uid\Uuid;
2122

@@ -161,4 +162,93 @@ public function testStoreCanSearchUsingChebyshevDistance()
161162
$this->assertCount(2, $result);
162163
$this->assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
163164
}
165+
166+
public function testStoreCanSearchWithFilter()
167+
{
168+
$store = new InMemoryStore();
169+
$store->add(
170+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['category' => 'products', 'enabled' => true])),
171+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['category' => 'articles', 'enabled' => true])),
172+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['category' => 'products', 'enabled' => false])),
173+
);
174+
175+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
176+
'filter' => fn (VectorDocument $doc) => 'products' === $doc->metadata['category'],
177+
]);
178+
179+
$this->assertCount(2, $result);
180+
$this->assertSame('products', $result[0]->metadata['category']);
181+
$this->assertSame('products', $result[1]->metadata['category']);
182+
}
183+
184+
public function testStoreCanSearchWithFilterAndMaxItems()
185+
{
186+
$store = new InMemoryStore();
187+
$store->add(
188+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['category' => 'products'])),
189+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['category' => 'articles'])),
190+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['category' => 'products'])),
191+
new VectorDocument(Uuid::v4(), new Vector([0.0, 0.1, 0.6]), new Metadata(['category' => 'products'])),
192+
);
193+
194+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
195+
'filter' => fn (VectorDocument $doc) => 'products' === $doc->metadata['category'],
196+
'maxItems' => 2,
197+
]);
198+
199+
$this->assertCount(2, $result);
200+
$this->assertSame('products', $result[0]->metadata['category']);
201+
$this->assertSame('products', $result[1]->metadata['category']);
202+
}
203+
204+
public function testStoreCanSearchWithComplexFilter()
205+
{
206+
$store = new InMemoryStore();
207+
$store->add(
208+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['price' => 100, 'stock' => 5])),
209+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['price' => 200, 'stock' => 0])),
210+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['price' => 50, 'stock' => 10])),
211+
);
212+
213+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
214+
'filter' => fn (VectorDocument $doc) => $doc->metadata['price'] <= 150 && $doc->metadata['stock'] > 0,
215+
]);
216+
217+
$this->assertCount(2, $result);
218+
}
219+
220+
public function testStoreCanSearchWithNestedMetadataFilter()
221+
{
222+
$store = new InMemoryStore();
223+
$store->add(
224+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['options' => ['size' => 'S', 'color' => 'blue']])),
225+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['options' => ['size' => 'M', 'color' => 'blue']])),
226+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['options' => ['size' => 'S', 'color' => 'red']])),
227+
);
228+
229+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
230+
'filter' => fn (VectorDocument $doc) => 'S' === $doc->metadata['options']['size'],
231+
]);
232+
233+
$this->assertCount(2, $result);
234+
$this->assertSame('S', $result[0]->metadata['options']['size']);
235+
$this->assertSame('S', $result[1]->metadata['options']['size']);
236+
}
237+
238+
public function testStoreCanSearchWithInArrayFilter()
239+
{
240+
$store = new InMemoryStore();
241+
$store->add(
242+
new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5]), new Metadata(['brand' => 'Nike'])),
243+
new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0]), new Metadata(['brand' => 'Adidas'])),
244+
new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1]), new Metadata(['brand' => 'Generic'])),
245+
);
246+
247+
$allowedBrands = ['Nike', 'Adidas', 'Puma'];
248+
$result = $store->query(new Vector([0.0, 0.1, 0.6]), [
249+
'filter' => fn (VectorDocument $doc) => \in_array($doc->metadata['brand'] ?? '', $allowedBrands, true),
250+
]);
251+
252+
$this->assertCount(2, $result);
253+
}
164254
}

0 commit comments

Comments
 (0)