Skip to content

Commit 8c0c4bb

Browse files
ahmed-bhs authored and chr-hertel committed
[Store][Meilisearch] Add configurable semantic ratio
1 parent ddceb68 commit 8c0c4bb

File tree

7 files changed

+316
-2
lines changed

7 files changed

+316
-2
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use Symfony\AI\Fixtures\Movies;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Store\Bridge\Meilisearch\Store;
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Indexer;
use Symfony\Component\Uid\Uuid;

require_once dirname(__DIR__).'/bootstrap.php';

// Prints the "Top 3 results" section for one result set: a heading, one
// numbered line per hit (title from the metadata plus the ranking score),
// and a trailing blank line.
$printTopResults = static function (array $hits): void {
    echo "Top 3 results:\n";
    foreach (array_slice($hits, 0, 3) as $position => $hit) {
        $fields = $hit->metadata->getArrayCopy();
        printf(
            " %d. %s (Score: %.4f)\n",
            $position + 1,
            $fields['title'] ?? 'Unknown',
            $hit->score ?? 0.0
        );
    }
    echo "\n";
};

echo "=== Meilisearch Hybrid Search Demo ===\n\n",
    "This example demonstrates how to configure the semantic ratio to balance\n",
    "between semantic (vector) search and full-text search in Meilisearch.\n\n";

// The store defaults to a balanced hybrid search (50/50); individual queries
// below override that ratio through the 'semanticRatio' option.
$store = new Store(
    httpClient: http_client(),
    endpointUrl: env('MEILISEARCH_HOST'),
    apiKey: env('MEILISEARCH_API_KEY'),
    indexName: 'movies_hybrid',
    semanticRatio: 0.5,
);

// One text document per fixture movie; the movie record itself is kept as metadata.
$documents = [];
foreach (Movies::all() as $movie) {
    $content = implode(\PHP_EOL, [
        'Title: '.$movie['title'],
        'Director: '.$movie['director'],
        'Description: '.$movie['description'],
    ]);

    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: $content,
        metadata: new Metadata($movie),
    );
}

// Initialize the index before anything is written to it.
$store->setup();

// Vectorize the documents and push them into the store.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
$indexer->index($documents);

// Embed the query once and reuse it for every ratio under comparison.
$queryText = 'futuristic technology and artificial intelligence';
echo "Query: \"$queryText\"\n\n";
$queryEmbedding = $vectorizer->vectorize($queryText);

// [semantic ratio, human-readable label] pairs to compare side by side.
$comparisons = [
    [0.0, '100% Full-text search (keyword matching)'],
    [0.5, 'Balanced hybrid (50% semantic + 50% full-text)'],
    [1.0, '100% Semantic search (vector similarity)'],
];

foreach ($comparisons as [$ratio, $label]) {
    echo "--- {$label} ---\n";

    // Per-query override of the ratio; 'q' supplies the full-text keyword.
    $printTopResults($store->query($queryEmbedding, [
        'semanticRatio' => $ratio,
        'q' => 'technology',
    ]));
}

echo "--- Custom query with pure semantic search ---\n";
echo "Query: Movies about space exploration\n";
$spaceEmbedding = $vectorizer->vectorize('space exploration and cosmic adventures');
$printTopResults($store->query($spaceEmbedding, [
    'semanticRatio' => 1.0,
]));

// Cleanup: remove the demo index again.
$store->drop();

echo "=== Summary ===\n",
    "- semanticRatio = 0.0: Best for exact keyword matches\n",
    "- semanticRatio = 0.5: Balanced approach combining both methods\n",
    "- semanticRatio = 1.0: Best for conceptual similarity searches\n",
    "\nYou can set the default ratio when instantiating the Store,\n",
    "and override it per query using the 'semanticRatio' option.\n";

src/ai-bundle/config/options.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,12 @@
570570
->stringNode('embedder')->end()
571571
->stringNode('vector_field')->end()
572572
->integerNode('dimensions')->end()
573+
->floatNode('semantic_ratio')
574+
->info('The ratio between semantic (vector) and full-text search (0.0 to 1.0). Default: 1.0 (100% semantic)')
575+
->defaultValue(1.0)
576+
->min(0.0)
577+
->max(1.0)
578+
->end()
573579
->end()
574580
->end()
575581
->end()

src/ai-bundle/src/AiBundle.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,10 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
10801080
$arguments[6] = $store['dimensions'];
10811081
}
10821082

1083+
if (\array_key_exists('semantic_ratio', $store)) {
1084+
$arguments[7] = $store['semantic_ratio'];
1085+
}
1086+
10831087
$definition = new Definition(MeilisearchStore::class);
10841088
$definition
10851089
->addTag('ai.store')

src/ai-bundle/tests/DependencyInjection/AiBundleTest.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2881,6 +2881,30 @@ public function testMeilisearchMessageStoreIsConfigured()
28812881
$this->assertTrue($meilisearchMessageStoreDefinition->hasTag('ai.message_store'));
28822882
}
28832883

2884+
#[TestDox('Meilisearch store with custom semantic_ratio can be configured')]
public function testMeilisearchStoreWithCustomSemanticRatioCanBeConfigured()
{
    // Minimal Meilisearch store configuration carrying an explicit semantic ratio.
    $storeConfig = [
        'endpoint' => 'http://127.0.0.1:7700',
        'api_key' => 'test_key',
        'index_name' => 'test_index',
        'semantic_ratio' => 0.5,
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'meilisearch' => [
                    'test_store' => $storeConfig,
                ],
            ],
        ],
    ]);

    $serviceId = 'ai.store.meilisearch.test_store';
    $this->assertTrue($container->hasDefinition($serviceId));

    // The configured ratio is expected at argument index 7 of the service definition.
    $definitionArguments = $container->getDefinition($serviceId)->getArguments();
    $this->assertSame(0.5, $definitionArguments[7]);
}
2907+
28842908
public function testMemoryMessageStoreCanBeConfiguredWithCustomKey()
28852909
{
28862910
$container = $this->buildContainer([
@@ -3205,6 +3229,7 @@ private function getFullConfig(): array
32053229
'embedder' => 'default',
32063230
'vector_field' => '_vectors',
32073231
'dimensions' => 768,
3232+
'semantic_ratio' => 0.5,
32083233
],
32093234
],
32103235
'memory' => [

src/store/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,6 @@ CHANGELOG
6060
- Minimum score filtering
6161
- Result limiting
6262
- Distance/similarity scoring
63+
* Add Meilisearch hybrid search support with a configurable `semanticRatio` parameter to control the balance between semantic (vector) and full-text search
6364
* Add custom exception hierarchy with `ExceptionInterface`
6465
* Add support for specific exceptions for invalid arguments and runtime errors

src/store/src/Bridge/Meilisearch/Store.php

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ final class Store implements ManagedStoreInterface, StoreInterface
2828
{
2929
/**
3030
* @param string $embedder The name of the embedder where vectors are stored
31-
* @param string $vectorFieldName The name of the field int the index that contains the vector
31+
* @param string $vectorFieldName The name of the field in the index that contains the vector
32+
* @param float $semanticRatio The ratio between semantic (vector) and full-text search (0.0 to 1.0)
33+
* - 0.0 = 100% full-text search
34+
* - 0.5 = balanced hybrid search
35+
* - 1.0 = 100% semantic search (vector only)
3236
*/
3337
public function __construct(
3438
private readonly HttpClientInterface $httpClient,
@@ -38,7 +42,11 @@ public function __construct(
3842
private readonly string $embedder = 'default',
3943
private readonly string $vectorFieldName = '_vectors',
4044
private readonly int $embeddingsDimension = 1536,
45+
private readonly float $semanticRatio = 1.0,
4146
) {
47+
if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
48+
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
49+
}
4250
}
4351

4452
public function setup(array $options = []): void
@@ -71,13 +79,20 @@ public function add(VectorDocument ...$documents): void
7179

7280
public function query(Vector $vector, array $options = []): array
7381
{
82+
$semanticRatio = $options['semanticRatio'] ?? $this->semanticRatio;
83+
84+
if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
85+
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
86+
}
87+
7488
$result = $this->request('POST', \sprintf('indexes/%s/search', $this->indexName), [
89+
'q' => $options['q'] ?? '',
7590
'vector' => $vector->getData(),
7691
'showRankingScore' => true,
7792
'retrieveVectors' => true,
7893
'hybrid' => [
7994
'embedder' => $this->embedder,
80-
'semanticRatio' => 1.0,
95+
'semanticRatio' => $semanticRatio,
8196
],
8297
]);
8398

src/store/tests/Bridge/Meilisearch/StoreTest.php

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
use Symfony\AI\Platform\Vector\Vector;
1616
use Symfony\AI\Store\Bridge\Meilisearch\Store;
1717
use Symfony\AI\Store\Document\VectorDocument;
18+
use Symfony\AI\Store\Exception\InvalidArgumentException;
1819
use Symfony\Component\HttpClient\Exception\ClientException;
1920
use Symfony\Component\HttpClient\MockHttpClient;
2021
use Symfony\Component\HttpClient\Response\JsonMockResponse;
22+
use Symfony\Component\HttpClient\Response\MockResponse;
2123
use Symfony\Component\Uid\Uuid;
2224

2325
final class StoreTest extends TestCase
@@ -275,4 +277,148 @@ public function testMetadataWithoutIDRankingandVector()
275277

276278
$this->assertSame($expected, $vectors[0]->metadata->getArrayCopy());
277279
}
280+
281+
public function testConstructorWithValidSemanticRatio()
{
    // A ratio inside the accepted 0.0..1.0 range must construct without throwing.
    $this->assertInstanceOf(
        Store::class,
        new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5),
    );
}
289+
290+
public function testConstructorThrowsExceptionForInvalidSemanticRatio()
{
    $client = new MockHttpClient();

    // A ratio above the upper bound (1.0) must be rejected by the constructor.
    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    new Store($client, 'http://localhost:7700', 'key', 'index', semanticRatio: 1.5);
}
298+
299+
public function testConstructorThrowsExceptionForNegativeSemanticRatio()
{
    $client = new MockHttpClient();

    // A ratio below the lower bound (0.0) must be rejected by the constructor.
    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    new Store($client, 'http://localhost:7700', 'key', 'index', semanticRatio: -0.1);
}
307+
308+
public function testQueryUsesDefaultSemanticRatio()
{
    // Single canned hit returned by the mocked search endpoint.
    $hit = [
        'id' => '550e8400-e29b-41d4-a716-446655440000',
        '_vectors' => [
            'default' => [
                'embeddings' => [0.1, 0.2, 0.3],
            ],
        ],
        '_rankingScore' => 0.95,
        'content' => 'Test document',
    ];
    $response = new MockResponse(json_encode(['hits' => [$hit]]));

    $client = new MockHttpClient([$response]);
    $store = new Store($client, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.7);

    // No per-query override: the ratio given to the constructor must be sent.
    $store->query(new Vector([0.1, 0.2, 0.3]));

    // Only inspect the request options when a request was actually issued.
    $requestOptions = null;
    if ($client->getRequestsCount() > 0) {
        $requestOptions = $response->getRequestOptions();
    }
    $this->assertNotNull($requestOptions);

    $payload = json_decode($requestOptions['body'], true);
    $this->assertSame(0.7, $payload['hybrid']['semanticRatio']);
}
339+
340+
public function testQueryCanOverrideSemanticRatio()
{
    $response = new MockResponse(json_encode(['hits' => []]));
    $store = new Store(
        new MockHttpClient([$response]),
        'http://localhost:7700',
        'key',
        'index',
        semanticRatio: 0.5,
    );

    // The per-query option must take precedence over the constructor value.
    $store->query(new Vector([0.1, 0.2, 0.3]), ['semanticRatio' => 0.2]);

    $payload = json_decode($response->getRequestOptions()['body'], true);

    $this->assertSame(0.2, $payload['hybrid']['semanticRatio']);
}
359+
360+
public function testQueryThrowsExceptionForInvalidSemanticRatioOption()
{
    $store = new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index');
    $queryVector = new Vector([0.1, 0.2, 0.3]);

    // An out-of-range per-query ratio must be rejected by query().
    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    $store->query($queryVector, ['semanticRatio' => 2.0]);
}
371+
372+
public function testQueryWithPureKeywordSearch()
{
    // Single canned hit returned by the mocked search endpoint.
    $hit = [
        'id' => '550e8400-e29b-41d4-a716-446655440000',
        '_vectors' => [
            'default' => [
                'embeddings' => [0.1, 0.2, 0.3],
            ],
        ],
        '_rankingScore' => 0.85,
        'title' => 'Symfony Framework',
    ];
    $response = new MockResponse(json_encode(['hits' => [$hit]]));

    $store = new Store(new MockHttpClient([$response]), 'http://localhost:7700', 'key', 'index');

    // A ratio of 0.0 requests full-text search only.
    $documents = $store->query(new Vector([0.1, 0.2, 0.3]), ['semanticRatio' => 0.0]);

    $this->assertCount(1, $documents);
    $this->assertInstanceOf(VectorDocument::class, $documents[0]);

    // The lower bound must reach the request body as the float 0.0.
    $payload = json_decode($response->getRequestOptions()['body'], true);
    $this->assertSame(0.0, $payload['hybrid']['semanticRatio']);
}
404+
405+
public function testQueryWithBalancedHybridSearch()
{
    $response = new MockResponse(json_encode(['hits' => []]));
    $store = new Store(
        new MockHttpClient([$response]),
        'http://localhost:7700',
        'key',
        'index',
        semanticRatio: 0.5,
    );

    // Without a per-query override, the constructor's 0.5 ratio must be sent.
    $store->query(new Vector([0.1, 0.2, 0.3]));

    $payload = json_decode($response->getRequestOptions()['body'], true);

    $this->assertSame(0.5, $payload['hybrid']['semanticRatio']);
}
278424
}

0 commit comments

Comments
 (0)