Skip to content

Commit 720bb43

Browse files
committed
make threshold configurable
1 parent 0de7e02 commit 720bb43

File tree

5 files changed

+17
-6
lines changed

5 files changed

+17
-6
lines changed

Embeddings.php

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ class Embeddings
4444

4545
protected $configChunkSize;
4646
protected $configContextChunks;
47+
protected $similarityThreshold;
4748

4849
/**
4950
* Embeddings constructor.
@@ -64,6 +65,7 @@ public function __construct(
6465
$this->storage = $storage;
6566
$this->configChunkSize = $config['chunkSize'];
6667
$this->configContextChunks = $config['contextChunks'];
68+
$this->similarityThreshold = $config['similarityThreshold']/100;
6769
}
6870

6971
/**
@@ -249,6 +251,7 @@ public function getSimilarChunks($query, $lang = '')
249251
foreach ($chunks as $chunk) {
250252
// filter out chunks the user is not allowed to read
251253
if ($auth && auth_quickaclcheck($chunk->getPage()) < AUTH_READ) continue;
254+
if($chunk->getScore() < $this->similarityThreshold) continue;
252255

253256
$chunkSize = count($this->getTokenEncoder()->encode($chunk->getText()));
254257
if ($size + $chunkSize > $this->chatModel->getMaxInputTokenLength()) break; // we have enough

Storage/SQLiteStorage.php

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
*/
1818
class SQLiteStorage extends AbstractStorage
1919
{
20-
/** @var float minimum similarity to consider a chunk a match */
21-
final public const SIMILARITY_THRESHOLD = 0;
2220

2321
/** @var int Number of documents to randomly sample to create the clusters */
2422
final public const SAMPLE_SIZE = 2000;
@@ -30,6 +28,9 @@ class SQLiteStorage extends AbstractStorage
3028

3129
protected $useLanguageClusters = false;
3230

31+
/** @var float minimum similarity to consider a chunk a match */
32+
protected $similarityThreshold = 0;
33+
3334
/** @inheritdoc */
3435
public function __construct(array $config)
3536
{
@@ -38,6 +39,8 @@ public function __construct(array $config)
3839

3940
$helper = plugin_load('helper', 'aichat');
4041
$this->useLanguageClusters = $helper->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED;
42+
43+
$this->similarityThreshold = $config['similarityThreshold']/100;
4144
}
4245

4346
/** @inheritdoc */
@@ -148,7 +151,7 @@ public function getSimilarChunks($vector, $lang = '', $limit = 4)
148151
AND similarity > CAST(? AS FLOAT)
149152
ORDER BY similarity DESC
150153
LIMIT ?',
151-
[json_encode($vector, JSON_THROW_ON_ERROR), $cluster, self::SIMILARITY_THRESHOLD, $limit]
154+
[json_encode($vector, JSON_THROW_ON_ERROR), $cluster, $this->similarityThreshold, $limit]
152155
);
153156
$chunks = [];
154157
foreach ($result as $record) {

conf/default.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
$conf['qdrant_collection'] = 'aichat';
3535

3636
$conf['chunkSize'] = 1500;
37+
$conf['similarityThreshold'] = 75;
3738
$conf['contextChunks'] = 5;
3839
$conf['chatHistory'] = 1;
3940
$conf['rephraseHistory'] = 1;

conf/metadata.php

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,11 @@
4040
$meta['qdrant_apikey'] = array('password');
4141
$meta['qdrant_collection'] = array('string');
4242

43-
$meta['chunkSize'] = array('numeric', '_min' => 100);
44-
$meta['contextChunks'] = array('numeric', '_min' => 1);
43+
$meta['chunkSize'] = array('numeric', '_min' => 100, '_regexp' => '/^\d+$/');
44+
$meta['similarityThreshold'] = array('numeric', '_min' => 0, '_max' => 100, '_regexp' => '/^\d+$/');
45+
$meta['contextChunks'] = array('numeric', '_min' => 1, '_regexp' => '/^\d+$/');
46+
$meta['chatHistory'] = array('numeric', '_min' => 0, '_regexp' => '/^\d+$/');
47+
$meta['rephraseHistory'] = array('numeric', '_min' => 0, '_regexp' => '/^\d+$/');
4548

4649
$meta['logging'] = array('onoff');
4750
$meta['restrict'] = array('string');

lang/en/settings.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
$lang['qdrant_collection'] = '📥 <b>Qdrant</b> collection. Will be created.';
3333

3434
$lang['chunkSize'] = 'Maximum number of tokens per chunk.<br>🔄 You need to rebuild the vector storage when changing this setting.';
35-
$lang['contextChunks'] = 'Number of chunks to send to the AI model for context.';
35+
$lang['similarityThreshold'] = 'Minimum similarity threshold when selecting sources for a question. 0-100.';
36+
$lang['contextChunks'] = 'Maximum number of chunks to send to the AI model for context.';
3637
$lang['chatHistory'] = 'Number of previous chat messages to consider for context in the conversation.';
3738
$lang['rephraseHistory'] = 'Number of previous chat messages to consider for context when rephrasing a question. Set to 0 to disable rephrasing.';
3839

0 commit comments

Comments
 (0)