Skip to content

Commit c2b7a1f

Browse files
committed
various refactoring and introduction of a simulate command
The new command makes it easier to run the same chat questions against multiple models and compare the results in a spreadsheet
1 parent 51aa851 commit c2b7a1f

File tree

10 files changed

+405
-99
lines changed

10 files changed

+405
-99
lines changed

Chunk.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ public function __construct(
3232

3333
public function __toString(): string
3434
{
35-
return $this->page . '#' . $this->id;
35+
$string = $this->page . '#' . $this->id;
36+
if ($this->score) {
37+
$string .= ' (' . $this->score . ')';
38+
}
39+
return $string;
3640
}
3741

3842
/**

Embeddings.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ class Embeddings
3939
/** @var array remember sentences when chunking */
4040
private $sentenceQueue = [];
4141

42+
/** @var float the time in seconds spent on the last similar chunk retrieval */
43+
public $timeSpent = 0;
44+
4245
protected $configChunkSize;
4346
protected $configContextChunks;
4447

@@ -234,10 +237,11 @@ public function getSimilarChunks($query, $lang = '')
234237

235238
$time = microtime(true);
236239
$chunks = $this->storage->getSimilarChunks($vector, $lang, $fetch);
240+
$this->timeSpent = microtime(true) - $time;
237241
if ($this->logger instanceof CLI) {
238242
$this->logger->info(
239243
'Fetched {count} similar chunks from store in {time} seconds',
240-
['count' => count($chunks), 'time' => round(microtime(true) - $time, 2)]
244+
['count' => count($chunks), 'time' => round($this->timeSpent, 2)]
241245
);
242246
}
243247

Model/AbstractModel.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ public function getUsageStats()
114114

115115
return [
116116
'tokens' => $this->inputTokensUsed + $this->outputTokensUsed,
117-
'cost' => round($cost / 1_000_000, 4),
117+
'cost' => sprintf("%.6f", $cost / 1_000_000),
118118
'time' => round($this->timeUsed, 2),
119119
'requests' => $this->requestsMade,
120120
];

Model/Mistral/AbstractMistralModel.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ protected function parseAPIResponse($response)
4343
$this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0;
4444
}
4545

46-
if (isset($response['error'])) {
47-
throw new \Exception('Mistral API error: ' . $response['error']['message']);
46+
if (isset($response['object']) && $response['object'] === 'error') {
47+
throw new \Exception('Mistral API error: ' . $response['message']);
4848
}
4949

5050
return $response;

Model/Mistral/ChatModel.php

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,25 @@ class ChatModel extends AbstractMistralModel implements ChatInterface
99
/** @inheritdoc */
1010
public function getAnswer(array $messages): string
1111
{
12+
// Mistral only accepts a system message at the very beginning of the chat, so all system messages are merged into one and moved to the front
13+
// https://discord.com/channels/1144547040454508606/1220314306844037150
14+
$system = '';
15+
$chat = [];
16+
foreach ($messages as $message) {
17+
if ($message['role'] === 'system') {
18+
$system .= $message['content'] . "\n";
19+
} else {
20+
$chat[] = $message;
21+
}
22+
}
23+
$system = trim($system);
24+
if ($system) {
25+
array_unshift($chat, ['role' => 'system', 'content' => $system]);
26+
}
27+
28+
1229
$data = [
13-
'messages' => $messages,
30+
'messages' => $chat,
1431
'model' => $this->getModelName(),
1532
'max_tokens' => null,
1633
'stream' => false,

Model/VoyageAI/EmbeddingModel.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ protected function request($endpoint, $data)
4949
protected function parseAPIResponse($response)
5050
{
5151
if (isset($response['usage'])) {
52-
$this->inputTokensUsed += $response['usage']['prompt_tokens'];
53-
$this->outputTokensUsed += $response['usage']['completion_tokens'] ?? 0;
52+
$this->inputTokensUsed += $response['usage']['total_tokens'];
5453
}
5554

5655
if (isset($response['error'])) {

ModelFactory.php

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
<?php
2+
3+
namespace dokuwiki\plugin\aichat;
4+
5+
use dokuwiki\plugin\aichat\Model\ChatInterface;
6+
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
7+
8+
class ModelFactory
{
    /** @var array The plugin configuration */
    protected array $config;

    /** @var ChatInterface|null Cached chat model, created on first call to getChatModel() */
    public $chatModel;

    /** @var ChatInterface|null Cached rephrase model, created on first call to getRephraseModel() */
    public $rephraseModel;

    /** @var EmbeddingInterface|null Cached embedding model, created on first call to getEmbeddingModel() */
    public $embeddingModel;

    /** @var bool Debug flag that is passed on to every model created by getModels() */
    protected $debug = false;

    /**
     * @param array $config The plugin configuration
     */
    public function __construct(array $config)
    {
        $this->config = $config;
    }

    /**
     * Set the debug flag for all models
     *
     * The flag is remembered for models created later through getModels() and is applied
     * immediately to the three cached models (which are loaded here if they were not yet).
     *
     * @param bool $debug
     * @throws \Exception when one of the configured models cannot be loaded
     */
    public function setDebug(bool $debug = true)
    {
        $this->debug = $debug;
        $this->getChatModel()->setDebug($debug);
        $this->getRephraseModel()->setDebug($debug);
        $this->getEmbeddingModel()->setDebug($debug);
    }

    /**
     * Access a cached Chat Model
     *
     * @return ChatInterface
     * @throws \Exception
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }
        $this->chatModel = $this->loadModel('chat', $this->config['chatmodel']);
        return $this->chatModel;
    }

    /**
     * Access a cached Rephrase Model
     *
     * @return ChatInterface
     * @throws \Exception
     */
    public function getRephraseModel()
    {
        if ($this->rephraseModel instanceof ChatInterface) {
            return $this->rephraseModel;
        }

        // Prefer a dedicated rephrase model setting and fall back to the chat model otherwise.
        // NOTE(review): the 'rephrasemodel' key name is an assumption - confirm it against the
        // plugin's configuration defaults. Without that key the behavior is unchanged.
        $name = $this->config['rephrasemodel'] ?? '';
        if ($name === '') {
            $name = $this->config['chatmodel'];
        }

        $this->rephraseModel = $this->loadModel('chat', $name);
        return $this->rephraseModel;
    }

    /**
     * Access a cached Embedding Model
     *
     * @return EmbeddingInterface
     * @throws \Exception
     */
    public function getEmbeddingModel()
    {
        if ($this->embeddingModel instanceof EmbeddingInterface) {
            return $this->embeddingModel;
        }
        $this->embeddingModel = $this->loadModel('embedding', $this->config['embedmodel']);
        return $this->embeddingModel;
    }

    /**
     * Get all known models
     *
     * Reads every Model/<namespace>/models.json below this directory. Each entry is returned with
     * an additional 'instance' key that holds a (new) model instance, or false when the model
     * could not be initialized with the current configuration.
     *
     * @param bool $availableOnly Only return models that could be initialized
     * @param string $typeOnly Only return models of this type ('chat' or 'embedding')
     * @return array All models grouped by type, or the models of $typeOnly (empty for unknown types)
     */
    public function getModels($availableOnly = false, $typeOnly = '')
    {
        $result = [
            'chat' => [],
            'embedding' => [],
        ];

        // glob() may return false on error, treat that like "no definitions found"
        $jsons = glob(__DIR__ . '/Model/*/models.json') ?: [];
        foreach ($jsons as $json) {
            $models = json_decode((string) file_get_contents($json), true);
            if (!is_array($models)) {
                continue; // unreadable or malformed definition file
            }

            // the provider namespace is the name of the directory holding the models.json
            $namespace = basename(dirname($json));

            foreach ($models as $type => $model) {
                if (!is_array($model)) {
                    continue;
                }

                foreach ($model as $name => $info) {
                    if (!is_array($info)) {
                        continue;
                    }

                    try {
                        $info['instance'] = $this->loadModel($type, "$namespace $name");
                        $info['instance']->setDebug($this->debug);
                    } catch (\Exception $e) {
                        if ($availableOnly) {
                            continue;
                        }
                        $info['instance'] = false;
                    }

                    $result[$type]["$namespace $name"] = $info;
                }
            }
        }

        if (!$typeOnly) {
            return $result;
        }
        // an unknown type simply has no models
        return $result[$typeOnly] ?? [];
    }

    /**
     * Initialize a model by config name
     *
     * The name consists of the provider namespace and the model name, separated by a space.
     * The class \dokuwiki\plugin\aichat\Model\<namespace>\<Type>Model is instantiated with the
     * model name and the plugin configuration.
     *
     * @param string $type 'chat' or 'embedding'
     * @param string $name The full model name including provider
     * @return ChatInterface|EmbeddingInterface
     * @throws \Exception when the class is missing, fails to initialize or has the wrong interface
     */
    public function loadModel(string $type, string $name)
    {
        $type = ucfirst(strtolower($type));
        $prefix = '\\dokuwiki\\plugin\\aichat\\Model\\';
        $cname = $type . 'Model';
        $interface = $prefix . $type . 'Interface';

        // sexplode() always yields two elements, so a name without a space gives an empty model name
        [$namespace, $model] = sexplode(' ', $name, 2, '');
        $class = $prefix . $namespace . '\\' . $cname;

        if (!class_exists($class)) {
            throw new \Exception("No $cname found for $namespace");
        }

        try {
            $instance = new $class($model, $this->config);
        } catch (\Exception $e) {
            // keep the original exception as previous to preserve the full context
            throw new \Exception("Failed to initialize $cname for $namespace: " . $e->getMessage(), 0, $e);
        }

        if (!($instance instanceof $interface)) {
            throw new \Exception("$cname for $namespace does not implement $interface");
        }

        return $instance;
    }
}

cli.php

Lines changed: 17 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
use dokuwiki\Extension\CLIPlugin;
44
use dokuwiki\plugin\aichat\Chunk;
5+
use dokuwiki\plugin\aichat\ModelFactory;
56
use dokuwiki\Search\Indexer;
67
use splitbrain\phpcli\Colors;
78
use splitbrain\phpcli\Options;
@@ -18,11 +19,13 @@ class cli_plugin_aichat extends CLIPlugin
1819
/** @var helper_plugin_aichat */
1920
protected $helper;
2021

22+
/** @inheritdoc */
2123
public function __construct($autocatch = true)
2224
{
2325
parent::__construct($autocatch);
2426
$this->helper = plugin_load('helper', 'aichat');
2527
$this->helper->setLogger($this);
28+
$this->loadConfig();
2629
}
2730

2831
/** @inheritDoc */
@@ -77,7 +80,10 @@ protected function setup(Options $options)
7780
/** @inheritDoc */
7881
protected function main(Options $options)
7982
{
80-
$this->loadConfig();
83+
if ($this->loglevel['debug']['enabled']) {
84+
$this->helper->factory->setDebug(true);
85+
}
86+
8187
ini_set('memory_limit', -1);
8288
switch ($options->getCmd()) {
8389
case 'embed':
@@ -219,52 +225,26 @@ protected function split($page)
219225
*/
220226
protected function chat()
221227
{
222-
if ($this->loglevel['debug']['enabled']) {
223-
$this->helper->getChatModel()->setDebug(true);
224-
$this->helper->getRephraseModel()->setDebug(true);
225-
$this->helper->getEmbedModel()->setDebug(true);
226-
}
227-
228228
$history = [];
229229
while ($q = $this->readLine('Your Question')) {
230230
$this->helper->getChatModel()->resetUsageStats();
231231
$this->helper->getRephraseModel()->resetUsageStats();
232-
$this->helper->getEmbedModel()->resetUsageStats();
232+
$this->helper->getEmbeddingModel()->resetUsageStats();
233233
$result = $this->helper->askChatQuestion($q, $history);
234234
$this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
235235
$history[] = [$result['question'], $result['answer']];
236236
$this->printAnswer($result);
237237
}
238238
}
239239

240+
/**
241+
* Print information about the available models
242+
*
243+
* @return void
244+
*/
240245
protected function models()
241246
{
242-
$result = [
243-
'chat' => [],
244-
'embedding' => [],
245-
];
246-
247-
248-
$jsons = glob(__DIR__ . '/Model/*/models.json');
249-
foreach ($jsons as $json) {
250-
$models = json_decode(file_get_contents($json), true);
251-
foreach ($models as $type => $model) {
252-
$namespace = basename(dirname($json));
253-
foreach ($model as $name => $info) {
254-
255-
256-
$class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . ucfirst($type) . 'Model';
257-
try {
258-
new $class($name, $this->conf);
259-
$info['confok'] = true;
260-
} catch (Exception $e) {
261-
$info['confok'] = false;
262-
}
263-
264-
$result[$type]["$namespace $name"] = $info;
265-
}
266-
}
267-
}
247+
$result = (new ModelFactory($this->conf))->getModels();
268248

269249
$td = new TableFormatter($this->colors);
270250
$cols = [30, 20, 20, '*'];
@@ -284,7 +264,7 @@ protected function models()
284264
$info['description'] . "\n"
285265
],
286266
[
287-
$info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
267+
$info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
288268
]
289269
);
290270
}
@@ -307,7 +287,7 @@ protected function models()
307287
$info['description'] . "\n"
308288
],
309289
[
310-
$info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
290+
$info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
311291
]
312292
);
313293
}
@@ -324,12 +304,6 @@ protected function models()
324304
*/
325305
protected function ask($query)
326306
{
327-
if ($this->loglevel['debug']['enabled']) {
328-
$this->helper->getChatModel()->setDebug(true);
329-
$this->helper->getRephraseModel()->setDebug(true);
330-
$this->helper->getEmbedModel()->setDebug(true);
331-
}
332-
333307
$result = $this->helper->askQuestion($query);
334308
$this->printAnswer($result);
335309
}
@@ -441,7 +415,7 @@ protected function printUsage()
441415
{
442416
$chat = $this->helper->getChatModel()->getUsageStats();
443417
$rephrase = $this->helper->getRephraseModel()->getUsageStats();
444-
$embed = $this->helper->getEmbedModel()->getUsageStats();
418+
$embed = $this->helper->getEmbeddingModel()->getUsageStats();
445419

446420
$this->info(
447421
'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',

0 commit comments

Comments
 (0)