Skip to content

Commit 98c1c7a

Browse files
authored
presets: refactor, allow cascade presets from different sources, add global section (#18169)
* presets: refactor, allow cascade presets from different sources * update docs * fix neg arg handling * fix empty mmproj * also filter out server-controlled args before to_ini() * skip loading custom_models if not specified * fix unset_reserved_args * fix crash on windows
1 parent acb73d8 commit 98c1c7a

File tree

6 files changed

+355
-262
lines changed

6 files changed

+355
-262
lines changed

common/arg.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
772772
}
773773
auto opt = *arg_to_options[arg];
774774
std::string val;
775+
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
776+
// bool arg (need to reverse the meaning for negative args)
777+
bool is_neg = std::find(opt.args_neg.begin(), opt.args_neg.end(), arg) != opt.args_neg.end();
778+
val = is_neg ? "0" : "1";
779+
}
775780
if (opt.value_hint != nullptr) {
776781
// arg with single value
777782
check_arg(i);

common/preset.cpp

Lines changed: 187 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "preset.h"
33
#include "peg-parser.h"
44
#include "log.h"
5+
#include "download.h"
56

67
#include <fstream>
78
#include <sstream>
@@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) {
1516
return str.substr(pos);
1617
}
1718

18-
std::vector<std::string> common_preset::to_args() const {
19+
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
1920
std::vector<std::string> args;
2021

22+
if (!bin_path.empty()) {
23+
args.push_back(bin_path);
24+
}
25+
2126
for (const auto & [opt, value] : options) {
2227
args.push_back(opt.args.back()); // use the last arg as the main arg
2328
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
@@ -63,6 +68,52 @@ std::string common_preset::to_ini() const {
6368
return ss.str();
6469
}
6570

71+
void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) {
72+
// try if option exists, update it
73+
for (auto & [opt, val] : options) {
74+
if (opt.env && env == opt.env) {
75+
val = value;
76+
return;
77+
}
78+
}
79+
// if option does not exist, we need to add it
80+
if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) {
81+
throw std::runtime_error(string_format(
82+
"%s: option with env '%s' not found in ctx_params",
83+
__func__, env.c_str()
84+
));
85+
}
86+
options[ctx.key_to_opt.at(env)] = value;
87+
}
88+
89+
void common_preset::unset_option(const std::string & env) {
90+
for (auto it = options.begin(); it != options.end(); ) {
91+
const common_arg & opt = it->first;
92+
if (opt.env && env == opt.env) {
93+
it = options.erase(it);
94+
return;
95+
} else {
96+
++it;
97+
}
98+
}
99+
}
100+
101+
bool common_preset::get_option(const std::string & env, std::string & value) const {
102+
for (const auto & [opt, val] : options) {
103+
if (opt.env && env == opt.env) {
104+
value = val;
105+
return true;
106+
}
107+
}
108+
return false;
109+
}
110+
111+
void common_preset::merge(const common_preset & other) {
112+
for (const auto & [opt, val] : other.options) {
113+
options[opt] = val; // overwrite existing options
114+
}
115+
}
116+
66117
static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
67118
std::map<std::string, std::map<std::string, std::string>> parsed;
68119

@@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
172223
return value;
173224
}
174225

175-
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) {
226+
// Build the argument parser context for the given example and index its
// options by key.
// Members are initialized in declaration order, so ctx_params must be
// declared before key_to_opt (and default_params before ctx_params).
common_preset_context::common_preset_context(llama_example ex)
    : ctx_params(common_params_parser_init(default_params, ex))
    , key_to_opt(get_map_key_opt(ctx_params)) {
}
229+
230+
common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
176231
common_presets out;
177-
auto key_to_opt = get_map_key_opt(ctx_params);
178232
auto ini_data = parse_ini_from_file(path);
179233

180234
for (auto section : ini_data) {
@@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte
188242
for (const auto & [key, value] : section.second) {
189243
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
190244
if (key_to_opt.find(key) != key_to_opt.end()) {
191-
auto & opt = key_to_opt[key];
245+
const auto & opt = key_to_opt.at(key);
192246
if (is_bool_arg(opt)) {
193247
preset.options[opt] = parse_bool_arg(opt, key, value);
194248
} else {
@@ -199,8 +253,137 @@ common_presets common_presets_load(const std::string & path, common_params_conte
199253
// TODO: maybe warn about unknown key?
200254
}
201255
}
256+
257+
if (preset.name == "*") {
258+
// handle global preset
259+
global = preset;
260+
} else {
261+
out[preset.name] = preset;
262+
}
263+
}
264+
265+
return out;
266+
}
267+
268+
// Generate one preset per cached model.
// Each preset is named after the model's string form and carries a single
// option (LLAMA_ARG_HF_REPO) set to that same string.
common_presets common_preset_context::load_from_cache() const {
    common_presets result;

    const auto cached = common_list_cached_models();
    for (const auto & entry : cached) {
        common_preset item;
        item.name = entry.to_string();
        item.set_option(*this, "LLAMA_ARG_HF_REPO", entry.to_string());
        result[item.name] = item;
    }

    return result;
}
}
281+
282+
struct local_model {
283+
std::string name;
284+
std::string path;
285+
std::string path_mmproj;
286+
};
287+
288+
common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const {
289+
if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) {
290+
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str()));
291+
}
292+
293+
std::vector<local_model> models;
294+
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
295+
auto files = fs_list(subdir_path, false);
296+
common_file_info model_file;
297+
common_file_info first_shard_file;
298+
common_file_info mmproj_file;
299+
for (const auto & file : files) {
300+
if (string_ends_with(file.name, ".gguf")) {
301+
if (file.name.find("mmproj") != std::string::npos) {
302+
mmproj_file = file;
303+
} else if (file.name.find("-00001-of-") != std::string::npos) {
304+
first_shard_file = file;
305+
} else {
306+
model_file = file;
307+
}
308+
}
309+
}
310+
// single file model
311+
local_model model{
312+
/* name */ name,
313+
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
314+
/* path_mmproj */ mmproj_file.path // can be empty
315+
};
316+
if (!model.path.empty()) {
317+
models.push_back(model);
318+
}
319+
};
320+
321+
auto files = fs_list(models_dir, true);
322+
for (const auto & file : files) {
323+
if (file.is_dir) {
324+
scan_subdir(file.path, file.name);
325+
} else if (string_ends_with(file.name, ".gguf")) {
326+
// single file model
327+
std::string name = file.name;
328+
string_replace_all(name, ".gguf", "");
329+
local_model model{
330+
/* name */ name,
331+
/* path */ file.path,
332+
/* path_mmproj */ ""
333+
};
334+
models.push_back(model);
335+
}
336+
}
337+
338+
// convert local models to presets
339+
common_presets out;
340+
for (const auto & model : models) {
341+
common_preset preset;
342+
preset.name = model.name;
343+
preset.set_option(*this, "LLAMA_ARG_MODEL", model.path);
344+
if (!model.path_mmproj.empty()) {
345+
preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj);
346+
}
347+
out[preset.name] = preset;
348+
}
349+
350+
return out;
351+
}
352+
353+
// Build a single preset, named COMMON_PRESET_DEFAULT_NAME, from CLI arguments.
// Throws std::runtime_error if common_params_to_map() reports failure.
common_preset common_preset_context::load_from_args(int argc, char ** argv) const {
    common_preset cli_preset;
    cli_preset.name = COMMON_PRESET_DEFAULT_NAME;

    if (!common_params_to_map(argc, argv, ctx_params.ex, cli_preset.options)) {
        throw std::runtime_error("failed to parse CLI arguments into preset");
    }

    return cli_preset;
}
364+
365+
// Combine two preset collections, with `added` taking precedence over `base`.
// A preset present in both is merged option by option; a preset present only
// in `added` is copied over unchanged.
common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const {
    common_presets merged(base);
    for (const auto & item : added) {
        auto pos = merged.find(item.first);
        if (pos == merged.end()) {
            // not in base: add as is
            merged.emplace(item.first, item.second);
        } else {
            // in both: options from `added` overwrite those from `base`
            pos->second.merge(item.second);
        }
    }
    return merged;
}
379+
380+
// Apply every preset in `presets` on top of a copy of `base`.
// Each resulting preset keeps the name of the preset it came from; options
// from that preset override those inherited from `base`.
common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const {
    common_presets result;
    for (const auto & item : presets) {
        common_preset & slot = result[item.first];
        slot = base;
        slot.name = item.first;
        slot.merge(item.second);
    }
    return result;
}

common/preset.h

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,62 @@
1313

1414
constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default";
1515

16+
struct common_preset_context;
17+
1618
struct common_preset {
1719
std::string name;
18-
// TODO: support repeated args in the future
20+
21+
// options are stored as common_arg to string mapping, representing CLI arg and its value
1922
std::map<common_arg, std::string> options;
2023

2124
// convert preset to CLI argument list
22-
std::vector<std::string> to_args() const;
25+
std::vector<std::string> to_args(const std::string & bin_path = "") const;
2326

2427
// convert preset to INI format string
2528
std::string to_ini() const;
2629

2730
// TODO: maybe implement to_env() if needed
31+
32+
// modify preset options where argument is identified by its env variable
33+
void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value);
34+
35+
// unset option by its env variable
36+
void unset_option(const std::string & env);
37+
38+
// get option value by its env variable, return false if not found
39+
bool get_option(const std::string & env, std::string & value) const;
40+
41+
// merge another preset into this one, overwriting existing options
42+
void merge(const common_preset & other);
2843
};
2944

3045
// interface for multiple presets in one file
3146
using common_presets = std::map<std::string, common_preset>;
32-
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params);
47+
48+
// context for loading and editing presets
49+
struct common_preset_context {
50+
common_params default_params; // unused for now
51+
common_params_context ctx_params;
52+
std::map<std::string, common_arg> key_to_opt;
53+
common_preset_context(llama_example ex);
54+
55+
// load presets from INI file
56+
common_presets load_from_ini(const std::string & path, common_preset & global) const;
57+
58+
// generate presets from cached models
59+
common_presets load_from_cache() const;
60+
61+
// generate presets from local models directory
62+
// for the directory structure, see "Using multiple models" in server/README.md
63+
common_presets load_from_models_dir(const std::string & models_dir) const;
64+
65+
// generate one preset from CLI arguments
66+
common_preset load_from_args(int argc, char ** argv) const;
67+
68+
// cascade multiple presets if exist on both: base < added
69+
// if preset does not exist in base, it will be added without modification
70+
common_presets cascade(const common_presets & base, const common_presets & added) const;
71+
72+
// apply presets over a base preset (same idea as CSS cascading)
73+
common_presets cascade(const common_preset & base, const common_presets & presets) const;
74+
};

tools/server/README.md

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,12 @@ Example:
14431443
```ini
14441444
version = 1
14451445

1446+
; (Optional) This section provides global settings shared across all presets.
1447+
; If the same key is defined in a specific preset, it will override the value in this global section.
1448+
[*]
1449+
c = 8192
1450+
n-gpu-layers = 8
1451+
14461452
; If the key corresponds to an existing model on the server,
14471453
; this will be used as the default config for that model
14481454
[ggml-org/MY-MODEL-GGUF:Q8_0]
@@ -1462,12 +1468,17 @@ model-draft = ./my-models/draft.gguf
14621468
model-draft = /Users/abc/my-models/draft.gguf
14631469

14641470
; If the key does NOT correspond to an existing model,
1465-
; you need to specify at least the model path
1471+
; you need to specify at least the model path or HF repo
14661472
[custom_model]
14671473
model = /Users/abc/my-awesome-model-Q4_K_M.gguf
14681474
```
14691475

1470-
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upload loading.
1476+
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upon loading.
1477+
1478+
The precedence rule for preset options is as follows:
1479+
1. **Command-line arguments** passed to `llama-server` (highest priority)
1480+
2. **Model-specific options** defined in the preset file (e.g. `[ggml-org/MY-MODEL...]`)
1481+
3. **Global options** defined in the preset file (`[*]`)
14711482

14721483
### Routing requests
14731484

0 commit comments

Comments
 (0)