From 4e475a8b7435d88939b9f4a8272f467f1997928a Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 13:46:52 +0100 Subject: [PATCH 1/8] presets: refactor, allow cascade presets from different sources --- common/arg.cpp | 4 +- common/preset.cpp | 193 +++++++++++++++++++- common/preset.h | 47 ++++- tools/server/server-models.cpp | 316 +++++++++------------------------ tools/server/server-models.h | 23 +-- 5 files changed, 325 insertions(+), 258 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index b6d16168ebc..4a0ec656bdd 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -873,7 +873,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex sampler_type_chars += common_sampler_type_to_chr(sampler); sampler_type_names += common_sampler_type_to_str(sampler) + ";"; } - sampler_type_names.pop_back(); + if (!sampler_type_names.empty()) { + sampler_type_names.pop_back(); // remove last semicolon + } /** diff --git a/common/preset.cpp b/common/preset.cpp index 60746aad581..9a21fe75e64 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -2,6 +2,7 @@ #include "preset.h" #include "peg-parser.h" #include "log.h" +#include "download.h" #include #include @@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) { return str.substr(pos); } -std::vector common_preset::to_args() const { +std::vector common_preset::to_args(const std::string & bin_path) const { std::vector args; + if (!bin_path.empty()) { + args.push_back(bin_path); + } + for (const auto & [opt, value] : options) { args.push_back(opt.args.back()); // use the last arg as the main arg if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) { @@ -63,6 +68,52 @@ std::string common_preset::to_ini() const { return ss.str(); } +void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) { + // try if option exists, update it + for (auto & [opt, val] : options) { + if (opt.env && env == opt.env) { + val = value; + return; + } + } + // if option does not exist, we need to add it + if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) { + throw std::runtime_error(string_format( + "%s: option with env '%s' not found in ctx_params", + __func__, env.c_str() + )); + } + options[ctx.key_to_opt.at(env)] = value; +} + +void common_preset::unset_option(const std::string & env) { + for (auto it = options.begin(); it != options.end(); ) { + const common_arg & opt = it->first; + if (opt.env && env == opt.env) { + it = options.erase(it); + return; + } else { + ++it; + } + } +} + +bool common_preset::get_option(const std::string & env, std::string & value) const { + for (const auto & [opt, val] : options) { + if (opt.env && env == opt.env) { + value = val; + return true; + } + } + return false; +} + +void common_preset::merge(const common_preset & other) { + for (const auto & [opt, val] : other.options) { + options[opt] = val; // overwrite existing options + } +} + static std::map> parse_ini_from_file(const std::string & path) { std::map> parsed; @@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke return value; } -common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) { +common_preset_context::common_preset_context(common_params & default_params, llama_example ex) + : ctx_params(common_params_parser_init(default_params, ex)), + key_to_opt(get_map_key_opt(ctx_params)) {} + +common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const { common_presets out; - auto key_to_opt = get_map_key_opt(ctx_params); auto ini_data = parse_ini_from_file(path); for (auto section : ini_data) { @@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte for (const auto & [key, value] : section.second) { LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str()); if (key_to_opt.find(key) != key_to_opt.end()) { - auto & opt = key_to_opt[key]; + const auto & opt = key_to_opt.at(key); if (is_bool_arg(opt)) { preset.options[opt] = parse_bool_arg(opt, key, value); } else { @@ -199,8 +253,139 @@ common_presets common_presets_load(const std::string & path, common_params_conte // TODO: maybe warn about unknown key? } } + + if (preset.name == "*") { + // handle global preset + global = preset; + } else { + out[preset.name] = preset; + } + } + + return out; +} + +common_presets common_preset_context::load_from_cache() const { + common_presets out; + + auto cached_models = common_list_cached_models(); + for (const auto & model : cached_models) { + common_preset preset; + preset.name = model.to_string(); + preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string()); + out[preset.name] = preset; + } + + return out; +} + +struct local_model { + std::string name; + std::string path; + std::string path_mmproj; +}; + +common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const { + if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) { + throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str())); + } + + std::vector models; + auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) { + auto files = fs_list(subdir_path, false); + common_file_info model_file; + common_file_info first_shard_file; + common_file_info mmproj_file; + for (const auto & file : files) { + if (string_ends_with(file.name, ".gguf")) { + if (file.name.find("mmproj") != std::string::npos) { + mmproj_file = file; + } else if (file.name.find("-00001-of-") != std::string::npos) { + first_shard_file = file; + } else { + model_file = file; + } + } + } + // single file model + local_model model{ + /* name */ name, + /* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path, + /* path_mmproj */ mmproj_file.path // can be empty + }; + if (!model.path.empty()) { + models.push_back(model); + } + }; + + auto files = fs_list(models_dir, true); + for (const auto & file : files) { + if (file.is_dir) { + scan_subdir(file.path, file.name); + } else if (string_ends_with(file.name, ".gguf")) { + // single file model + std::string name = file.name; + string_replace_all(name, ".gguf", ""); + local_model model{ + /* name */ name, + /* path */ file.path, + /* path_mmproj */ "" + }; + models.push_back(model); + } + } + + // convert local models to presets + common_presets out; + common_preset base; + base.set_option(*this, "LLAMA_ARG_MODEL", ""); + base.set_option(*this, "LLAMA_ARG_MMPROJ", ""); + for (const auto & model : models) { + common_preset preset = base; // copy + preset.name = model.name; + preset.set_option(*this, "LLAMA_ARG_MODEL", model.path); + if (!model.path_mmproj.empty()) { + preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj); + } out[preset.name] = preset; } return out; } + +common_preset common_preset_context::load_from_args(int argc, char ** argv) const { + common_preset preset; + preset.name = COMMON_PRESET_DEFAULT_NAME; + + bool ok = common_params_to_map(argc, argv, ctx_params.ex, preset.options); + if (!ok) { + throw std::runtime_error("failed to parse CLI arguments into preset"); + } + + return preset; +} + +common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const { + common_presets out = base; // copy + for (const auto & [name, preset_added] : added) { + if (out.find(name) != out.end()) { + // if exists, merge + common_preset & target = out[name]; + target.merge(preset_added); + } else { + // otherwise, add directly + out[name] = preset_added; + } + } + return out; +} + +common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const { + common_presets out; + for (const auto & [name, preset] : presets) { + common_preset tmp = base; // copy + tmp.merge(preset); + out[name] = std::move(tmp); + } + return out; +} diff --git a/common/preset.h b/common/preset.h index dceb849eb81..d7e06a8f872 100644 --- a/common/preset.h +++ b/common/preset.h @@ -13,20 +13,61 @@ constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default"; +struct common_preset_context; + struct common_preset { std::string name; - // TODO: support repeated args in the future + + // options are stored as common_arg to string mapping, representing CLI arg and its value std::map options; // convert preset to CLI argument list - std::vector to_args() const; + std::vector to_args(const std::string & bin_path = "") const; // convert preset to INI format string std::string to_ini() const; // TODO: maybe implement to_env() if needed + + // modify preset options where argument is identified by its env variable + void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value); + + // unset option by its env variable + void unset_option(const std::string & env); + + // get option value by its env variable, return false if not found + bool get_option(const std::string & env, std::string & value) const; + + // merge another preset into this one, overwriting existing options + void merge(const common_preset & other); }; // interface for multiple presets in one file using common_presets = std::map; -common_presets common_presets_load(const std::string & path, common_params_context & ctx_params); + +// context for loading and editing presets +struct common_preset_context { + common_params_context ctx_params; + std::map key_to_opt; + common_preset_context(common_params & default_params, llama_example ex); + + // load presets from INI file + common_presets load_from_ini(const std::string & path, common_preset & global) const; + + // generate presets from cached models + common_presets load_from_cache() const; + + // generate presets from local models directory + // for the directory structure, see "Using multiple models" in server/README.md + common_presets load_from_models_dir(const std::string & models_dir) const; + + // generate one preset from CLI arguments + common_preset load_from_args(int argc, char ** argv) const; + + // cascade multiple presets if exist on both: base < added + // if preset does not exist in base, it will be added without modification + common_presets cascade(const common_presets & base, const common_presets & added) const; + + // apply presets over a base preset (same idea as CSS cascading) + common_presets cascade(const common_preset & base, const common_presets & presets) const; +}; diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index c1f86e54933..1b70a395074 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -82,154 +82,29 @@ static std::filesystem::path get_server_exec_path() { #endif } -struct local_model { - std::string name; - std::string path; - std::string path_mmproj; -}; - -static std::vector list_local_models(const std::string & dir) { - if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) { - throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str())); - } - - std::vector models; - auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) { - auto files = fs_list(subdir_path, false); - common_file_info model_file; - common_file_info first_shard_file; - common_file_info mmproj_file; - for (const auto & file : files) { - if (string_ends_with(file.name, ".gguf")) { - if (file.name.find("mmproj") != std::string::npos) { - mmproj_file = file; - } else if (file.name.find("-00001-of-") != std::string::npos) { - first_shard_file = file; - } else { - model_file = file; - } - } - } - // single file model - local_model model{ - /* name */ name, - /* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path, - /* path_mmproj */ mmproj_file.path // can be empty - }; - if (!model.path.empty()) { - models.push_back(model); - } - }; - - auto files = fs_list(dir, true); - for (const auto & file : files) { - if (file.is_dir) { - scan_subdir(file.path, file.name); - } else if (string_ends_with(file.name, ".gguf")) { - // single file model - std::string name = file.name; - string_replace_all(name, ".gguf", ""); - local_model model{ - /* name */ name, - /* path */ file.path, - /* path_mmproj */ "" - }; - models.push_back(model); - } - } - return models; -} - -// -// server_presets -// - - -server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path) - : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) { - if (!presets_path.empty()) { - presets = common_presets_load(presets_path, ctx_params); - SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str()); - } - - // populate reserved args (will be appended by the router) - for (auto & opt : ctx_params.options) { - if (opt.env == nullptr) { - continue; - } - std::string env = opt.env; - if (env == "LLAMA_ARG_PORT" || - env == "LLAMA_ARG_HOST" || - env == "LLAMA_ARG_ALIAS" || - env == "LLAMA_ARG_API_KEY" || - env == "LLAMA_ARG_MODELS_DIR" || - env == "LLAMA_ARG_MODELS_MAX" || - env == "LLAMA_ARG_MODELS_PRESET" || - env == "LLAMA_ARG_MODEL" || - env == "LLAMA_ARG_MMPROJ" || - env == "LLAMA_ARG_HF_REPO" || - env == "LLAMA_ARG_NO_MODELS_AUTOLOAD" || - env == "LLAMA_ARG_SSL_KEY_FILE" || - env == "LLAMA_ARG_SSL_CERT_FILE") { - control_args[env] = opt; - } - } - - // read base args from router's argv - common_params_to_map(argc, argv, LLAMA_EXAMPLE_SERVER, base_args); - - // remove any router-controlled args from base_args - for (const auto & cargs : control_args) { - auto it = base_args.find(cargs.second); - if (it != base_args.end()) { - base_args.erase(it); - } - } -} - -common_preset server_presets::get_preset(const std::string & name) { - auto it = presets.find(name); - if (it != presets.end()) { - return it->second; - } - return common_preset(); -} - -void server_presets::render_args(server_model_meta & meta) { - common_preset preset = meta.preset; // copy - // merging 3 kinds of args: - // 1. model-specific args (from preset) - // force removing control args if any - for (auto & cargs : control_args) { - if (preset.options.find(cargs.second) != preset.options.end()) { - SRV_WRN("Preset '%s' contains reserved arg '%s', removing it\n", preset.name.c_str(), cargs.second.args[0]); - preset.options.erase(cargs.second); - } - } - // 2. base args (from router) - // inherit from base args - for (const auto & [arg, value] : base_args) { - preset.options[arg] = value; - } - // 3. control args (from router) - // set control values - preset.options[control_args["LLAMA_ARG_HOST"]] = CHILD_ADDR; - preset.options[control_args["LLAMA_ARG_PORT"]] = std::to_string(meta.port); - preset.options[control_args["LLAMA_ARG_ALIAS"]] = meta.name; - if (meta.in_cache) { - preset.options[control_args["LLAMA_ARG_HF_REPO"]] = meta.name; - } else { - preset.options[control_args["LLAMA_ARG_MODEL"]] = meta.path; - if (!meta.path_mmproj.empty()) { - preset.options[control_args["LLAMA_ARG_MMPROJ"]] = meta.path_mmproj; - } - } - // disable SSL for child processes (HTTPS already handled by router) - preset.options[control_args["LLAMA_ARG_SSL_KEY_FILE"]] = ""; - preset.options[control_args["LLAMA_ARG_SSL_CERT_FILE"]] = ""; - meta.args = preset.to_args(); - // add back the binary path at the front - meta.args.insert(meta.args.begin(), get_server_exec_path().string()); +static void unset_reserved_args(common_preset & preset, bool unset_model_args) { + preset.unset_option("LLAMA_ARG_SSL_KEY_FILE"); + preset.unset_option("LLAMA_ARG_SSL_CERT_FILE"); + preset.unset_option("LLAMA_ARG_API_KEY"); + preset.unset_option("LLAMA_ARG_MODELS_DIR"); + preset.unset_option("LLAMA_ARG_MODELS_MAX"); + preset.unset_option("LLAMA_ARG_MODELS_PRESET"); + preset.unset_option("LLAMA_ARG_NO_MODELS_AUTOLOAD"); + if (unset_model_args) { + preset.unset_option("LLAMA_ARG_MODEL"); + preset.unset_option("LLAMA_ARG_MMPROJ"); + preset.unset_option("LLAMA_ARG_HF_REPO"); + } +} + +void server_model_meta::update_args(common_preset_context & ctx_preset, std::string bin_path) { + // update params + unset_reserved_args(preset, false); + preset.set_option(ctx_preset, "LLAMA_ARG_HOST", CHILD_ADDR); + preset.set_option(ctx_preset, "LLAMA_ARG_PORT", std::to_string(port)); + preset.set_option(ctx_preset, "LLAMA_ARG_ALIAS", name); + // render args + args = preset.to_args(bin_path); } // @@ -240,20 +115,20 @@ server_models::server_models( const common_params & params, int argc, char ** argv, - char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) { - for (int i = 0; i < argc; i++) { - base_args.push_back(std::string(argv[i])); - } + char ** envp) + : ctx_preset(base_params, LLAMA_EXAMPLE_SERVER), + base_params(params), + base_preset(ctx_preset.load_from_args(argc, argv)) { for (char ** env = envp; *env != nullptr; env++) { base_env.push_back(std::string(*env)); } - GGML_ASSERT(!base_args.empty()); // set binary path try { - base_args[0] = get_server_exec_path().string(); + bin_path = get_server_exec_path().string(); } catch (const std::exception & e) { + bin_path = argv[0]; LOG_WRN("failed to get server executable path: %s\n", e.what()); - LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str()); + LOG_WRN("using original argv[0] as fallback: %s\n", argv[0]); } load_models(); } @@ -262,7 +137,7 @@ void server_models::add_model(server_model_meta && meta) { if (mapping.find(meta.name) != mapping.end()) { throw std::runtime_error(string_format("model '%s' appears multiple times", meta.name.c_str())); } - presets.render_args(meta); // populate meta.args + meta.update_args(ctx_preset, bin_path); // render args std::string name = meta.name; mapping[name] = instance_t{ /* subproc */ std::make_shared(), @@ -271,86 +146,59 @@ void server_models::add_model(server_model_meta && meta) { }; } -static std::vector list_custom_path_models(server_presets & presets) { - // detect any custom-path models in presets - std::vector custom_models; - for (auto & [model_name, preset] : presets.presets) { - local_model model; - model.name = model_name; - std::vector to_erase; - for (auto & [arg, value] : preset.options) { - std::string env(arg.env ? arg.env : ""); - if (env == "LLAMA_ARG_MODEL") { - model.path = value; - to_erase.push_back(arg); - } - if (env == "LLAMA_ARG_MMPROJ") { - model.path_mmproj = value; - to_erase.push_back(arg); - } - } - for (auto & arg : to_erase) { - preset.options.erase(arg); - } - if (!model.name.empty() && !model.path.empty()) { - custom_models.push_back(model); - } - } - return custom_models; -} - // TODO: allow refreshing cached model list void server_models::load_models() { // loading models from 3 sources: // 1. cached models - auto cached_models = common_list_cached_models(); - for (const auto & model : cached_models) { - server_model_meta meta{ - /* preset */ presets.get_preset(model.to_string()), - /* name */ model.to_string(), - /* path */ model.manifest_path, - /* path_mmproj */ "", // auto-detected when loading - /* in_cache */ true, - /* port */ 0, - /* status */ SERVER_MODEL_STATUS_UNLOADED, - /* last_used */ 0, - /* args */ std::vector(), - /* exit_code */ 0 - }; - add_model(std::move(meta)); - } - // 2. local models specificed via --models-dir + common_presets cached_models = ctx_preset.load_from_cache(); + SRV_INF("Loaded %zu cached model presets\n", cached_models.size()); + // 2. local models from --models-dir + common_presets local_models; if (!base_params.models_dir.empty()) { - auto local_models = list_local_models(base_params.models_dir); - for (const auto & model : local_models) { - if (mapping.find(model.name) != mapping.end()) { - // already exists in cached models, skip - continue; - } - server_model_meta meta{ - /* preset */ presets.get_preset(model.name), - /* name */ model.name, - /* path */ model.path, - /* path_mmproj */ model.path_mmproj, - /* in_cache */ false, - /* port */ 0, - /* status */ SERVER_MODEL_STATUS_UNLOADED, - /* last_used */ 0, - /* args */ std::vector(), - /* exit_code */ 0 - }; - add_model(std::move(meta)); + local_models = ctx_preset.load_from_models_dir(base_params.models_dir); + SRV_INF("Loaded %zu local model presets from %s\n", local_models.size(), base_params.models_dir.c_str()); + } + // 3. custom-path models from presets + common_preset global = {}; + common_presets custom_models = ctx_preset.load_from_ini(base_params.models_preset, global); + SRV_INF("Loaded %zu custom model presets from %s\n", custom_models.size(), base_params.models_preset.c_str()); + + // cascade, apply global preset first + cached_models = ctx_preset.cascade(global, cached_models); + local_models = ctx_preset.cascade(global, local_models); + custom_models = ctx_preset.cascade(global, custom_models); + + // note: if a model exists in both cached and local, local takes precedence + common_presets final_presets; + for (const auto & [name, preset] : cached_models) { + final_presets[name] = preset; + } + for (const auto & [name, preset] : local_models) { + final_presets[name] = preset; + } + + // process custom presets from INI + for (const auto & [name, custom] : custom_models) { + if (final_presets.find(name) != final_presets.end()) { + // apply custom config if exists + common_preset & target = final_presets[name]; + target.merge(custom); + } else { + // otherwise add directly + final_presets[name] = custom; } } - // 3. custom-path models specified in presets - auto custom_models = list_custom_path_models(presets); - for (const auto & model : custom_models) { + + // server base preset from CLI args take highest precedence + for (auto & [name, preset] : final_presets) { + preset.merge(base_preset); + } + + // convert presets to server_model_meta and add to mapping + for (const auto & preset : final_presets) { server_model_meta meta{ - /* preset */ presets.get_preset(model.name), - /* name */ model.name, - /* path */ model.path, - /* path_mmproj */ model.path_mmproj, - /* in_cache */ false, + /* preset */ preset.second, + /* name */ preset.first, /* port */ 0, /* status */ SERVER_MODEL_STATUS_UNLOADED, /* last_used */ 0, @@ -359,10 +207,16 @@ void server_models::load_models() { }; add_model(std::move(meta)); } + // log available models + std::unordered_set custom_names; + for (const auto & [name, preset] : custom_models) { + custom_names.insert(name); + } SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size()); for (const auto & [name, inst] : mapping) { - SRV_INF(" %c %s\n", inst.meta.preset.name.empty() ? ' ' : '*', name.c_str()); + bool has_custom = custom_names.find(name) != custom_names.end(); + SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str()); } } @@ -526,7 +380,7 @@ void server_models::load(const std::string & name) { { SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port); - presets.render_args(inst.meta); // update meta.args + inst.meta.update_args(ctx_preset, bin_path); // render args std::vector child_args = inst.meta.args; // copy std::vector child_env = base_env; // copy @@ -888,8 +742,6 @@ void server_models_routes::init_routes() { {"object", "model"}, // for OAI-compat {"owned_by", "llamacpp"}, // for OAI-compat {"created", t}, // for OAI-compat - {"in_cache", meta.in_cache}, - {"path", meta.path}, {"status", status}, // TODO: add other fields, may require reading GGUF metadata }); diff --git a/tools/server/server-models.h b/tools/server/server-models.h index cbc4c432460..56fb398e311 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -51,9 +51,6 @@ static std::string server_model_status_to_string(server_model_status status) { struct server_model_meta { common_preset preset; std::string name; - std::string path; - std::string path_mmproj; // only available if in_cache=false - bool in_cache = false; // if true, use -hf; use -m otherwise int port = 0; server_model_status status = SERVER_MODEL_STATUS_UNLOADED; int64_t last_used = 0; // for LRU unloading @@ -67,19 +64,8 @@ struct server_model_meta { bool is_failed() const { return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0; } -}; -// the server_presets struct holds the presets read from presets.ini -// as well as base args from the router server -struct server_presets { - common_presets presets; - common_params_context ctx_params; - std::map base_args; - std::map control_args; // args reserved for server control - - server_presets(int argc, char ** argv, common_params & base_params, const std::string & models_dir); - common_preset get_preset(const std::string & name); - void render_args(server_model_meta & meta); + void update_args(common_preset_context & ctx_presets, std::string bin_path); }; struct subprocess_s; @@ -97,11 +83,12 @@ struct server_models { std::condition_variable cv; std::map mapping; + common_preset_context ctx_preset; + common_params base_params; - std::vector base_args; + std::string bin_path; std::vector base_env; - - server_presets presets; + common_preset base_preset; // base preset from llama-server CLI args void update_meta(const std::string & name, const server_model_meta & meta); From 5abab16d75e1aa861a186e1eccd01db6a66b7eb7 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 13:56:25 +0100 Subject: [PATCH 2/8] update docs --- tools/server/README.md | 15 +++++++++++++-- tools/server/server-models.cpp | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/server/README.md b/tools/server/README.md index fd5a59e848d..88937b577e7 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1443,6 +1443,12 @@ Example: ```ini version = 1 +; (Optional) This section provides global settings shared across all presets. +; If the same key is defined in a specific preset, it will override the value in this global section. +[*] +c = 8192 +n-gpu-layer = 8 + ; If the key corresponds to an existing model on the server, ; this will be used as the default config for that model [ggml-org/MY-MODEL-GGUF:Q8_0] @@ -1462,12 +1468,17 @@ model-draft = ./my-models/draft.gguf model-draft = /Users/abc/my-models/draft.gguf ; If the key does NOT correspond to an existing model, -; you need to specify at least the model path +; you need to specify at least the model path or HF repo [custom_model] model = /Users/abc/my-awesome-model-Q4_K_M.gguf ``` -Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upload loading. +Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upon loading. + +The precedence rule for preset options is as follows: +1. **Command-line arguments** passed to `llama-server` (highest priority) +2. **Model-specific options** defined in the preset file (e.g. `[ggml-org/MY-MODEL...]`) +3. **Global options** defined in the preset file (`[*]`) ### Routing requests diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 1b70a395074..db85d514bf5 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -122,6 +122,8 @@ server_models::server_models( for (char ** env = envp; *env != nullptr; env++) { base_env.push_back(std::string(*env)); } + // clean up base preset + unset_reserved_args(base_preset, true); // set binary path try { bin_path = get_server_exec_path().string(); From 0d04bba5e81e0a262f08b342dd9b9c112e6bdb7d Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 14:03:26 +0100 Subject: [PATCH 3/8] fix neg arg handling --- common/arg.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/arg.cpp b/common/arg.cpp index 4a0ec656bdd..5f16a3b3b4a 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -772,6 +772,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map Date: Thu, 18 Dec 2025 14:06:04 +0100 Subject: [PATCH 4/8] fix empty mmproj --- common/preset.cpp | 5 +---- tools/server/server-models.cpp | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/common/preset.cpp b/common/preset.cpp index 9a21fe75e64..0b2c7196967 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -337,11 +337,8 @@ common_presets common_preset_context::load_from_models_dir(const std::string & m // convert local models to presets common_presets out; - common_preset base; - base.set_option(*this, "LLAMA_ARG_MODEL", ""); - base.set_option(*this, "LLAMA_ARG_MMPROJ", ""); for (const auto & model : models) { - common_preset preset = base; // copy + common_preset preset; preset.name = model.name; preset.set_option(*this, "LLAMA_ARG_MODEL", model.path); if (!model.path_mmproj.empty()) { diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index db85d514bf5..0cf0e22866e 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -103,6 +103,7 @@ void server_model_meta::update_args(common_preset_context & ctx_preset, std::str preset.set_option(ctx_preset, "LLAMA_ARG_HOST", CHILD_ADDR); preset.set_option(ctx_preset, "LLAMA_ARG_PORT", std::to_string(port)); preset.set_option(ctx_preset, "LLAMA_ARG_ALIAS", name); + // TODO: maybe validate preset before rendering ? // render args args = preset.to_args(bin_path); } From 4004c4770f71854bb661d9fce29abb71c9d51668 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 14:12:38 +0100 Subject: [PATCH 5/8] also filter out server-controlled args before to_ini() --- common/preset.cpp | 1 + tools/server/server-models.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/common/preset.cpp b/common/preset.cpp index 0b2c7196967..fd12176fe91 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -381,6 +381,7 @@ common_presets common_preset_context::cascade(const common_preset & base, const common_presets out; for (const auto & [name, preset] : presets) { common_preset tmp = base; // copy + tmp.name = name; tmp.merge(preset); out[name] = std::move(tmp); } diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 0cf0e22866e..e86b3bc2cc0 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -734,7 +734,12 @@ void server_models_routes::init_routes() { {"args", meta.args}, }; if (!meta.preset.name.empty()) { - status["preset"] = meta.preset.to_ini(); + common_preset preset_copy = meta.preset; + unset_reserved_args(preset_copy, false); + preset_copy.unset_option("LLAMA_ARG_HOST"); + preset_copy.unset_option("LLAMA_ARG_PORT"); + preset_copy.unset_option("LLAMA_ARG_ALIAS"); + status["preset"] = preset_copy.to_ini(); } if (meta.is_failed()) { status["exit_code"] = meta.exit_code; From ac6f8ca1cd50eb4e90ba04f0f8c2327c06a32fe9 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 17:33:32 +0100 Subject: [PATCH 6/8] skip loading custom_models if not specified --- tools/server/server-models.cpp | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index e86b3bc2cc0..c2bbdf33e51 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -163,13 +163,16 @@ void server_models::load_models() { } // 3. custom-path models from presets common_preset global = {}; - common_presets custom_models = ctx_preset.load_from_ini(base_params.models_preset, global); - SRV_INF("Loaded %zu custom model presets from %s\n", custom_models.size(), base_params.models_preset.c_str()); + common_presets custom_presets = {}; + if (!base_params.models_preset.empty()) { + custom_presets = ctx_preset.load_from_ini(base_params.models_preset, global); + SRV_INF("Loaded %zu custom model presets from %s\n", custom_presets.size(), base_params.models_preset.c_str()); + } // cascade, apply global preset first - cached_models = ctx_preset.cascade(global, cached_models); - local_models = ctx_preset.cascade(global, local_models); - custom_models = ctx_preset.cascade(global, custom_models); + cached_models = ctx_preset.cascade(global, cached_models); + local_models = ctx_preset.cascade(global, local_models); + custom_presets = ctx_preset.cascade(global, custom_presets); // note: if a model exists in both cached and local, local takes precedence common_presets final_presets; @@ -181,7 +184,7 @@ void server_models::load_models() { } // process custom presets from INI - for (const auto & [name, custom] : custom_models) { + for (const auto & [name, custom] : custom_presets) { if (final_presets.find(name) != final_presets.end()) { // apply custom config if exists common_preset & target = final_presets[name]; @@ -212,14 +215,16 @@ void server_models::load_models() { } // log available models - std::unordered_set custom_names; - for (const auto & [name, preset] : custom_models) { - custom_names.insert(name); - } - SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size()); - for (const auto & [name, inst] : mapping) { - bool has_custom = custom_names.find(name) != custom_names.end(); - SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str()); + { + std::unordered_set custom_names; + for (const auto & [name, preset] : custom_presets) { + custom_names.insert(name); + } + SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size()); + for (const auto & [name, inst] : mapping) { + bool has_custom = custom_names.find(name) != custom_names.end(); + SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str()); + } } } From 11f8109b69180846db1bd24c4c6410c015d4f0d6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 22:26:32 +0100 Subject: [PATCH 7/8] fix unset_reserved_args --- tools/server/server-models.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index c2bbdf33e51..30434be96c2 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -85,11 +85,11 @@ static std::filesystem::path get_server_exec_path() { static void unset_reserved_args(common_preset & preset, bool unset_model_args) { preset.unset_option("LLAMA_ARG_SSL_KEY_FILE"); preset.unset_option("LLAMA_ARG_SSL_CERT_FILE"); - preset.unset_option("LLAMA_ARG_API_KEY"); + preset.unset_option("LLAMA_API_KEY"); preset.unset_option("LLAMA_ARG_MODELS_DIR"); preset.unset_option("LLAMA_ARG_MODELS_MAX"); preset.unset_option("LLAMA_ARG_MODELS_PRESET"); - preset.unset_option("LLAMA_ARG_NO_MODELS_AUTOLOAD"); + preset.unset_option("LLAMA_ARG_MODELS_AUTOLOAD"); if (unset_model_args) { preset.unset_option("LLAMA_ARG_MODEL"); preset.unset_option("LLAMA_ARG_MMPROJ"); From 95b79963068d64953d3dd2afad5ad0942ddc4e85 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 18 Dec 2025 22:48:38 +0100 Subject: [PATCH 8/8] fix crash on windows --- common/preset.cpp | 2 +- common/preset.h | 3 ++- tools/server/server-models.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/common/preset.cpp b/common/preset.cpp index fd12176fe91..1aa9864d0aa 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -223,7 +223,7 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke return value; } -common_preset_context::common_preset_context(common_params & default_params, llama_example ex) +common_preset_context::common_preset_context(llama_example ex) : ctx_params(common_params_parser_init(default_params, ex)), key_to_opt(get_map_key_opt(ctx_params)) {} diff --git a/common/preset.h b/common/preset.h index d7e06a8f872..3a84d1be29c 100644 --- a/common/preset.h +++ b/common/preset.h @@ -47,9 +47,10 @@ using common_presets = std::map; // context for loading and editing presets struct common_preset_context { + common_params default_params; // unused for now common_params_context ctx_params; std::map key_to_opt; - common_preset_context(common_params & default_params, llama_example ex); + common_preset_context(llama_example ex); // load presets from INI file common_presets load_from_ini(const std::string & path, common_preset & global) const; diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 30434be96c2..db7ab667f93 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -117,7 +117,7 @@ server_models::server_models( int argc, char ** argv, char ** envp) - : ctx_preset(base_params, LLAMA_EXAMPLE_SERVER), + : ctx_preset(LLAMA_EXAMPLE_SERVER), base_params(params), base_preset(ctx_preset.load_from_args(argc, argv)) { for (char ** env = envp; *env != nullptr; env++) {