Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
}
auto opt = *arg_to_options[arg];
std::string val;
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
// bool arg (need to reverse the meaning for negative args)
bool is_neg = std::find(opt.args_neg.begin(), opt.args_neg.end(), arg) != opt.args_neg.end();
val = is_neg ? "0" : "1";
}
if (opt.value_hint != nullptr) {
// arg with single value
check_arg(i);
Expand Down
191 changes: 187 additions & 4 deletions common/preset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "preset.h"
#include "peg-parser.h"
#include "log.h"
#include "download.h"

#include <fstream>
#include <sstream>
Expand All @@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) {
return str.substr(pos);
}

std::vector<std::string> common_preset::to_args() const {
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
std::vector<std::string> args;

if (!bin_path.empty()) {
args.push_back(bin_path);
}

for (const auto & [opt, value] : options) {
args.push_back(opt.args.back()); // use the last arg as the main arg
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
Expand Down Expand Up @@ -63,6 +68,52 @@ std::string common_preset::to_ini() const {
return ss.str();
}

void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) {
// try if option exists, update it
for (auto & [opt, val] : options) {
if (opt.env && env == opt.env) {
val = value;
return;
}
}
// if option does not exist, we need to add it
if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) {
throw std::runtime_error(string_format(
"%s: option with env '%s' not found in ctx_params",
__func__, env.c_str()
));
}
options[ctx.key_to_opt.at(env)] = value;
}

void common_preset::unset_option(const std::string & env) {
for (auto it = options.begin(); it != options.end(); ) {
const common_arg & opt = it->first;
if (opt.env && env == opt.env) {
it = options.erase(it);
return;
} else {
++it;
}
}
}

bool common_preset::get_option(const std::string & env, std::string & value) const {
for (const auto & [opt, val] : options) {
if (opt.env && env == opt.env) {
value = val;
return true;
}
}
return false;
}

void common_preset::merge(const common_preset & other) {
for (const auto & [opt, val] : other.options) {
options[opt] = val; // overwrite existing options
}
}

static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
std::map<std::string, std::map<std::string, std::string>> parsed;

Expand Down Expand Up @@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
return value;
}

common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) {
common_preset_context::common_preset_context(common_params & default_params, llama_example ex)
: ctx_params(common_params_parser_init(default_params, ex)),
key_to_opt(get_map_key_opt(ctx_params)) {}

common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
common_presets out;
auto key_to_opt = get_map_key_opt(ctx_params);
auto ini_data = parse_ini_from_file(path);

for (auto section : ini_data) {
Expand All @@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte
for (const auto & [key, value] : section.second) {
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
if (key_to_opt.find(key) != key_to_opt.end()) {
auto & opt = key_to_opt[key];
const auto & opt = key_to_opt.at(key);
if (is_bool_arg(opt)) {
preset.options[opt] = parse_bool_arg(opt, key, value);
} else {
Expand All @@ -199,8 +253,137 @@ common_presets common_presets_load(const std::string & path, common_params_conte
// TODO: maybe warn about unknown key?
}
}

if (preset.name == "*") {
// handle global preset
global = preset;
} else {
out[preset.name] = preset;
}
}

return out;
}

common_presets common_preset_context::load_from_cache() const {
common_presets out;

auto cached_models = common_list_cached_models();
for (const auto & model : cached_models) {
common_preset preset;
preset.name = model.to_string();
preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string());
out[preset.name] = preset;
}

return out;
}

struct local_model {
std::string name;
std::string path;
std::string path_mmproj;
};

common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const {
if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) {
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str()));
}

std::vector<local_model> models;
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
auto files = fs_list(subdir_path, false);
common_file_info model_file;
common_file_info first_shard_file;
common_file_info mmproj_file;
for (const auto & file : files) {
if (string_ends_with(file.name, ".gguf")) {
if (file.name.find("mmproj") != std::string::npos) {
mmproj_file = file;
} else if (file.name.find("-00001-of-") != std::string::npos) {
first_shard_file = file;
} else {
model_file = file;
}
}
}
// single file model
local_model model{
/* name */ name,
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
/* path_mmproj */ mmproj_file.path // can be empty
};
if (!model.path.empty()) {
models.push_back(model);
}
};
Comment on lines +294 to +319
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Ambiguous behavior when multiple .gguf files exist in a directory.

In the scan_subdir lambda, if multiple non-sharded, non-mmproj .gguf files exist, the code will overwrite model_file on each iteration (lines 300-307), ultimately using the last file encountered. Similarly, multiple mmproj files would result in using the last one. This creates non-deterministic model selection that depends on filesystem ordering.

Consider either:

  • Raising an error when multiple candidate files are found
  • Implementing explicit prioritization logic (e.g., prefer a specific naming pattern)
  • Logging a warning when ambiguity is detected


auto files = fs_list(models_dir, true);
for (const auto & file : files) {
if (file.is_dir) {
scan_subdir(file.path, file.name);
} else if (string_ends_with(file.name, ".gguf")) {
// single file model
std::string name = file.name;
string_replace_all(name, ".gguf", "");
local_model model{
/* name */ name,
/* path */ file.path,
/* path_mmproj */ ""
};
models.push_back(model);
}
}

// convert local models to presets
common_presets out;
for (const auto & model : models) {
common_preset preset;
preset.name = model.name;
preset.set_option(*this, "LLAMA_ARG_MODEL", model.path);
if (!model.path_mmproj.empty()) {
preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj);
}
out[preset.name] = preset;
}

return out;
}

common_preset common_preset_context::load_from_args(int argc, char ** argv) const {
common_preset preset;
preset.name = COMMON_PRESET_DEFAULT_NAME;

bool ok = common_params_to_map(argc, argv, ctx_params.ex, preset.options);
if (!ok) {
throw std::runtime_error("failed to parse CLI arguments into preset");
}

return preset;
}

common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const {
common_presets out = base; // copy
for (const auto & [name, preset_added] : added) {
if (out.find(name) != out.end()) {
// if exists, merge
common_preset & target = out[name];
target.merge(preset_added);
} else {
// otherwise, add directly
out[name] = preset_added;
}
}
return out;
}

common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const {
common_presets out;
for (const auto & [name, preset] : presets) {
common_preset tmp = base; // copy
tmp.name = name;
tmp.merge(preset);
out[name] = std::move(tmp);
}
return out;
}
47 changes: 44 additions & 3 deletions common/preset.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,61 @@

constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default";

struct common_preset_context;

struct common_preset {
std::string name;
// TODO: support repeated args in the future

// options are stored as common_arg to string mapping, representing CLI arg and its value
std::map<common_arg, std::string> options;

// convert preset to CLI argument list
std::vector<std::string> to_args() const;
std::vector<std::string> to_args(const std::string & bin_path = "") const;

// convert preset to INI format string
std::string to_ini() const;

// TODO: maybe implement to_env() if needed

// modify preset options where argument is identified by its env variable
void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value);

// unset option by its env variable
void unset_option(const std::string & env);

// get option value by its env variable, return false if not found
bool get_option(const std::string & env, std::string & value) const;

// merge another preset into this one, overwriting existing options
void merge(const common_preset & other);
};

// interface for multiple presets in one file
using common_presets = std::map<std::string, common_preset>;
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params);

// context for loading and editing presets
struct common_preset_context {
common_params_context ctx_params;
std::map<std::string, common_arg> key_to_opt;
common_preset_context(common_params & default_params, llama_example ex);

// load presets from INI file
common_presets load_from_ini(const std::string & path, common_preset & global) const;

// generate presets from cached models
common_presets load_from_cache() const;

// generate presets from local models directory
// for the directory structure, see "Using multiple models" in server/README.md
common_presets load_from_models_dir(const std::string & models_dir) const;

// generate one preset from CLI arguments
common_preset load_from_args(int argc, char ** argv) const;

// cascade multiple presets if exist on both: base < added
// if preset does not exist in base, it will be added without modification
common_presets cascade(const common_presets & base, const common_presets & added) const;

// apply presets over a base preset (same idea as CSS cascading)
common_presets cascade(const common_preset & base, const common_presets & presets) const;
};
15 changes: 13 additions & 2 deletions tools/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1443,6 +1443,12 @@ Example:
```ini
version = 1

; (Optional) This section provides global settings shared across all presets.
; If the same key is defined in a specific preset, it will override the value in this global section.
[*]
c = 8192
n-gpu-layer = 8

; If the key corresponds to an existing model on the server,
; this will be used as the default config for that model
[ggml-org/MY-MODEL-GGUF:Q8_0]
Expand All @@ -1462,12 +1468,17 @@ model-draft = ./my-models/draft.gguf
model-draft = /Users/abc/my-models/draft.gguf

; If the key does NOT correspond to an existing model,
; you need to specify at least the model path
; you need to specify at least the model path or HF repo
[custom_model]
model = /Users/abc/my-awesome-model-Q4_K_M.gguf
```

Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upload loading.
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upon loading.

The precedence rule for preset options is as follows:
1. **Command-line arguments** passed to `llama-server` (highest priority)
2. **Model-specific options** defined in the preset file (e.g. `[ggml-org/MY-MODEL...]`)
3. **Global options** defined in the preset file (`[*]`)

### Routing requests

Expand Down
Loading
Loading