@@ -1139,15 +1139,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
     add_opt(common_arg(
-        {"--cache-ram", "-cram"}, "N",
+        {"-cram", "--cache-ram"}, "N",
         string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)"
             "[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)", params.cache_ram_mib),
         [](common_params & params, int value) {
             params.cache_ram_mib = value;
         }
     ).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
     add_opt(common_arg(
-        {"--kv-unified", "-kvu"},
+        {"-kvu", "--kv-unified"},
         "use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)",
         [](common_params & params) {
             params.kv_unified = true;
@@ -2073,26 +2073,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ));
     add_opt(common_arg(
-        {"--override-tensor", "-ot"}, "<tensor name pattern>=<buffer type>,...",
+        {"-ot", "--override-tensor"}, "<tensor name pattern>=<buffer type>,...",
         "override tensor buffer type", [](common_params & params, const std::string & value) {
             parse_tensor_buffer_overrides(value, params.tensor_buft_overrides);
         }
     ));
     add_opt(common_arg(
-        {"--override-tensor-draft", "-otd"}, "<tensor name pattern>=<buffer type>,...",
+        {"-otd", "--override-tensor-draft"}, "<tensor name pattern>=<buffer type>,...",
         "override tensor buffer type for draft model", [](common_params & params, const std::string & value) {
             parse_tensor_buffer_overrides(value, params.speculative.tensor_buft_overrides);
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
     add_opt(common_arg(
-        {"--cpu-moe", "-cmoe"},
+        {"-cmoe", "--cpu-moe"},
         "keep all Mixture of Experts (MoE) weights in the CPU",
         [](common_params & params) {
             params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
         }
     ).set_env("LLAMA_ARG_CPU_MOE"));
     add_opt(common_arg(
-        {"--n-cpu-moe", "-ncmoe"}, "N",
+        {"-ncmoe", "--n-cpu-moe"}, "N",
         "keep the Mixture of Experts (MoE) weights of the first N layers in the CPU",
         [](common_params & params, int value) {
             if (value < 0) {
@@ -2107,14 +2107,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_env("LLAMA_ARG_N_CPU_MOE"));
     add_opt(common_arg(
-        {"--cpu-moe-draft", "-cmoed"},
+        {"-cmoed", "--cpu-moe-draft"},
         "keep all Mixture of Experts (MoE) weights in the CPU for the draft model",
         [](common_params & params) {
             params.speculative.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_CPU_MOE_DRAFT"));
     add_opt(common_arg(
-        {"--n-cpu-moe-draft", "-ncmoed"}, "N",
+        {"-ncmoed", "--n-cpu-moe-draft"}, "N",
         "keep the Mixture of Experts (MoE) weights of the first N layers in the CPU for the draft model",
         [](common_params & params, int value) {
             if (value < 0) {
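Every hunk in this diff makes the same mechanical change: the short alias is moved ahead of the long form in each `common_arg` alias list. Below is a minimal sketch of why that ordering can matter, under the assumption (hypothetical, not the actual llama.cpp implementation) that the first alias in the list is the one that leads the generated usage text:

```cpp
// Sketch only: a toy arg table where names[0] is treated as the canonical
// alias for help output. Names and layout here are illustrative, not the
// real common_arg from llama.cpp's common/arg.cpp.
#include <cstdio>
#include <string>
#include <vector>

struct arg_def {
    std::vector<std::string> names; // accepted aliases; names[0] is canonical
    std::string              help;
};

static void print_usage(const std::vector<arg_def> & defs) {
    for (const auto & d : defs) {
        // the first alias leads the help line; remaining aliases follow
        std::printf("  %s", d.names[0].c_str());
        for (size_t i = 1; i < d.names.size(); ++i) {
            std::printf(" (%s)", d.names[i].c_str());
        }
        std::printf("\n      %s\n", d.help.c_str());
    }
}

int main() {
    std::vector<arg_def> defs = {
        {{"-cram", "--cache-ram"},  "set the maximum cache size in MiB"},
        {{"-kvu",  "--kv-unified"}, "use single unified KV buffer shared across all sequences"},
    };
    print_usage(defs);
}
```

With the short form listed first, a help line reads `-cram (--cache-ram)` rather than `--cache-ram (-cram)`; either way, the parser accepts every alias in the list, so the reordering does not change which flags are valid on the command line.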