From a2e26695c72762e58b2c69801e6e8f280aad4a8e Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Wed, 17 Dec 2025 16:20:22 -0500 Subject: [PATCH] feat: convert models API to use a FastAPI router Migrate from @webmethod decorators to FastAPI router pattern Signed-off-by: Nathan Weinberg --- client-sdks/stainless/openapi.yml | 98 ++++++++---- docs/static/deprecated-llama-stack-spec.yaml | 98 ++++++++---- .../static/experimental-llama-stack-spec.yaml | 71 +++++++++ docs/static/llama-stack-spec.yaml | 96 ++++++++++-- docs/static/stainless-llama-stack-spec.yaml | 98 ++++++++---- src/llama_stack/core/routers/inference.py | 10 +- src/llama_stack/core/routing_tables/models.py | 45 +++++- .../core/server/fastapi_router_registry.py | 3 +- .../inline/batches/reference/batches.py | 3 +- src/llama_stack_api/__init__.py | 6 + src/llama_stack_api/models/__init__.py | 47 ++++++ src/llama_stack_api/models/api.py | 38 +++++ src/llama_stack_api/models/fastapi_routes.py | 104 +++++++++++++ src/llama_stack_api/{ => models}/models.py | 144 ++++++++---------- 14 files changed, 673 insertions(+), 188 deletions(-) create mode 100644 src/llama_stack_api/models/__init__.py create mode 100644 src/llama_stack_api/models/api.py create mode 100644 src/llama_stack_api/models/fastapi_routes.py rename src/llama_stack_api/{ => models}/models.py (58%) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 590b3b9034..4f334f29d2 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -991,7 +991,7 @@ paths: get: responses: '200': - description: A OpenAIListModelsResponse. + description: A list of OpenAI model objects. content: application/json: schema: @@ -1010,13 +1010,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Openai List Models + summary: List models using the OpenAI API. description: List models using the OpenAI API. operationId: openai_list_models_v1_models_get post: responses: '200': - description: A Model. + description: The registered model object. content: application/json: schema: @@ -1035,11 +1035,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Register Model - description: |- - Register model. - - Register a model. + summary: Register a model. + description: Register a model. operationId: register_model_v1_models_post requestBody: content: @@ -1052,30 +1049,27 @@ paths: get: responses: '200': - description: A Model. + description: The model object. content: application/json: schema: $ref: '#/components/schemas/Model' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Models - summary: Get Model - description: |- - Get model. - - Get a model by its identifier. + summary: Get a model by its identifier. + description: Get a model by its identifier. operationId: get_model_v1_models__model_id__get parameters: - name: model_id @@ -1083,30 +1077,29 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to get. + title: Model Id + description: The ID of the model to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The model was successfully unregistered. tags: - Models - summary: Unregister Model - description: |- - Unregister model. - - Unregister a model. + summary: Unregister a model. + description: Unregister a model. operationId: unregister_model_v1_models__model_id__delete parameters: - name: model_id @@ -1114,7 +1107,9 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to unregister. + title: Model Id + description: The ID of the model to unregister. deprecated: true /v1/moderations: post: @@ -6611,10 +6606,12 @@ components: $ref: '#/components/schemas/OpenAIModel' type: array title: Data + description: List of OpenAI model objects. type: object required: - data title: OpenAIListModelsResponse + description: Response containing a list of OpenAI model objects. Model: properties: identifier: @@ -11618,29 +11615,35 @@ components: model_id: type: string title: Model Id + description: The identifier of the model to register. provider_model_id: anyOf: - type: string - type: 'null' + description: The identifier of the model in the provider. provider_id: anyOf: - type: string - type: 'null' + description: The identifier of the provider. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: Any additional metadata for this model. model_type: anyOf: - $ref: '#/components/schemas/ModelType' title: ModelType - type: 'null' + description: The type of model to register. title: ModelType type: object required: - model_id title: RegisterModelRequest + description: Request model for registering a model. ParamType: discriminator: mapping: @@ -13151,6 +13154,41 @@ components: - dataset_id title: UnregisterDatasetRequest type: object + ListModelsResponse: + description: Response containing a list of model objects. + properties: + data: + description: List of model objects. + items: + $ref: '#/components/schemas/Model' + title: Data + type: array + required: + - data + title: ListModelsResponse + type: object + GetModelRequest: + description: Request model for getting a model by ID. + properties: + model_id: + description: The ID of the model to get. + title: Model Id + type: string + required: + - model_id + title: GetModelRequest + type: object + UnregisterModelRequest: + description: Request model for unregistering a model. + properties: + model_id: + description: The ID of the model to unregister. + title: Model Id + type: string + required: + - model_id + title: UnregisterModelRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 0312e1409f..dee493d910 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -17,7 +17,7 @@ paths: get: responses: '200': - description: A OpenAIListModelsResponse. + description: A list of OpenAI model objects. content: application/json: schema: @@ -36,13 +36,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Openai List Models + summary: List models using the OpenAI API. description: List models using the OpenAI API. operationId: openai_list_models_v1_models_get post: responses: '200': - description: A Model. + description: The registered model object. content: application/json: schema: @@ -61,11 +61,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Register Model - description: |- - Register model. - - Register a model. + summary: Register a model. + description: Register a model. operationId: register_model_v1_models_post requestBody: content: @@ -78,30 +75,27 @@ paths: get: responses: '200': - description: A Model. + description: The model object. content: application/json: schema: $ref: '#/components/schemas/Model' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Models - summary: Get Model - description: |- - Get model. - - Get a model by its identifier. + summary: Get a model by its identifier. + description: Get a model by its identifier. operationId: get_model_v1_models__model_id__get parameters: - name: model_id @@ -109,30 +103,29 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to get. + title: Model Id + description: The ID of the model to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The model was successfully unregistered. tags: - Models - summary: Unregister Model - description: |- - Unregister model. - - Unregister a model. + summary: Unregister a model. + description: Unregister a model. operationId: unregister_model_v1_models__model_id__delete parameters: - name: model_id @@ -140,7 +133,9 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to unregister. + title: Model Id + description: The ID of the model to unregister. deprecated: true /v1/scoring-functions: get: @@ -3292,10 +3287,12 @@ components: $ref: '#/components/schemas/OpenAIModel' type: array title: Data + description: List of OpenAI model objects. type: object required: - data title: OpenAIListModelsResponse + description: Response containing a list of OpenAI model objects. Model: properties: identifier: @@ -8299,29 +8296,35 @@ components: model_id: type: string title: Model Id + description: The identifier of the model to register. provider_model_id: anyOf: - type: string - type: 'null' + description: The identifier of the model in the provider. provider_id: anyOf: - type: string - type: 'null' + description: The identifier of the provider. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: Any additional metadata for this model. model_type: anyOf: - $ref: '#/components/schemas/ModelType' title: ModelType - type: 'null' + description: The type of model to register. title: ModelType type: object required: - model_id title: RegisterModelRequest + description: Request model for registering a model. ParamType: discriminator: mapping: @@ -9832,6 +9835,41 @@ components: - dataset_id title: UnregisterDatasetRequest type: object + ListModelsResponse: + description: Response containing a list of model objects. + properties: + data: + description: List of model objects. + items: + $ref: '#/components/schemas/Model' + title: Data + type: array + required: + - data + title: ListModelsResponse + type: object + GetModelRequest: + description: Request model for getting a model by ID. + properties: + model_id: + description: The ID of the model to get. + title: Model Id + type: string + required: + - model_id + title: GetModelRequest + type: object + UnregisterModelRequest: + description: Request model for unregistering a model. + properties: + model_id: + description: The ID of the model to unregister. + title: Model Id + type: string + required: + - model_id + title: UnregisterModelRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 2e8020c2db..324f61e7d6 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -3240,10 +3240,12 @@ components: $ref: '#/components/schemas/OpenAIModel' type: array title: Data + description: List of OpenAI model objects. type: object required: - data title: OpenAIListModelsResponse + description: Response containing a list of OpenAI model objects. Model: properties: identifier: @@ -7719,6 +7721,40 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + properties: + model_id: + type: string + title: Model Id + description: The identifier of the model to register. + provider_model_id: + anyOf: + - type: string + - type: 'null' + description: The identifier of the model in the provider. + provider_id: + anyOf: + - type: string + - type: 'null' + description: The identifier of the provider. + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: Any additional metadata for this model. + model_type: + anyOf: + - $ref: '#/components/schemas/ModelType' + title: ModelType + - type: 'null' + description: The type of model to register. + title: ModelType + type: object + required: + - model_id + title: RegisterModelRequest + description: Request model for registering a model. ParamType: discriminator: mapping: @@ -8903,6 +8939,41 @@ components: - dataset_id title: UnregisterDatasetRequest type: object + ListModelsResponse: + description: Response containing a list of model objects. + properties: + data: + description: List of model objects. + items: + $ref: '#/components/schemas/Model' + title: Data + type: array + required: + - data + title: ListModelsResponse + type: object + GetModelRequest: + description: Request model for getting a model by ID. + properties: + model_id: + description: The ID of the model to get. + title: Model Id + type: string + required: + - model_id + title: GetModelRequest + type: object + UnregisterModelRequest: + description: Request model for unregistering a model. + properties: + model_id: + description: The ID of the model to unregister. + title: Model Id + type: string + required: + - model_id + title: UnregisterModelRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 9ee3bd08dc..3fc68914fc 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -989,7 +989,7 @@ paths: get: responses: '200': - description: A OpenAIListModelsResponse. + description: A list of OpenAI model objects. content: application/json: schema: @@ -1008,37 +1008,34 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Openai List Models + summary: List models using the OpenAI API. description: List models using the OpenAI API. operationId: openai_list_models_v1_models_get /v1/models/{model_id}: get: responses: '200': - description: A Model. + description: The model object. content: application/json: schema: $ref: '#/components/schemas/Model' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Models - summary: Get Model - description: |- - Get model. - - Get a model by its identifier. + summary: Get a model by its identifier. + description: Get a model by its identifier. operationId: get_model_v1_models__model_id__get parameters: - name: model_id @@ -1046,7 +1043,9 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to get. + title: Model Id + description: The ID of the model to get. /v1/moderations: post: responses: @@ -5096,10 +5095,12 @@ components: $ref: '#/components/schemas/OpenAIModel' type: array title: Data + description: List of OpenAI model objects. type: object required: - data title: OpenAIListModelsResponse + description: Response containing a list of OpenAI model objects. Model: properties: identifier: @@ -9921,6 +9922,40 @@ components: - group_size title: QATFinetuningConfig description: Configuration for Quantization-Aware Training (QAT) fine-tuning. + RegisterModelRequest: + properties: + model_id: + type: string + title: Model Id + description: The identifier of the model to register. + provider_model_id: + anyOf: + - type: string + - type: 'null' + description: The identifier of the model in the provider. + provider_id: + anyOf: + - type: string + - type: 'null' + description: The identifier of the provider. + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: Any additional metadata for this model. + model_type: + anyOf: + - $ref: '#/components/schemas/ModelType' + title: ModelType + - type: 'null' + description: The type of model to register. + title: ModelType + type: object + required: + - model_id + title: RegisterModelRequest + description: Request model for registering a model. ParamType: discriminator: mapping: @@ -11324,6 +11359,41 @@ components: - dataset_id title: UnregisterDatasetRequest type: object + ListModelsResponse: + description: Response containing a list of model objects. + properties: + data: + description: List of model objects. + items: + $ref: '#/components/schemas/Model' + title: Data + type: array + required: + - data + title: ListModelsResponse + type: object + GetModelRequest: + description: Request model for getting a model by ID. + properties: + model_id: + description: The ID of the model to get. + title: Model Id + type: string + required: + - model_id + title: GetModelRequest + type: object + UnregisterModelRequest: + description: Request model for unregistering a model. + properties: + model_id: + description: The ID of the model to unregister. + title: Model Id + type: string + required: + - model_id + title: UnregisterModelRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 590b3b9034..4f334f29d2 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -991,7 +991,7 @@ paths: get: responses: '200': - description: A OpenAIListModelsResponse. + description: A list of OpenAI model objects. content: application/json: schema: @@ -1010,13 +1010,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Openai List Models + summary: List models using the OpenAI API. description: List models using the OpenAI API. operationId: openai_list_models_v1_models_get post: responses: '200': - description: A Model. + description: The registered model object. content: application/json: schema: @@ -1035,11 +1035,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Register Model - description: |- - Register model. - - Register a model. + summary: Register a model. + description: Register a model. operationId: register_model_v1_models_post requestBody: content: @@ -1052,30 +1049,27 @@ paths: get: responses: '200': - description: A Model. + description: The model object. content: application/json: schema: $ref: '#/components/schemas/Model' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Models - summary: Get Model - description: |- - Get model. - - Get a model by its identifier. + summary: Get a model by its identifier. + description: Get a model by its identifier. operationId: get_model_v1_models__model_id__get parameters: - name: model_id @@ -1083,30 +1077,29 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to get. + title: Model Id + description: The ID of the model to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The model was successfully unregistered. tags: - Models - summary: Unregister Model - description: |- - Unregister model. - - Unregister a model. + summary: Unregister a model. + description: Unregister a model. operationId: unregister_model_v1_models__model_id__delete parameters: - name: model_id @@ -1114,7 +1107,9 @@ paths: required: true schema: type: string - description: 'Path parameter: model_id' + description: The ID of the model to unregister. + title: Model Id + description: The ID of the model to unregister. deprecated: true /v1/moderations: post: @@ -6611,10 +6606,12 @@ components: $ref: '#/components/schemas/OpenAIModel' type: array title: Data + description: List of OpenAI model objects. type: object required: - data title: OpenAIListModelsResponse + description: Response containing a list of OpenAI model objects. Model: properties: identifier: @@ -11618,29 +11615,35 @@ components: model_id: type: string title: Model Id + description: The identifier of the model to register. provider_model_id: anyOf: - type: string - type: 'null' + description: The identifier of the model in the provider. provider_id: anyOf: - type: string - type: 'null' + description: The identifier of the provider. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: Any additional metadata for this model. model_type: anyOf: - $ref: '#/components/schemas/ModelType' title: ModelType - type: 'null' + description: The type of model to register. title: ModelType type: object required: - model_id title: RegisterModelRequest + description: Request model for registering a model. ParamType: discriminator: mapping: @@ -13151,6 +13154,41 @@ components: - dataset_id title: UnregisterDatasetRequest type: object + ListModelsResponse: + description: Response containing a list of model objects. + properties: + data: + description: List of model objects. + items: + $ref: '#/components/schemas/Model' + title: Data + type: array + required: + - data + title: ListModelsResponse + type: object + GetModelRequest: + description: Request model for getting a model by ID. + properties: + model_id: + description: The ID of the model to get. + title: Model Id + type: string + required: + - model_id + title: GetModelRequest + type: object + UnregisterModelRequest: + description: Request model for unregistering a model. + properties: + model_id: + description: The ID of the model to unregister. + title: Model Id + type: string + required: + - model_id + title: UnregisterModelRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index e87fc85b39..089b2bb337 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -46,6 +46,7 @@ OpenAITokenLogProb, OpenAITopLogProb, Order, + RegisterModelRequest, RerankResponse, RoutingTable, ) @@ -87,7 +88,14 @@ async def register_model( logger.debug( f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}", ) - await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type) + request = RegisterModelRequest( + model_id=model_id, + provider_model_id=provider_model_id, + provider_id=provider_id, + metadata=metadata, + model_type=model_type, + ) + await self.routing_table.register_model(request) async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]: model = await self.routing_table.get_object_by_identifier("model", model_id) diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 77c11554d8..dbe792502e 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -16,6 +16,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger from llama_stack_api import ( + GetModelRequest, ListModelsResponse, Model, ModelNotFoundError, @@ -23,6 +24,8 @@ ModelType, OpenAIListModelsResponse, OpenAIModel, + RegisterModelRequest, + UnregisterModelRequest, ) from .common import CommonRoutingTableImpl, lookup_model @@ -171,7 +174,12 @@ async def openai_list_models(self) -> OpenAIListModelsResponse: ] return OpenAIListModelsResponse(data=openai_models) - async def get_model(self, model_id: str) -> Model: + async def get_model(self, request_or_model_id: GetModelRequest | str) -> Model: + # Support both the public Models API (GetModelRequest) and internal ModelStore interface (string) + if isinstance(request_or_model_id, GetModelRequest): + model_id = request_or_model_id.model_id + else: + model_id = request_or_model_id return await lookup_model(self, model_id) async def get_provider_impl(self, model_id: str) -> Any: @@ -195,12 +203,28 @@ async def has_model(self, model_id: str) -> bool: async def register_model( self, - model_id: str, + request: RegisterModelRequest | str | None = None, + *, + model_id: str | None = None, provider_model_id: str | None = None, provider_id: str | None = None, metadata: dict[str, Any] | None = None, model_type: ModelType | None = None, ) -> Model: + # Support both the public Models API (RegisterModelRequest) and legacy parameter-based interface + if isinstance(request, RegisterModelRequest): + model_id = request.model_id + provider_model_id = request.provider_model_id + provider_id = request.provider_id + metadata = request.metadata + model_type = request.model_type + elif isinstance(request, str): + # Legacy positional argument: register_model("model-id", ...) + model_id = request + + if model_id is None: + raise ValueError("Either request or model_id must be provided") + if provider_id is None: # If provider_id not specified, use the only provider if it supports this model if len(self.impls_by_provider_id) == 1: @@ -229,7 +253,22 @@ async def register_model( registered_model = await self.register_object(model) return registered_model - async def unregister_model(self, model_id: str) -> None: + async def unregister_model( + self, + request: UnregisterModelRequest | str | None = None, + *, + model_id: str | None = None, + ) -> None: + # Support both the public Models API (UnregisterModelRequest) and legacy parameter-based interface + if isinstance(request, UnregisterModelRequest): + model_id = request.model_id + elif isinstance(request, str): + # Legacy positional argument: unregister_model("model-id") + model_id = request + + if model_id is None: + raise ValueError("Either request or model_id must be provided") + existing_model = await self.get_model(model_id) if existing_model is None: raise ModelNotFoundError(model_id) diff --git a/src/llama_stack/core/server/fastapi_router_registry.py b/src/llama_stack/core/server/fastapi_router_registry.py index 02c4d00f6b..3f5cfec326 100644 --- a/src/llama_stack/core/server/fastapi_router_registry.py +++ b/src/llama_stack/core/server/fastapi_router_registry.py @@ -16,7 +16,7 @@ from fastapi import APIRouter from fastapi.routing import APIRoute -from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, providers +from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, models, providers # Router factories for APIs that have FastAPI routers # Add new APIs here as they are migrated to the router system @@ -27,6 +27,7 @@ "batches": batches.fastapi_routes.create_router, "benchmarks": benchmarks.fastapi_routes.create_router, "datasets": datasets.fastapi_routes.create_router, + "models": models.fastapi_routes.create_router, "providers": providers.fastapi_routes.create_router, "inspect": inspect_api.fastapi_routes.create_router, "files": files.fastapi_routes.create_router, diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py index 77f85264c2..dd67343bd1 100644 --- a/src/llama_stack/providers/inline/batches/reference/batches.py +++ b/src/llama_stack/providers/inline/batches/reference/batches.py @@ -23,6 +23,7 @@ BatchObject, ConflictError, Files, + GetModelRequest, Inference, ListBatchesResponse, Models, @@ -485,7 +486,7 @@ async def _validate_input(self, batch: BatchObject) -> tuple[list[BatchError], l if "model" in request_body and isinstance(request_body["model"], str): try: - await self.models_api.get_model(request_body["model"]) + await self.models_api.get_model(GetModelRequest(model_id=request_body["model"])) except Exception: errors.append( BatchError( diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index b7fa1cc88c..b15e401e47 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -241,6 +241,7 @@ from .inspect_api import Inspect from .models import ( CommonModelFields, + GetModelRequest, ListModelsResponse, Model, ModelInput, @@ -248,6 +249,8 @@ ModelType, OpenAIListModelsResponse, OpenAIModel, + RegisterModelRequest, + UnregisterModelRequest, ) from .openai_responses import ( AllowedToolsFilter, @@ -648,6 +651,9 @@ "ModelType", "ModelTypeError", "Models", + "GetModelRequest", + "RegisterModelRequest", + "UnregisterModelRequest", "ModelsProtocolPrivate", "ModerationObject", "ModerationObjectResults", diff --git a/src/llama_stack_api/models/__init__.py b/src/llama_stack_api/models/__init__.py new file mode 100644 index 0000000000..ace2fd4eff --- /dev/null +++ b/src/llama_stack_api/models/__init__.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Models API protocol and models. + +This module contains the Models protocol definition. +Pydantic models are defined in llama_stack_api.models.models. +The FastAPI router is defined in llama_stack_api.models.fastapi_routes. +""" + +# Import fastapi_routes for router factory access +from . import fastapi_routes + +# Import new protocol for FastAPI router +from .api import Models + +# Import models for re-export +from .models import ( + CommonModelFields, + GetModelRequest, + ListModelsResponse, + Model, + ModelInput, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, + RegisterModelRequest, + UnregisterModelRequest, +) + +__all__ = [ + "CommonModelFields", + "fastapi_routes", + "GetModelRequest", + "ListModelsResponse", + "Model", + "ModelInput", + "Models", + "ModelType", + "OpenAIListModelsResponse", + "OpenAIModel", + "RegisterModelRequest", + "UnregisterModelRequest", +] diff --git a/src/llama_stack_api/models/api.py b/src/llama_stack_api/models/api.py new file mode 100644 index 0000000000..22dc81e6ee --- /dev/null +++ b/src/llama_stack_api/models/api.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Models API protocol definition. + +This module contains the Models protocol definition. +Pydantic models are defined in llama_stack_api.models.models. +The FastAPI router is defined in llama_stack_api.models.fastapi_routes. +""" + +from typing import Protocol, runtime_checkable + +from .models import ( + GetModelRequest, + ListModelsResponse, + Model, + OpenAIListModelsResponse, + RegisterModelRequest, + UnregisterModelRequest, +) + + +@runtime_checkable +class Models(Protocol): + """Protocol for model management operations.""" + + async def list_models(self) -> ListModelsResponse: ... + + async def openai_list_models(self) -> OpenAIListModelsResponse: ... + + async def get_model(self, request: GetModelRequest) -> Model: ... + + async def register_model(self, request: RegisterModelRequest) -> Model: ... + + async def unregister_model(self, request: UnregisterModelRequest) -> None: ... diff --git a/src/llama_stack_api/models/fastapi_routes.py b/src/llama_stack_api/models/fastapi_routes.py new file mode 100644 index 0000000000..d70a884b78 --- /dev/null +++ b/src/llama_stack_api/models/fastapi_routes.py @@ -0,0 +1,104 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""FastAPI router for the Models API. + +This module defines the FastAPI router for the Models API using standard +FastAPI route decorators. +""" + +from typing import Annotated + +from fastapi import APIRouter, Body, Depends + +from llama_stack_api.router_utils import create_path_dependency, standard_responses +from llama_stack_api.version import LLAMA_STACK_API_V1 + +from .api import Models +from .models import ( + GetModelRequest, + Model, + OpenAIListModelsResponse, + RegisterModelRequest, + UnregisterModelRequest, +) + +# Path parameter dependencies for single-field models +get_model_request = create_path_dependency(GetModelRequest) +unregister_model_request = create_path_dependency(UnregisterModelRequest) + + +def create_router(impl: Models) -> APIRouter: + """Create a FastAPI router for the Models API. + + Args: + impl: The Models implementation instance + + Returns: + APIRouter configured for the Models API + """ + router = APIRouter( + prefix=f"/{LLAMA_STACK_API_V1}", + tags=["Models"], + responses=standard_responses, + ) + + @router.get( + "/models", + response_model=OpenAIListModelsResponse, + summary="List models using the OpenAI API.", + description="List models using the OpenAI API.", + responses={ + 200: {"description": "A list of OpenAI model objects."}, + }, + ) + async def openai_list_models() -> OpenAIListModelsResponse: + return await impl.openai_list_models() + + @router.get( + "/models/{model_id:path}", + response_model=Model, + summary="Get a model by its identifier.", + description="Get a model by its identifier.", + responses={ + 200: {"description": "The model object."}, + }, + ) + async def get_model( + request: Annotated[GetModelRequest, Depends(get_model_request)], + ) -> Model: + return await impl.get_model(request) + + @router.post( + "/models", + response_model=Model, + summary="Register a model.", + description="Register a model.", + responses={ + 200: {"description": "The registered model object."}, + }, + deprecated=True, + ) + async def register_model( + request: Annotated[RegisterModelRequest, Body(...)], + ) -> Model: + return await impl.register_model(request) + + @router.delete( + "/models/{model_id:path}", + summary="Unregister a model.", + description="Unregister a model.", + responses={ + 200: {"description": "The model was successfully unregistered."}, + }, + deprecated=True, + ) + async def unregister_model( + request: Annotated[UnregisterModelRequest, Depends(unregister_model_request)], + ) -> None: + return await impl.unregister_model(request) + + return router diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models/models.py similarity index 58% rename from src/llama_stack_api/models.py rename to src/llama_stack_api/models/models.py index 3efdfe66bd..b09eddf883 100644 --- a/src/llama_stack_api/models.py +++ b/src/llama_stack_api/models/models.py @@ -4,26 +4,25 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +"""Pydantic models for Models API requests and responses. + +This module defines the request and response models for the Models API +using Pydantic with Field descriptions for OpenAPI schema generation. +""" + from enum import StrEnum -from typing import Any, Literal, Protocol, runtime_checkable +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field, field_validator from llama_stack_api.resource import Resource, ResourceType -from llama_stack_api.schema_utils import json_schema_type, webmethod -from llama_stack_api.version import LLAMA_STACK_API_V1 - - -class CommonModelFields(BaseModel): - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Any additional metadata for this model", - ) +from llama_stack_api.schema_utils import json_schema_type @json_schema_type class ModelType(StrEnum): """Enumeration of supported model types in Llama Stack. + :cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations :cvar rerank: Reranking model for reordering documents based on their relevance to a query @@ -34,6 +33,13 @@ class ModelType(StrEnum): rerank = "rerank" +class CommonModelFields(BaseModel): + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Any additional metadata for this model", + ) + + @json_schema_type class Model(CommonModelFields, Resource): """A model resource representing an AI model registered in Llama Stack. @@ -77,8 +83,11 @@ class ModelInput(CommonModelFields): model_config = ConfigDict(protected_namespaces=()) +@json_schema_type class ListModelsResponse(BaseModel): - data: list[Model] + """Response containing a list of model objects.""" + + data: list[Model] = Field(..., description="List of model objects.") @json_schema_type @@ -101,71 +110,48 @@ class OpenAIModel(BaseModel): @json_schema_type class OpenAIListModelsResponse(BaseModel): - data: list[OpenAIModel] - - -@runtime_checkable -class Models(Protocol): - async def list_models(self) -> ListModelsResponse: - """List all models. - - :returns: A ListModelsResponse. - """ - ... - - @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1) - async def openai_list_models(self) -> OpenAIListModelsResponse: - """List models using the OpenAI API. - - :returns: A OpenAIListModelsResponse. - """ - ... - - @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1) - async def get_model( - self, - model_id: str, - ) -> Model: - """Get model. - - Get a model by its identifier. - - :param model_id: The identifier of the model to get. - :returns: A Model. - """ - ... - - @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - async def register_model( - self, - model_id: str, - provider_model_id: str | None = None, - provider_id: str | None = None, - metadata: dict[str, Any] | None = None, - model_type: ModelType | None = None, - ) -> Model: - """Register model. - - Register a model. - - :param model_id: The identifier of the model to register. - :param provider_model_id: The identifier of the model in the provider. - :param provider_id: The identifier of the provider. - :param metadata: Any additional metadata for this model. - :param model_type: The type of model to register. - :returns: A Model. - """ - ... - - @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) - async def unregister_model( - self, - model_id: str, - ) -> None: - """Unregister model. - - Unregister a model. - - :param model_id: The identifier of the model to unregister. - """ - ... + """Response containing a list of OpenAI model objects.""" + + data: list[OpenAIModel] = Field(..., description="List of OpenAI model objects.") + + +# Request models for each endpoint + + +@json_schema_type +class GetModelRequest(BaseModel): + """Request model for getting a model by ID.""" + + model_id: str = Field(..., description="The ID of the model to get.") + + +@json_schema_type +class RegisterModelRequest(BaseModel): + """Request model for registering a model.""" + + model_id: str = Field(..., description="The identifier of the model to register.") + provider_model_id: str | None = Field(default=None, description="The identifier of the model in the provider.") + provider_id: str | None = Field(default=None, description="The identifier of the provider.") + metadata: dict[str, Any] | None = Field(default=None, description="Any additional metadata for this model.") + model_type: ModelType | None = Field(default=None, description="The type of model to register.") + + +@json_schema_type +class UnregisterModelRequest(BaseModel): + """Request model for unregistering a model.""" + + model_id: str = Field(..., description="The ID of the model to unregister.") + + +__all__ = [ + "CommonModelFields", + "GetModelRequest", + "ListModelsResponse", + "Model", + "ModelInput", + "ModelType", + "OpenAIListModelsResponse", + "OpenAIModel", + "RegisterModelRequest", + "UnregisterModelRequest", +]