diff --git a/cerebrium/container-images/custom-dockerfiles.mdx b/cerebrium/container-images/custom-dockerfiles.mdx
index 836ac455..fb765e08 100644
--- a/cerebrium/container-images/custom-dockerfiles.mdx
+++ b/cerebrium/container-images/custom-dockerfiles.mdx
@@ -51,40 +51,71 @@ CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8192
When creating a Dockerfile for Cerebrium, there are three key requirements:
1. You must expose a port using the `EXPOSE` command - this port will be referenced later in your `cerebrium.toml` configuration
-2. A `CMD` command is required to specify what runs when the container starts (typically your server process)
+2. Either a `CMD` or `ENTRYPOINT` directive must be defined in your Dockerfile, or the `entrypoint` key must be set in your `cerebrium.toml` under `[runtime.docker]`. This specifies what runs when the container starts. The TOML configuration takes precedence
3. Set the working directory using `WORKDIR` to ensure your application runs from the correct location (defaults to root directory if not specified)
-Update cerebrium.toml to include a custom runtime section with the `dockerfile_path` parameter:
+Update cerebrium.toml to include a docker runtime section with the `dockerfile_path` parameter:
```toml
-[cerebrium.runtime.custom]
+[deployment]
+name = "my-docker-app"
+
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
port = 8192
healthcheck_endpoint = "/health"
readycheck_endpoint = "/ready"
-dockerfile_path = "./Dockerfile"
```
-The configuration requires three key parameters:
+The configuration requires the following parameters:
+- `dockerfile_path`: The relative path to the Dockerfile used to build the app.
- `port`: The port the server listens on.
- `healthcheck_endpoint`: The endpoint used to confirm instance health. If unspecified, defaults to a TCP ping on the configured port. If the health check returns a non-200 response, the instance is considered _unhealthy_ and is restarted if it does not recover in time.
- `readycheck_endpoint`: The endpoint used to confirm the instance is ready to receive requests. If unspecified, defaults to a TCP ping on the configured port. If the ready check returns a non-200 response, the instance is not a viable target for request routing.
-- `dockerfile_path`: The relative path to the Dockerfile used to build the app.
+- `entrypoint` (optional): The command that starts the application. Required if neither `CMD` nor `ENTRYPOINT` is defined in the Dockerfile.
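The probe endpoints themselves can be trivial. A hypothetical standard-library sketch (real apps typically use FastAPI or similar; `READY` and `status_for` are illustrative names, not part of the platform):

```python
from http.server import BaseHTTPRequestHandler, HTTPServer

# Illustrative readiness flag; a real app would check model loading, DB pools, etc.
READY = True

def status_for(path: str) -> int:
    """Map a probe path to the HTTP status the platform expects."""
    if path == "/health":
        return 200  # process is alive
    if path == "/ready":
        return 200 if READY else 503  # non-200 removes this instance from routing
    return 404

class ProbeHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        self.send_response(status_for(self.path))
        self.end_headers()

# To serve on the configured port:
# HTTPServer(("0.0.0.0", 8192), ProbeHandler).serve_forever()
```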
+
+### Entrypoint Precedence
-If a Dockerfile does not contain a `CMD` clause, specifying the `entrypoint` parameter in the `cerebrium.toml` file is required.
+
+ The `entrypoint` parameter in `cerebrium.toml` **always takes precedence**
+ over the `CMD` or `ENTRYPOINT` instruction in your Dockerfile, regardless of
+ what the Dockerfile defines.
+
+
+If your Dockerfile does not contain a `CMD` or `ENTRYPOINT` instruction, you **must** specify the `entrypoint` parameter in your `cerebrium.toml`:
```toml
-[cerebrium.runtime.custom]
+[deployment]
+name = "my-docker-app"
+
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8192"]
-...
+port = 8192
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+```
+
+If you want to override your Dockerfile's `CMD` at deploy time without modifying the Dockerfile, add the `entrypoint` parameter to your TOML configuration:
+
+```toml
+[deployment]
+name = "my-docker-app"
+
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
+# This will override any CMD in your Dockerfile
+entrypoint = ["python", "server.py", "--port", "8192"]
+port = 8192
```
When specifying a `dockerfile_path`, all dependencies and necessary commands
should be installed and executed within the Dockerfile. Dependencies listed
- under `cerebrium.dependencies.*`, as well as
- `cerebrium.deployment.shell_commands` and
- `cerebrium.deployment.pre_build_commands`, will be ignored.
+ under `dependencies.*`, as well as `shell_commands` and `pre_build_commands`,
+ will be ignored.
## Building Generic Dockerized Apps
@@ -165,9 +196,12 @@ CMD ["dumb-init", "--", "/rs_server"]
Similarly to the FastAPI webserver, the application should be configured in the `cerebrium.toml` file:
```toml
-[cerebrium.runtime.custom]
+[deployment]
+name = "rust-server"
+
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
port = 8192
healthcheck_endpoint = "/health"
readycheck_endpoint = "/ready"
-dockerfile_path = "./Dockerfile"
```
diff --git a/cerebrium/container-images/custom-web-servers.mdx b/cerebrium/container-images/custom-web-servers.mdx
index 50d0da68..1cb015f6 100644
--- a/cerebrium/container-images/custom-web-servers.mdx
+++ b/cerebrium/container-images/custom-web-servers.mdx
@@ -3,7 +3,7 @@ title: "Custom Python Web Servers"
description: "Run ASGI/WSGI Python apps on Cerebrium"
---
-While Cerebrium's default runtime works well for most app needs, teams sometimes need more control over their web server implementation. Using ASGI or WSGI servers through Cerebrium's custom runtime feature enables capabilities like custom authentication, dynamic batching, frontend dashboards, public endpoints, and WebSocket connections.
+While Cerebrium's default runtime works well for most app needs, teams sometimes need more control over their web server implementation. Using ASGI or WSGI servers through Cerebrium's Python runtime feature enables capabilities like custom authentication, dynamic batching, frontend dashboards, public endpoints, and WebSocket connections.
## Setting Up Custom Servers
@@ -26,29 +26,46 @@ def ready():
return "OK"
```
-Configure this server in `cerebrium.toml` by adding a custom runtime section:
+Configure this server in `cerebrium.toml` by adding a Python runtime section:
```toml
-[cerebrium.runtime.custom]
-port = 5000
+[deployment]
+name = "my-fastapi-app"
+
+[runtime.python]
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "5000"]
+port = 5000
healthcheck_endpoint = "/health"
readycheck_endpoint = "/ready"
-[cerebrium.dependencies.pip]
+[dependencies.pip]
pydantic = "latest"
numpy = "latest"
loguru = "latest"
fastapi = "latest"
+uvicorn = "latest"
```
-The configuration requires three key parameters:
+The configuration requires the following key parameters:
- `entrypoint`: The command that starts your server
- `port`: The port your server listens on
- `healthcheck_endpoint`: The endpoint used to confirm instance health. If unspecified, defaults to a TCP ping on the configured port. If the health check returns a non-200 response, the instance is considered _unhealthy_ and is restarted if it does not recover in time.
- `readycheck_endpoint`: The endpoint used to confirm the instance is ready to receive requests. If unspecified, defaults to a TCP ping on the configured port. If the ready check returns a non-200 response, the instance is not a viable target for request routing.
+You can also configure build settings in the Python runtime section:
+
+```toml
+[runtime.python]
+python_version = "3.11"
+docker_base_image_url = "debian:bookworm-slim"
+use_uv = true
+entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+port = 8000
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+```
+
For ASGI applications like FastAPI, include the appropriate server package
(like `uvicorn`) in your dependencies. After deployment, your endpoints become
diff --git a/cerebrium/container-images/defining-container-images.mdx b/cerebrium/container-images/defining-container-images.mdx
index c4cad161..8d3becbf 100644
--- a/cerebrium/container-images/defining-container-images.mdx
+++ b/cerebrium/container-images/defining-container-images.mdx
@@ -21,9 +21,9 @@ Check out the [Introductory Guide](/cerebrium/getting-started/introduction) for
It is possible to initialize an existing project by adding a `cerebrium.toml`
file to the root of your codebase, defining your entrypoint (`main.py` if
- using the default runtime, or adding an entrypoint to the .toml file if using
- a custom runtime) and including the necessary files in the `deployment`
- section of your `cerebrium.toml` file.
+ using the default cortex runtime, or adding an entrypoint to the runtime
+ section if using a python or docker runtime) and including the necessary files
+ in the `deployment` section of your `cerebrium.toml` file.
## Hardware Configuration
@@ -31,7 +31,7 @@ Check out the [Introductory Guide](/cerebrium/getting-started/introduction) for
Cerebrium provides flexible hardware options to match app requirements. The basic configuration specifies GPU type and memory allocations.
```toml
-[cerebrium.hardware]
+[hardware]
compute = "AMPERE_A10" # GPU selection
memory = 16.0 # Memory allocation in GB
cpu = 4 # Number of CPU cores
@@ -44,11 +44,20 @@ For detailed hardware specifications and performance characteristics see the [GP
### Selecting a Python Version
-The Python runtime version forms the foundation of every Cerebrium app. We currently support versions 3.10 to 3.13. Specify the Python version in the deployment section of the configuration:
+The Python runtime version forms the foundation of every Cerebrium app. We currently support versions 3.10 to 3.13. Specify the Python version in the runtime section of the configuration:
```toml
-[cerebrium.deployment]
-python_version = 3.11
+[runtime.cortex]
+python_version = "3.11"
+```
+
+Or for custom Python ASGI/WSGI apps:
+
+```toml
+[runtime.python]
+python_version = "3.11"
+entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+port = 8000
```
The Python version affects the entire dependency chain. For instance, some packages may not support newer Python versions immediately after release.
@@ -63,7 +72,7 @@ The Python version affects the entire dependency chain. For instance, some packa
Python dependencies can be managed directly in TOML or through requirement files. The system caches packages to speed up builds:
```toml
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = "==2.0.0"
transformers = "==4.30.0"
numpy = "latest"
@@ -72,8 +81,8 @@ numpy = "latest"
Or using an existing requirements file:
```toml
-[cerebrium.dependencies.paths]
-pip = "requirements.txt"
+[dependencies.pip]
+_file_relative_path = "requirements.txt"
```
@@ -85,20 +94,20 @@ The system implements an intelligent caching strategy at the node level. When an
### Adding APT Packages
-System-level packages provide the foundation for many ML apps, handling everything from image-processing libraries to audio codecs. These can be added to the `cerebrium.toml` file under the `[cerebrium.dependencies.apt]` section as follows:
+System-level packages provide the foundation for many ML apps, handling everything from image-processing libraries to audio codecs. These can be added to the `cerebrium.toml` file under the `[dependencies.apt]` section as follows:
```toml
-[cerebrium.dependencies.apt]
+[dependencies.apt]
ffmpeg = "latest"
libopenblas-base = "latest"
libomp-dev = "latest"
```
-For teams with standardized system dependencies, text files can be used instead by adding the following to the `[cerebrium.dependencies.paths]` section:
+For teams with standardized system dependencies, text files can be used instead:
```toml
-[cerebrium.dependencies.paths]
-apt = "deps_folder/pkglist.txt"
+[dependencies.apt]
+_file_relative_path = "deps_folder/pkglist.txt"
```
Since APT packages modify the system environment, any changes to these dependencies trigger a full rebuild of the container image. This ensures system-level changes are properly integrated but means builds will take longer than when modifying Python packages alone.
@@ -108,7 +117,7 @@ Since APT packages modify the system environment, any changes to these dependenc
Conda excels at managing complex system-level Python dependencies, particularly for GPU support and scientific computing:
```toml
-[cerebrium.dependencies.conda]
+[dependencies.conda]
cuda = ">=11.7"
cudatoolkit = "11.7"
opencv = "latest"
@@ -117,8 +126,8 @@ opencv = "latest"
Teams using conda environments can specify their environment file:
```toml
-[cerebrium.dependencies.paths]
-conda = "conda_pkglist.txt"
+[dependencies.conda]
+_file_relative_path = "conda_pkglist.txt"
```
Like APT packages, Conda packages often modify system-level components. Changes to Conda dependencies will trigger a full rebuild to ensure all binary dependencies and system libraries are correctly configured. Consider batching Conda dependency updates together to minimize rebuild time.
@@ -132,8 +141,7 @@ Cerebrium's build process includes two specialized command types that execute at
Pre-build commands execute at the start of the build process, before dependency installation begins. This early execution timing makes them essential for setting up the build environment:
```toml
-
-[cerebrium.deployment]
+[runtime.cortex]
pre_build_commands = [
# Add specialized build tools
"curl -o /usr/local/bin/pget -L 'https://github.com/replicate/pget/releases/download/v0.6.2/pget_linux_x86_64'",
@@ -148,7 +156,7 @@ Pre-build commands typically handle tasks like installing build tools, configuri
Shell commands execute after all dependencies install and the application code copies into the container. This later timing ensures access to the complete environment:
```toml
-[cerebrium.deployment]
+[runtime.cortex]
shell_commands = [
# Initialize application resources
"python -m download_models",
@@ -178,7 +186,7 @@ The base image selection shapes how an app runs in Cerebrium. While the default
Cerebrium supports several categories of base images to ensure system compatibility such as nvidia, ubuntu and python images.
```toml
-[cerebrium.deployment]
+[runtime.cortex]
docker_base_image_url = "debian:bookworm-slim" # Default minimal image
#docker_base_image_url = "nvidia/cuda:12.0.1-runtime-ubuntu22.04" # CUDA-enabled images
#docker_base_image_url = "ubuntu:22.04" # debian images
@@ -205,7 +213,7 @@ docker login -u your-dockerhub-username
After logging in, you can use the image in your configuration:
```toml
-[cerebrium.deployment]
+[runtime.cortex]
docker_base_image_url = "bob/infinity:latest"
```
@@ -226,7 +234,7 @@ docker_base_image_url = "bob/infinity:latest"
Public ECR images from the `public.ecr.aws` registry work without authentication:
```toml
-[cerebrium.deployment]
+[runtime.cortex]
docker_base_image_url = "public.ecr.aws/lambda/python:3.11"
```
@@ -234,19 +242,22 @@ However, **private ECR images** require authentication. See [Using Private Docke
## Custom Runtimes
-While Cerebrium's default runtime works well for most apps, teams often need more control over their server implementation. Custom runtimes enable features like custom authentication, dynamic batching, public endpoints, or WebSocket connections.
+While Cerebrium's default cortex runtime works well for most apps, teams often need more control over their server implementation. Custom runtimes enable features like custom authentication, dynamic batching, public endpoints, or WebSocket connections.
-### Basic Configuration
+### Python Runtime (ASGI/WSGI)
-Define a custom runtime by adding the `cerebrium.runtime.custom` section to the configuration:
+For custom Python web servers, use the `[runtime.python]` section:
```toml
-[cerebrium.runtime.custom]
+[deployment]
+name = "my-fastapi-app"
+
+[runtime.python]
+python_version = "3.11"
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
port = 8080
healthcheck_endpoint = "" # Empty string uses TCP health check
readycheck_endpoint = "" # Empty string uses TCP ready check
-
```
Key parameters:
@@ -259,21 +270,45 @@ Key parameters:
Check out [this
example](https://github.com/CerebriumAI/examples/tree/master/11-python-apps/1-asgi-fastapi-server)
- for a detailed implementation of a FastAPI server that uses a custom runtime.
+ for a detailed implementation of a FastAPI server that uses a Python runtime.
+### Docker Runtime
+
+For complete control over your container, use the `[runtime.docker]` section with a custom Dockerfile:
+
+```toml
+[deployment]
+name = "my-docker-app"
+
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
+port = 8080
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+```
+
+
+ When using the docker runtime, all dependencies and build commands should be
+ handled within the Dockerfile. The `[dependencies.*]` sections will be
+ ignored.
+
+
### Self-Contained Servers
Custom runtimes also support apps with built-in servers. For example, deploying a VLLM server requires no Python code:
```toml
-[cerebrium.runtime.custom]
-entrypoint = "vllm serve meta-llama/Meta-Llama-3-8B-Instruct --host 0.0.0.0 --port 8000 --device cuda"
+[deployment]
+name = "vllm-server"
+
+[runtime.python]
+entrypoint = ["vllm", "serve", "meta-llama/Meta-Llama-3-8B-Instruct", "--host", "0.0.0.0", "--port", "8000", "--device", "cuda"]
port = 8000
healthcheck_endpoint = "/health"
-healthcheck_endpoint = "/ready"
+readycheck_endpoint = "/ready"
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = "latest"
vllm = "latest"
```
diff --git a/cerebrium/container-images/private-docker-registry.mdx b/cerebrium/container-images/private-docker-registry.mdx
index 3af2526e..756298ed 100644
--- a/cerebrium/container-images/private-docker-registry.mdx
+++ b/cerebrium/container-images/private-docker-registry.mdx
@@ -61,8 +61,10 @@ You should see your registry URL(s) listed.
Edit your cerebrium.toml and enter the URL of the registry image from step 2:
```toml
-[cerebrium.deployment]
+[deployment]
name = "my-app"
+
+[runtime.cortex]
python_version = "3.11"
docker_base_image_url = "your-registry.com/your-org/your-image:tag"
diff --git a/cerebrium/deployments/gradual-roll-out.mdx b/cerebrium/deployments/gradual-roll-out.mdx
index 56310cfa..e636f33d 100644
--- a/cerebrium/deployments/gradual-roll-out.mdx
+++ b/cerebrium/deployments/gradual-roll-out.mdx
@@ -5,7 +5,7 @@ description: "Control the transition between revisions during deployments"
This feature is available from CLI version 1.38.2
-The `roll_out_duration_seconds` parameter in the `[cerebrium.scaling]` section of your `cerebrium.toml` file controls how quickly traffic transitions between revisions after a successful build.
+The `roll_out_duration_seconds` parameter in the `[scaling]` section of your `cerebrium.toml` file controls how quickly traffic transitions between revisions after a successful build.
## Overview
@@ -15,10 +15,10 @@ Traffic is shifted in 5 batches of 20% each, over the specified duration. This g
## Configuration
-Add the `roll_out_duration_seconds` parameter to the `[cerebrium.scaling]` section of your `cerebrium.toml` file:
+Add the `roll_out_duration_seconds` parameter to the `[scaling]` section of your `cerebrium.toml` file:
```toml
-[cerebrium.scaling]
+[scaling]
roll_out_duration_seconds = 0 # Default value
```
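As a sketch of the schedule this produces (the 5 batches of 20% are from the description above; `rollout_schedule` is an illustrative helper, not part of the CLI):

```python
def rollout_schedule(duration_seconds: int, batches: int = 5):
    """Traffic share on the new revision after each batch (20% steps)."""
    step = duration_seconds / batches
    return [(round(i * step), i * 100 // batches) for i in range(1, batches + 1)]

# With roll_out_duration_seconds = 600, traffic shifts 20% every 120 seconds:
print(rollout_schedule(600))  # [(120, 20), (240, 40), (360, 60), (480, 80), (600, 100)]
```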
diff --git a/cerebrium/deployments/multi-region-deployment.mdx b/cerebrium/deployments/multi-region-deployment.mdx
index a7a8ae03..ce7e3d5e 100644
--- a/cerebrium/deployments/multi-region-deployment.mdx
+++ b/cerebrium/deployments/multi-region-deployment.mdx
@@ -70,10 +70,10 @@ cerebrium ls --region eu-west-2
## App Deployment
-Configure your app's deployment region using the `region` parameter in the `[cerebrium.hardware]` section of your `cerebrium.toml` file:
+Configure your app's deployment region using the `region` parameter in the `[hardware]` section of your `cerebrium.toml` file:
```toml
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "AMPERE_A10"
diff --git a/cerebrium/endpoints/webhook.mdx b/cerebrium/endpoints/webhook.mdx
index aa092e03..55c984f7 100644
--- a/cerebrium/endpoints/webhook.mdx
+++ b/cerebrium/endpoints/webhook.mdx
@@ -17,7 +17,7 @@ curl -X POST https://api.aws.us-east-1.cerebrium.ai/v4//
- These settings only affect the default Cortex runtime. If you are using a
+ These settings only affect the default cortex runtime. If you are using a
[custom runtime](/cerebrium/container-images/custom-web-servers), you will
need to handle logging behavior in your own server implementation.
diff --git a/cerebrium/partner-services/deepgram.mdx b/cerebrium/partner-services/deepgram.mdx
index d97ebcee..3bd25c05 100644
--- a/cerebrium/partner-services/deepgram.mdx
+++ b/cerebrium/partner-services/deepgram.mdx
@@ -252,21 +252,21 @@ max_response_size = 1073741824 # 1GB
6. Update the cerebrium.toml file with the following configuration to set hardware requirements, scaling parameters, region, and other settings:
```toml
-[cerebrium.deployment]
+[deployment]
name = "deepgram"
# Enable below in production environments
disable_auth = true
-[cerebrium.runtime.deepgram]
+[runtime.deepgram]
-[cerebrium.hardware]
+[hardware]
cpu = 4
region = "us-east-1"
memory = 32
compute = "AMPERE_A10"
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
cooldown = 120
diff --git a/cerebrium/partner-services/rime.mdx b/cerebrium/partner-services/rime.mdx
index 6e519946..0cb96779 100644
--- a/cerebrium/partner-services/rime.mdx
+++ b/cerebrium/partner-services/rime.mdx
@@ -20,24 +20,24 @@ Cerebrium's partnership with [Rime](https://www.rime.ai/) helps teams deliver te
cerebrium init rime
```
-3. Rime services use a simplified TOML configuration with the `[cerebrium.runtime.rime]` section. Create a `cerebrium.toml` file with the following:
+3. Rime services use a simplified TOML configuration with the `[runtime.rime]` section. Create a `cerebrium.toml` file with the following:
```toml
-[cerebrium.deployment]
+[deployment]
name = "rime"
disable_auth = true
-[cerebrium.runtime.rime]
+[runtime.rime]
port = 8001
-[cerebrium.hardware]
+[hardware]
cpu = 4
memory = 30
compute = "AMPERE_A10"
gpu_count = 1
region = "us-east-1"
-[cerebrium.scaling]
+[scaling]
min_replicas = 1
max_replicas = 2
cooldown = 120
diff --git a/cerebrium/scaling/batching-concurrency.mdx b/cerebrium/scaling/batching-concurrency.mdx
index a22ddf48..0ca6371d 100644
--- a/cerebrium/scaling/batching-concurrency.mdx
+++ b/cerebrium/scaling/batching-concurrency.mdx
@@ -8,7 +8,7 @@ description: "Improve throughput and cost performance with batching and concurre
Concurrency in Cerebrium allows each instance to process multiple requests simultaneously. The `replica_concurrency` setting in the `cerebrium.toml` file determines how many requests each instance handles in parallel:
```toml
-[cerebrium.scaling]
+[scaling]
replica_concurrency = 4 # Process up to 4 requests simultaneously.
```
@@ -27,13 +27,13 @@ Cerebrium supports two approaches to request batching.
Many frameworks include features for processing multiple requests efficiently. vLLM, for example, automatically handles batched model inference requests:
```toml
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
cooldown = 10
replica_concurrency = 4 # Each container can now handle multiple requests.
-[cerebrium.dependencies.pip]
+[dependencies.pip]
sentencepiece = "latest"
torch = "latest"
vllm = "latest"
@@ -57,13 +57,13 @@ Applications requiring precise control over request processing can implement cus
As an example, implementation with LitServe requires additional configuration in the `cerebrium.toml` file:
```toml
-[cerebrium.runtime.custom]
+[runtime.python]
port = 8000
entrypoint = ["python", "app/main.py"]
healthcheck_endpoint = "/health"
readycheck_endpoint = "/ready"
-[cerebrium.dependencies.pip]
+[dependencies.pip]
litserve = "latest"
fastapi = "latest"
```
diff --git a/cerebrium/scaling/graceful-termination.mdx b/cerebrium/scaling/graceful-termination.mdx
index 3ca2377f..d61327ad 100644
--- a/cerebrium/scaling/graceful-termination.mdx
+++ b/cerebrium/scaling/graceful-termination.mdx
@@ -9,7 +9,7 @@ Cerebrium runs in a shared, multi-tenant environment. To efficiently scale, opti
## Understanding Instance Termination
-For both application autoscaling and our own internal node scaling, we will send your application a SIGTERM signal, as a warning to the application that we are intending to shut down this instance. For Cortex applications (Cerebriums default runtime), this is handled. On custom runtimes, should you wish to gracefully shut down, you will need to catch and handle this signal. Once at least `response_grace_period` has elapsed, we will send your application a SIGKILL signal, terminating the instance immediately.
+For both application autoscaling and our own internal node scaling, we will send your application a SIGTERM signal as a warning that we intend to shut down the instance. For cortex applications (Cerebrium's default runtime), this is handled automatically. On custom runtimes, should you wish to shut down gracefully, you will need to catch and handle this signal. Once at least `response_grace_period` has elapsed, we will send your application a SIGKILL signal, terminating the instance immediately.
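Catching SIGTERM on a custom runtime takes only a few lines. A minimal Python sketch (the self-signal at the end only simulates the platform's warning; real drain logic is up to your server):

```python
import os
import signal

shutting_down = False

def handle_sigterm(signum, frame):
    # Stop accepting new work; let in-flight requests finish before exiting.
    global shutting_down
    shutting_down = True

signal.signal(signal.SIGTERM, handle_sigterm)

# Simulate the platform sending SIGTERM to this process:
os.kill(os.getpid(), signal.SIGTERM)
print(shutting_down)  # True
```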
When Cerebrium needs to terminate a container, we do the following:
Below is a chart that shows the flow more clearly:
```mermaid
flowchart TD
- A[SIGTERM sent] --> B[Cortex]
+ A[SIGTERM sent] --> B[cortex]
A --> C[Custom Runtime]
B --> D[automatically captured]
@@ -125,12 +125,12 @@ def hello():
If you already have a `cerebrium.toml` file, add or update these sections. If you don't have one, create a new file with the following:
```toml
-[cerebrium.deployment]
+[deployment]
name = "your-app-name"
include = ["./*"]
exclude = [".*"]
-[cerebrium.hardware]
+[hardware]
cpu = 1
memory = 1.0
compute = "CPU"
@@ -138,13 +138,13 @@ gpu_count = 0
provider = "aws"
region = "us-east-1"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
cooldown = 30
replica_concurrency = 1 # Must match max_concurrency in your AppState
-[cerebrium.runtime.custom]
+[runtime.docker]
port = 8000
dockerfile_path = "./Dockerfile"
```
diff --git a/cerebrium/scaling/scaling-apps.mdx b/cerebrium/scaling/scaling-apps.mdx
index f418e389..2cdd06f6 100644
--- a/cerebrium/scaling/scaling-apps.mdx
+++ b/cerebrium/scaling/scaling-apps.mdx
@@ -23,7 +23,7 @@ As traffic decreases, instances enter a cooldown period at reduced concurrency.
The `cerebrium.toml` file controls scaling behavior through several key parameters:
```toml
-[cerebrium.scaling]
+[scaling]
min_replicas = 0 # Minimum running instances
max_replicas = 3 # Maximum concurrent instances
cooldown = 60 # Cooldown period in seconds
@@ -67,7 +67,7 @@ Cerebrium ensures reliability through automatic instance health management. The
Apps requiring maximum reliability often combine several scaling features:
```toml
-[cerebrium.scaling]
+[scaling]
min_replicas = 2 # Maintain redundant instances
cooldown = 600 # Extended warm period
max_replicas = 10 # Room for traffic spikes
@@ -79,7 +79,7 @@ During normal replica operation, this simply corresponds to a request timeout va
waits for the specified grace period, issues a SIGKILL command if the instance has not stopped, and kills any active requests with a GatewayTimeout error.
- When using the Cortex runtime (default), SIGTERM signals are automatically
+ When using the cortex runtime (default), SIGTERM signals are automatically
handled to allow graceful termination of requests. For custom runtimes, you'll
need to implement SIGTERM handling yourself to ensure requests complete
gracefully before termination. See our [Graceful Termination
@@ -108,10 +108,10 @@ and so Cerebrium currently provides four scaling metrics to choose from:
- `cpu_utilization`
- `memory_utilization`
-These can be added to the `cerebrium.scaling` section as such, by specifying one of these metrics and a target:
+These can be added to the `scaling` section by specifying one of these metrics and a target:
```toml
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
cooldown = 600
max_replicas = 10
@@ -143,14 +143,14 @@ if `scaling_target=5`, Cerebrium will attempt to maintain a 5 requests/s average
### CPU Utilization
`cpu_utilization` uses a maximum CPU percentage utilization averaged over all instances of an application to scale out, relative to the
-`cerebrium.hardware.cpu` value. For example, if an application has `cpu=2` and `scaling_target=80`, Cerebrium will attempt
+`hardware.cpu` value. For example, if an application has `cpu=2` and `scaling_target=80`, Cerebrium will attempt
to maintain _80%_ CPU utilization (1.6 CPUs) per instance across your entire deployed service. Since there is no notion of
scaling relative to 0 CPU units, it is required that `min_replicas=1` if using this metric.
### Memory Utilization
`memory_utilization` uses a maximum memory percentage utilization averaged over all instances of an application to scale out, relative to the
-`cerebrium.hardware.memory` value. Note this refers to RAM, **not** GPU VRAM utilization. For example, if an application has `memory=10` and `scaling_target=80`, Cerebrium will attempt
+`hardware.memory` value. Note this refers to RAM, **not** GPU VRAM utilization. For example, if an application has `memory=10` and `scaling_target=80`, Cerebrium will attempt
to maintain _80%_ Memory utilization (8GB) per instance across your entire deployed service. Since there is no notion of
scaling relative to 0GB of memory, it is required that `min_replicas=1` if using this metric.
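Cerebrium does not publish the exact control law behind these metrics; the generic target-tracking rule used by most autoscalers (e.g. Kubernetes' HPA) conveys the intuition. A sketch, with `desired_replicas` as an illustrative name:

```python
import math

def desired_replicas(current: int, observed: float, target: float,
                     min_replicas: int = 1, max_replicas: int = 10) -> int:
    # Generic target-tracking rule: grow or shrink the replica count in
    # proportion to the ratio of observed load to the configured scaling_target.
    raw = math.ceil(current * observed / target)
    return max(min_replicas, min(max_replicas, raw))

# Two instances averaging 120% CPU against a scaling_target of 80 -> scale to 3.
print(desired_replicas(current=2, observed=120, target=80))  # 3
```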
@@ -165,10 +165,10 @@ is only available when using the following scaling metrics:
- `concurrency_utilization`
- `requests_per_second`
-The buffer can be added to the `cerebrium.scaling` section as such, by specifying `scaling_buffer`:
+The buffer can be added to the `scaling` section by specifying `scaling_buffer`:
```toml
-[cerebrium.scaling]
+[scaling]
min_replicas = 1
cooldown = 600
max_replicas = 10
@@ -192,7 +192,7 @@ Once this request has completed, the usual `cooldown` period will apply, and the
The `evaluation_interval` parameter controls the time window (in seconds) over which the autoscaler evaluates metrics before making scaling decisions. The default is 30 seconds, with a valid range of 6-300 seconds.
```toml
-[cerebrium.scaling]
+[scaling]
evaluation_interval = 30 # Evaluate metrics over 30-second windows
```
@@ -211,7 +211,7 @@ A shorter interval makes the autoscaler more responsive to traffic spikes but ma
The `load_balancing` parameter controls how incoming requests are distributed across your replicas. When not specified, the system automatically selects the best algorithm based on your `replica_concurrency` setting.
```toml
-[cerebrium.scaling]
+[scaling]
load_balancing = "min-connections" # Explicitly set load balancing algorithm
```
diff --git a/cerebrium/storage/managing-files.mdx b/cerebrium/storage/managing-files.mdx
index d6da4e4b..1acfd8a7 100644
--- a/cerebrium/storage/managing-files.mdx
+++ b/cerebrium/storage/managing-files.mdx
@@ -15,7 +15,7 @@ Cerebrium offers file management through a 50GB persistent volume that's availab
The `cerebrium.toml` configuration file controls which files become part of the app:
```toml
-[cerebrium.deployment]
+[deployment]
include = [
"src/*.py", # Python files in src.
"config/*.json", # JSON files in config.
diff --git a/migrations/hugging-face.mdx b/migrations/hugging-face.mdx
index 64e0d53a..a8f8d7c7 100644
--- a/migrations/hugging-face.mdx
+++ b/migrations/hugging-face.mdx
@@ -53,24 +53,26 @@ pip install cerebrium --upgrade
Scaffold your application by running `cerebrium init [PROJECT_NAME]`. During the initialization, a `cerebrium.toml` is created. This file configures the deployment, hardware, scaling, and dependencies for your Cerebrium project. Update your `cerebrium.toml` file to reflect the following:
```toml
-[cerebrium.deployment]
+[deployment]
name = "llama-8b-vllm"
-python_version = "3.11"
-docker_base_image_url = "debian:bookworm-slim"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = [".*"]
-[cerebrium.hardware]
+[runtime.cortex]
+python_version = "3.11"
+docker_base_image_url = "debian:bookworm-slim"
+
+[hardware]
cpu = 2
memory = 12.0
compute = "AMPERE_A10"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 30
-[cerebrium.dependencies.pip]
+[dependencies.pip]
sentencepiece = "latest"
torch = "latest"
transformers = "latest"
@@ -83,10 +85,11 @@ bitsandbytes = "latest"
Let's break down this configuration:
-- `cerebrium.deployment`: Specifies the project name, Python version, base Docker image, and which files to include/exclude as project files.
-- `cerebrium.hardware`: Defines the CPU, memory, and GPU requirements for your deployment.
-- `cerebrium.scaling`: Configures auto-scaling behavior, including minimum and maximum replicas, and cooldown period.
-- `cerebrium.dependencies.pip`: Lists the Python packages required for your project.
+- `deployment`: Specifies the project name and which files to include/exclude as project files.
+- `runtime.cortex`: Specifies the Python version and base Docker image.
+- `hardware`: Defines the CPU, memory, and GPU requirements for your deployment.
+- `scaling`: Configures auto-scaling behavior, including minimum and maximum replicas, and cooldown period.
+- `dependencies.pip`: Lists the Python packages required for your project.
#### 1.3 Update your code
diff --git a/migrations/mystic.mdx b/migrations/mystic.mdx
index 30646c69..5006d7f1 100644
--- a/migrations/mystic.mdx
+++ b/migrations/mystic.mdx
@@ -62,26 +62,28 @@ Transforms into Cerebrium's easy-to-understand TOML config:
```toml
# cerebrium.toml
-[cerebrium.deployment]
+[deployment]
name = "stable-diffusion"
-python_version = "3.11"
-docker_base_image_url = "debian:bookworm-slim"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = [".*"]
-[cerebrium.hardware]
+[runtime.cortex]
+python_version = "3.11"
+docker_base_image_url = "debian:bookworm-slim"
+
+[hardware]
compute = "AMPERE_A10" # Choose your GPU type
cpu = 4 # Number of CPU cores
memory = 16.0 # Memory in GB
gpu_count = 1 # Number of GPUs
-[cerebrium.scaling]
+[scaling]
min_replicas = 0 # Save costs when inactive and scale down your app
max_replicas = 2 # Handle increased traffic and scale up where necessary
cooldown = 60 # Time window at reduced concurrency before scaling down
replica_concurrency = 1 # The number of requests a single container can support
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = ">=2.0.0"
pydantic = "latest"
transformers = "latest"
diff --git a/migrations/replicate.mdx b/migrations/replicate.mdx
index f93b6ebf..bed603a3 100644
--- a/migrations/replicate.mdx
+++ b/migrations/replicate.mdx
@@ -22,17 +22,19 @@ Now Cerebrium and Replicate have a common setup in that they both have a setup f
Looking at the cog.yaml, we need to add/change the following in our cerebrium.toml
```python
-[cerebrium.deployment]
+[deployment]
name = "cog-migration-sdxl"
-python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./example_exclude"]
+
+[runtime.cortex]
+python_version = "3.11"
docker_base_image_url = "nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04"
shell_commands = [
"curl -o /usr/local/bin/pget -L 'https://github.com/replicate/pget/releases/download/v0.6.2/pget_linux_x86_64' && chmod +x /usr/local/bin/pget"
]
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "AMPERE_A10"
@@ -40,14 +42,14 @@ cpu = 2
memory = 12.0
gpu_count = 1
-[cerebrium.dependencies.pip]
+[dependencies.pip]
"accelerate" = "latest"
"diffusers" = "latest"
"torch" = "==2.0.1"
"torchvision" = "==0.15.2"
"transformers" = "latest"
-[cerebrium.dependencies.apt]
+[dependencies.apt]
"curl" = "latest"
```
diff --git a/toml-reference/toml-reference.mdx b/toml-reference/toml-reference.mdx
index 65b024d1..2859cfc1 100644
--- a/toml-reference/toml-reference.mdx
+++ b/toml-reference/toml-reference.mdx
@@ -3,39 +3,155 @@ title: TOML Reference
description: Complete reference for all parameters available in Cerebrium's default `cerebrium.toml` configuration file.
---
+
+**Deprecation Notice:** The `cerebrium.` prefix is being removed from all configuration sections. Sub-keys are becoming top-level keys (e.g., `[cerebrium.deployment]` → `[deployment]`). The prefixed format is still supported for backwards compatibility but will be removed in a future release. We recommend migrating to the new format.
+
+| Deprecated Format | New Format |
+| ---------------------------- | ------------------ |
+| `[cerebrium.deployment]` | `[deployment]` |
+| `[cerebrium.runtime.cortex]` | `[runtime.cortex]` |
+| `[cerebrium.hardware]` | `[hardware]` |
+| `[cerebrium.scaling]` | `[scaling]` |
+| `[cerebrium.dependencies]` | `[dependencies]` |
+
+
+
The configuration is organized into the following main sections:
-- **[cerebrium.deployment]** Core settings like app name, Python version, and file inclusion rules
-- **[cerebrium.runtime.custom]** Custom web server settings and app startup behavior
-- **[cerebrium.hardware]** Compute resources including CPU, memory, and GPU specifications
-- **[cerebrium.scaling]** Auto-scaling behavior and replica management
-- **[cerebrium.dependencies]** Package management for Python (pip), system (apt), and Conda dependencies
+- **[deployment]** Core settings like app name and file inclusion rules
+- **[runtime.cortex]** Default Cerebrium-managed Python runtime (build settings)
+- **[runtime.python]** Custom Python ASGI/WSGI web server settings
+- **[runtime.docker]** Custom Dockerfile settings
+- **[hardware]** Compute resources including CPU, memory, and GPU specifications
+- **[scaling]** Auto-scaling behavior and replica management
+- **[dependencies]** Package management for Python (pip), system (apt), and Conda dependencies
## Deployment Configuration
-The `[cerebrium.deployment]` section defines core deployment settings.
-
-| Option | Type | Default | Description |
-| --------------------------------- | -------- | ---------------------- | ------------------------------------------------------------------------------------------------------------ |
-| name | string | required | Desired app name |
-| python_version | string | "3.12" | Python version to use (3.10, 3.11, 3.12) |
-| disable_auth | boolean | false | Disable default token-based authentication on app endpoints |
-| include | string[] | ["*"] | Files/patterns to include in deployment |
-| exclude | string[] | [".*"] | Files/patterns to exclude from deployment |
-| shell_commands | string[] | [] | Commands to run at the end of the build |
-| pre_build_commands | string[] | [] | Commands to run before dependencies install |
-| docker_base_image_url | string | "debian:bookworm-slim" | Base Docker image |
-| use_uv | boolean | false | Use UV for faster Python package installation |
-| deployment_initialization_timeout | integer | 600 (10 minutes) | The max time to wait for app initialisation during build before timing out. Value must be between 60 and 830 |
+The `[deployment]` section defines core deployment settings that apply to all runtime types.
+
+| Option | Type | Default | Description |
+| --------------------------------- | -------- | ---------------- | ------------------------------------------------------------------------------------------------------------ |
+| name | string | required | Desired app name |
+| disable_auth | boolean | false | Disable default token-based authentication on app endpoints |
+| include | string[] | ["*"] | Files/patterns to include in deployment |
+| exclude | string[] | [".*"] | Files/patterns to exclude from deployment |
+| deployment_initialization_timeout | integer | 600 (10 minutes) | The max time to wait for app initialization during build before timing out. Value must be between 60 and 830 |
+
+## Runtime Configuration
+
+Cerebrium supports three runtime types. Specify at most one runtime section in your configuration; if none is given, the default cortex runtime is used.
+
+### Auto-Py Runtime (Default)
+
+The `[runtime.cortex]` section configures the default Cerebrium-managed Python runtime. This is ideal for standard Python applications where Cerebrium automatically manages the web server.
+
+| Option | Type | Default | Description |
+| --------------------- | -------- | ---------------------- | --------------------------------------------- |
+| python_version | string | "3.11" | Python version to use (3.10, 3.11, 3.12) |
+| docker_base_image_url | string | "debian:bookworm-slim" | Base Docker image |
+| shell_commands | string[] | [] | Commands to run at the end of the build |
+| pre_build_commands | string[] | [] | Commands to run before dependencies install |
+| use_uv | boolean | false | Use UV for faster Python package installation |
+
+**Example:**
+
+```toml
+[deployment]
+name = "my-app"
+
+[runtime.cortex]
+python_version = "3.12"
+docker_base_image_url = "debian:bookworm-slim"
+use_uv = true
+```
Changes to python_version or docker_base_image_url trigger full rebuilds since
they affect the base environment.
+### Python Runtime (Custom ASGI/WSGI)
+
+The `[runtime.python]` section configures custom Python ASGI/WSGI web servers. Use this when you need full control over your web server implementation for features like custom authentication, dynamic batching, or WebSocket connections.
+
+| Option | Type | Default | Description |
+| --------------------- | -------- | ------------------------------------------------------------------ | ---------------------------------------------------------------------- |
+| python_version | string | "3.11" | Python version to use (3.10, 3.11, 3.12) |
+| docker_base_image_url | string | "debian:bookworm-slim" | Base Docker image |
+| shell_commands | string[] | [] | Commands to run at the end of the build |
+| pre_build_commands | string[] | [] | Commands to run before dependencies install |
+| use_uv | boolean | false | Use UV for faster Python package installation |
+| entrypoint | string[] | ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] | Command to start the application |
+| port | integer | 8000 | Port the application listens on |
+| healthcheck_endpoint | string | "" | HTTP path for health checks (empty uses TCP). Failure causes restart |
+| readycheck_endpoint | string | "" | HTTP path for readiness checks (empty uses TCP). Failure stops routing |
+
+**Example:**
+
+```toml
+[deployment]
+name = "my-fastapi-app"
+
+[runtime.python]
+python_version = "3.11"
+entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+port = 8000
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+
+[runtime.python.dependencies.pip]
+fastapi = "latest"
+uvicorn = "latest"
+```
+
+
+ The port specified in `entrypoint` must match the `port` parameter. All endpoints
+ will be available at `https://api.aws.us-east-1.cerebrium.ai/v4/{project-id}/{app-name}/your/endpoint`.
+
+
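For illustration, the `main:app` assumed in the entrypoint above could even be a framework-free ASGI callable (a minimal sketch; the FastAPI setup shown in the example is the more typical choice):

```python
# main.py - a dependency-free ASGI app exposing the health and readiness paths
# referenced by healthcheck_endpoint and readycheck_endpoint (illustrative sketch).
import json

async def app(scope, receive, send):
    assert scope["type"] == "http"
    if scope["path"] in ("/health", "/ready"):
        status, payload = 200, {"status": "ok"}
    else:
        status, payload = 404, {"error": "not found"}
    body = json.dumps(payload).encode()
    await send({
        "type": "http.response.start",
        "status": status,
        "headers": [(b"content-type", b"application/json")],
    })
    await send({"type": "http.response.body", "body": body})
```

It runs under the same entrypoint (`uvicorn main:app --host 0.0.0.0 --port 8000`); a non-200 response from `/health` marks the instance unhealthy.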
+### Docker Runtime (Custom Dockerfile)
+
+The `[runtime.docker]` section configures deployments using custom Dockerfiles. Use this for non-Python applications or when you need complete control over the container build process.
+
+| Option | Type | Default | Description |
+| -------------------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| dockerfile_path | string | required | Relative path to a custom Dockerfile |
+| entrypoint | string[] | [] | Command to start the application. Required if Dockerfile has no CMD/ENTRYPOINT. **Always takes precedence over Dockerfile CMD/ENTRYPOINT when specified.** |
+| port | integer | 8000 | Port the application listens on |
+| healthcheck_endpoint | string | "" | HTTP path for health checks (empty uses TCP). Failure causes the instance to restart |
+| readycheck_endpoint | string | "" | HTTP path for readiness checks (empty uses TCP). Failure ensures the load balancer does not route to the instance |
+
+**Example:**
+
+```toml
+[deployment]
+name = "my-docker-app"
+
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
+port = 8080
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+```
+
+
+ **Entrypoint Precedence:** When both `entrypoint` in `cerebrium.toml` and
+ `CMD`/`ENTRYPOINT` in your Dockerfile are defined, the TOML `entrypoint`
+ always takes precedence. This allows you to override your Dockerfile's default
+ command at deploy time without modifying the Dockerfile itself.
+
+
+
+ When using `dockerfile_path`, all dependencies and build commands should be
+ handled within the Dockerfile. The `[dependencies.*]` sections,
+ `shell_commands`, and `pre_build_commands` will be ignored.
+
+
### UV Package Manager
-UV is a fast Python package installer written in Rust that can significantly speed up deployment times. When enabled, UV will be used instead of pip for installing Python dependencies.
+UV is a fast Python package installer written in Rust that can significantly speed up deployment times. When enabled in `[runtime.cortex]` or `[runtime.python]`, UV will be used instead of pip for installing Python dependencies.
UV typically installs packages 10-100x faster than pip, especially beneficial for:
@@ -49,7 +165,7 @@ UV typically installs packages 10-100x faster than pip, especially beneficial fo
**Example with UV enabled:**
```toml
-[cerebrium.deployment]
+[runtime.cortex]
use_uv = true
```
@@ -89,26 +205,9 @@ Include in your deployment:
- Ensure requirements.txt is in your project directory
- Deploy with UV enabled
-## Runtime Configuration
-
-The `[cerebrium.runtime.custom]` section configures custom web servers and runtime behavior.
-
-| Option | Type | Default | Description |
-| -------------------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------------- |
-| port | integer | required | Port the application listens on |
-| entrypoint | string[] | required | Command to start the application |
-| healthcheck_endpoint | string | "" | HTTP path for health checks (empty uses TCP). Failure causes the instance to restart |
-| readycheck_endpoint | string | "" | HTTP path for readiness checks (empty uses TCP). Failure ensures the load balancer does not route to the instance |
-
-
- The port specified in entrypoint must match the port parameter. All endpoints
- will be available at `https://api.aws.us-east-1.cerebrium.ai/v4/{project - id}
- /{app - name}/your/endpoint`
-
-
## Hardware Configuration
-The `[cerebrium.hardware]` section defines compute resources.
+The `[hardware]` section defines compute resources.
| Option | Type | Default | Description |
| --------- | ------- | ----------- | ------------------------------------ |
@@ -126,7 +225,7 @@ The `[cerebrium.hardware]` section defines compute resources.
## Scaling Configuration
-The `[cerebrium.scaling]` section controls auto-scaling behavior.
+The `[scaling]` section controls auto-scaling behavior.
| Option | Type | Default | CLI Requirement | Description |
| ------------------------- | ------- | ------------------------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -151,8 +250,8 @@ The `scaling_metric` options are:
- **concurrency_utilization**: Maintains a percentage of your replica_concurrency across instances. For example, with `replica_concurrency=200` and `scaling_target=80`, maintains 160 requests per instance.
- **requests_per_second**: Maintains a specific request rate across all instances. For example, `scaling_target=5` maintains 5 requests/s average across instances.
-- **cpu_utilization**: Maintains CPU usage as a percentage of cerebrium.hardware.cpu. For example, with `cpu=2` and `scaling_target=80`, maintains 80% CPU utilization (1.6 CPUs) per instance.
-- **memory_utilization**: Maintains RAM usage as a percentage of cerebrium.hardware.memory. For example, with `memory=10` and `scaling_target=80`, maintains 80% memory utilization (8GB) per instance.
+- **cpu_utilization**: Maintains CPU usage as a percentage of hardware.cpu. For example, with `cpu=2` and `scaling_target=80`, maintains 80% CPU utilization (1.6 CPUs) per instance.
+- **memory_utilization**: Maintains RAM usage as a percentage of hardware.memory. For example, with `memory=10` and `scaling_target=80`, maintains 80% memory utilization (8GB) per instance.
The scaling_buffer option is only available with concurrency_utilization and requests_per_second metrics.
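The arithmetic behind these targets can be sketched as a simplified model (illustrative only; the real autoscaler also applies the evaluation window and cooldown):

```python
import math

def desired_replicas(current_load, scaling_target, replica_concurrency,
                     min_replicas, max_replicas, scaling_buffer=0):
    """Replicas needed so each instance runs at scaling_target% of capacity."""
    per_replica = replica_concurrency * scaling_target / 100  # e.g. 200 * 80% = 160
    needed = math.ceil(current_load / per_replica) + scaling_buffer
    return max(min_replicas, min(max_replicas, needed))

# 480 concurrent requests, replica_concurrency=200, scaling_target=80:
# each replica should hold ~160 requests, so ceil(480 / 160) = 3 replicas.
print(desired_replicas(480, 80, 200, min_replicas=0, max_replicas=5))  # → 3
```

With `min_replicas=0` and `scaling_buffer=3`, the same formula keeps 3 replicas warm even at zero load.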
@@ -164,70 +263,126 @@ For example, with `min_replicas=0` and `scaling_buffer=3`, the system will maint
## Dependencies
-### Pip Dependencies
+Dependencies can be specified either at the runtime level (recommended) or at the top level. Runtime-level dependencies keep packages alongside the runtime configuration that uses them.
+
+### Runtime-Specific Dependencies (Recommended)
+
+Declare dependencies inside the relevant runtime section:
+
+```toml
+[runtime.cortex]
+python_version = "3.12"
+
+[runtime.cortex.dependencies.pip]
+torch = "==2.0.0" # Exact version
+numpy = "latest" # Latest version
+pandas = ">=1.5.0" # Minimum version
+
+[runtime.cortex.dependencies.apt]
+ffmpeg = "latest"
+libopenblas-base = "latest"
+
+[runtime.cortex.dependencies.conda]
+cuda = ">=11.7"
+cudatoolkit = "11.7"
+```
+
+#### Using Requirements Files
+
+You can also specify a requirements file using the special `_file_relative_path` key. When both a file and inline packages are specified, they are merged (inline packages take precedence):
+
+```toml
+[runtime.cortex.dependencies.pip]
+_file_relative_path = "requirements.txt" # Base packages from file
+numpy = "1.24.0" # Override or add packages
+```
+
+This approach works with any runtime type (`cortex`, `python`, or partner services).
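The merge rule (file contents as the base, inline keys winning on conflict) amounts to a plain dictionary update; a hypothetical sketch:

```python
def merge_pip_dependencies(file_packages, inline_packages):
    """Inline packages override entries loaded from the requirements file."""
    merged = dict(file_packages)    # base: parsed from _file_relative_path
    merged.update(inline_packages)  # inline keys win on conflict
    return merged

from_file = {"numpy": "1.23.0", "requests": "2.31.0"}
inline = {"numpy": "1.24.0"}  # overrides the file's numpy pin
print(merge_pip_dependencies(from_file, inline))
# → {'numpy': '1.24.0', 'requests': '2.31.0'}
```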
+
+
+ **Deprecated:** Top-level `[dependencies.*]` sections are deprecated. Please
+ move your dependencies to `[runtime.{type}.dependencies.*]`. Dependencies
+ specified at the runtime level take precedence over top-level dependencies
+ when both are present.
+
-The `[cerebrium.dependencies.pip]` section lists Python package requirements.
+### Top-Level Dependencies (Deprecated)
+
+
+ This approach is deprecated. Please migrate to runtime-specific dependencies
+ above.
+
+
+#### Pip Dependencies
+
+The `[dependencies.pip]` section lists Python package requirements.
```toml
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = "==2.0.0" # Exact version
numpy = "latest" # Latest version
pandas = ">=1.5.0" # Minimum version
```
-### APT Dependencies
+#### APT Dependencies
-The `[cerebrium.dependencies.apt]` section specifies system packages.
+The `[dependencies.apt]` section specifies system packages.
```toml
-[cerebrium.dependencies.apt]
+[dependencies.apt]
ffmpeg = "latest"
libopenblas-base = "latest"
```
-### Conda Dependencies
+#### Conda Dependencies
-The `[cerebrium.dependencies.conda]` section manages Conda packages.
+The `[dependencies.conda]` section manages Conda packages.
```toml
-[cerebrium.dependencies.conda]
+[dependencies.conda]
cuda = ">=11.7"
cudatoolkit = "11.7"
```
-### Dependency Files
+#### Dependency Files
-The `[cerebrium.dependencies.paths]` section allows using requirement files.
+Use the special `_file_relative_path` key to load dependencies from a file. When both a file and inline packages are specified, they are merged (inline packages take precedence):
```toml
-[cerebrium.dependencies.paths]
-pip = "requirements.txt"
-apt = "pkglist.txt"
-conda = "conda_pkglist.txt"
+[dependencies.pip]
+_file_relative_path = "requirements.txt" # Load from file
+numpy = "1.24.0" # Override or add packages
+
+[dependencies.apt]
+_file_relative_path = "pkglist.txt"
+
+[dependencies.conda]
+_file_relative_path = "conda_pkglist.txt"
```
-## Complete Example
+## Complete Examples
+
+### Auto-Py Runtime (Default)
```toml
-[cerebrium.deployment]
+[deployment]
name = "llm-inference"
-python_version = "3.12"
disable_auth = false
include = ["*"]
exclude = [".*"]
-shell_commands = []
-pre_build_commands = []
+
+[runtime.cortex]
+python_version = "3.12"
docker_base_image_url = "debian:bookworm-slim"
use_uv = true
-# Enable fast package installation with UV (omit or set to false if you want to use pip)
+shell_commands = []
+pre_build_commands = []
-[cerebrium.runtime.custom]
-port = 8000
-entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
-healthcheck_endpoint = "/health"
-readycheck_endpoint = "/ready"
+[runtime.cortex.dependencies.pip]
+torch = "latest"
+transformers = "latest"
-[cerebrium.hardware]
+[hardware]
cpu = 4
memory = 16.0
compute = "AMPERE_A10"
@@ -235,7 +390,7 @@ gpu_count = 1
provider = "aws"
region = "us-east-1"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
replica_concurrency = 10
@@ -244,23 +399,133 @@ cooldown = 1800
scaling_metric = "concurrency_utilization"
scaling_target = 100
evaluation_interval = 30
-# load_balancing = "" # Auto-selects based on replica_concurrency
roll_out_duration_seconds = 0
+```
+
+### Python Runtime (FastAPI)
+
+```toml
+[deployment]
+name = "fastapi-server"
+disable_auth = false
+include = ["*"]
+exclude = [".*"]
-[cerebrium.dependencies.pip]
+[runtime.python]
+python_version = "3.11"
+entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+port = 8000
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+
+[runtime.python.dependencies.pip]
torch = "latest"
transformers = "latest"
uvicorn = "latest"
+fastapi = "latest"
-[cerebrium.dependencies.apt]
-ffmpeg = "latest"
+[hardware]
+cpu = 4
+memory = 16.0
+compute = "AMPERE_A10"
+gpu_count = 1
+provider = "aws"
+region = "us-east-1"
+
+[scaling]
+min_replicas = 0
+max_replicas = 2
+replica_concurrency = 10
+```
+
+### Docker Runtime
+
+```toml
+[deployment]
+name = "rust-server"
+include = ["*"]
+exclude = [".*"]
-[cerebrium.dependencies.conda]
-# Optional conda dependencies
+[runtime.docker]
+dockerfile_path = "./Dockerfile"
+port = 8192
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
-[cerebrium.dependencies.paths]
-# Optional paths to dependency files
-# pip = "requirements.txt"
-# apt = "pkglist.txt"
-# conda = "conda_pkglist.txt"
+[hardware]
+cpu = 4
+memory = 16.0
+compute = "CPU"
+provider = "aws"
+region = "us-east-1"
+
+[scaling]
+min_replicas = 0
+max_replicas = 2
+replica_concurrency = 10
```
+
+## Backwards Compatibility
+
+
+ The following configuration patterns are deprecated but still supported for
+ backwards compatibility. We recommend migrating to the new format.
+
+
+### Deprecated: cerebrium.\* prefix
+
+The `cerebrium.` prefix on all section names is deprecated. Please migrate to the new format:
+
+| Deprecated Format | New Format |
+| ------------------------------ | -------------------- |
+| `[cerebrium.deployment]` | `[deployment]` |
+| `[cerebrium.runtime.cortex]` | `[runtime.cortex]` |
+| `[cerebrium.runtime.python]` | `[runtime.python]` |
+| `[cerebrium.runtime.docker]` | `[runtime.docker]` |
+| `[cerebrium.hardware]` | `[hardware]` |
+| `[cerebrium.scaling]` | `[scaling]` |
+| `[cerebrium.dependencies.pip]` | `[dependencies.pip]` |
+| `[cerebrium.dependencies.apt]` | `[dependencies.apt]` |
+
+### Deprecated: Runtime fields in [deployment]
+
+The following fields in `[deployment]` are deprecated. Please move them to the appropriate runtime section:
+
+| Deprecated Field | New Location |
+| --------------------- | ---------------------------------------- |
+| python_version | `[runtime.cortex]` or `[runtime.python]` |
+| docker_base_image_url | `[runtime.cortex]` or `[runtime.python]` |
+| shell_commands | `[runtime.cortex]` or `[runtime.python]` |
+| pre_build_commands | `[runtime.cortex]` or `[runtime.python]` |
+| use_uv | `[runtime.cortex]` or `[runtime.python]` |
+
+### Deprecated: [runtime.custom]
+
+The `[runtime.custom]` section is deprecated. Please migrate to:
+
+- `[runtime.python]` - For custom Python ASGI/WSGI applications
+- `[runtime.docker]` - For custom Dockerfile deployments (when using `dockerfile_path`)
+
+### Deprecated: Top-level [dependencies.*]
+
+Top-level dependency sections are deprecated. Please move dependencies to runtime-specific sections:
+
+| Deprecated Location | New Location |
+| ---------------------- | ------------------------------------- |
+| `[dependencies.pip]` | `[runtime.{type}.dependencies.pip]` |
+| `[dependencies.apt]` | `[runtime.{type}.dependencies.apt]` |
+| `[dependencies.conda]` | `[runtime.{type}.dependencies.conda]` |
+
+Where `{type}` is your runtime type (e.g., `cortex`, `python`).
+
+
+ The `[dependencies.paths]` section has been replaced by the
+ `_file_relative_path` key inside dependency maps. See the Dependencies section
+ above for the new syntax.
+
+
+
+ When both top-level and runtime-specific dependencies are present,
+ runtime-specific dependencies take precedence on a per-package basis. This
+ allows gradual migration.
+
diff --git a/v4/examples/aiVoiceAgents.mdx b/v4/examples/aiVoiceAgents.mdx
index f554a880..f59c4dbb 100644
--- a/v4/examples/aiVoiceAgents.mdx
+++ b/v4/examples/aiVoiceAgents.mdx
@@ -35,24 +35,24 @@ For our LLM we deploy a OpenAI compatible Llama-3 endpoint using the vLLM framew
Run `cerebrium init llama-llm` and add the following code to your cerebrium.toml:
```
-[cerebrium.deployment]
+[deployment]
name = "llama-llm"
python_version = "3.11"
docker_base_image_url = "debian:bookworm-slim"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = [".*"]
-[cerebrium.hardware]
+[hardware]
cpu = 4
memory = 12.0
compute = "ADA_L40"
-[cerebrium.scaling]
+[scaling]
min_replicas = 1
max_replicas = 5
cooldown = 60
-[cerebrium.dependencies.pip]
+[dependencies.pip]
vllm = "latest"
pydantic = "latest"
```
@@ -169,13 +169,13 @@ In your IDE, run the following command to create our Cerebrium starter project:
Add the following pip packages to your `cerebrium.toml` to create your deployment environment:
```
-[cerebrium.deployment]
+[deployment]
name = "pipecat-agent"
python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./example_exclude"]
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "CPU"
@@ -183,12 +183,12 @@ cpu = 6
memory = 18.0
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 1 # Note: This incurs a constant cost since at least one instance is always running.
max_replicas = 2
cooldown = 180
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = ">=2.0.0"
"pipecat-ai[silero, daily, openai, deepgram, cartesia]" = "==0.0.67"
aiohttp = ">=3.9.4"
diff --git a/v4/examples/asgi-gradio-interface.mdx b/v4/examples/asgi-gradio-interface.mdx
index 06d40ae1..e69b884c 100644
--- a/v4/examples/asgi-gradio-interface.mdx
+++ b/v4/examples/asgi-gradio-interface.mdx
@@ -39,30 +39,30 @@ cerebrium init 2-gradio-interface
Next, let us add the following configuration to our `cerebrium.toml` file:
```toml
-[cerebrium.deployment]
+[deployment]
name = "2-gradio-interface"
python_version = "3.12"
disable_auth = true
include = ['./*', 'main.py', 'cerebrium.toml']
exclude = ['.*']
-[cerebrium.runtime.custom]
+[runtime.custom]
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
port = 8080
healthcheck_endpoint = "/health"
-[cerebrium.hardware]
+[hardware]
cpu = 2
memory = 4.0
compute = "CPU"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
cooldown = 30
replica_concurrency = 10
-[cerebrium.dependencies.pip]
+[dependencies.pip]
gradio = "latest"
fastapi = "latest"
requests = "latest"
diff --git a/v4/examples/comfyUI.mdx b/v4/examples/comfyUI.mdx
index 31436a51..79e4a5cb 100644
--- a/v4/examples/comfyUI.mdx
+++ b/v4/examples/comfyUI.mdx
@@ -261,24 +261,24 @@ def shutdown_event():
Before deploying our application, we must first define the deployment configuration, which installs all our dependencies, contains our hardware and scaling configurations, as well as any scripts which must be executed before running our app. The following should be added to our `cerebrium.toml` file:
```
-[cerebrium.deployment]
+[deployment]
name = "1-comfyui"
python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml", "workflow.json", "workflow_api.json", "helpers.py", "model.json"]
exclude = ["./example_exclude", "./ComfyUI", "./ComfyUI/models/checkpoints/sd_xl_base_1.0.safetensors", "./ComfyUI/models/controlnet/diffusion_pytorch_model.fp16.safetensors"]
shell_commands = ["git clone https://github.com/comfyanonymous/ComfyUI", "pip install -r ComfyUI/requirements.txt"]
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 8765
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8765"]
healthcheck_endpoint = "/health"
-[cerebrium.hardware]
+[hardware]
compute = "AMPERE_A10"
cpu = 4
memory = 16.0
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
cooldown = 30
@@ -288,7 +288,7 @@ scaling_metric = "concurrency_utilization"
scaling_target = 100
scaling_buffer = 0
-[cerebrium.dependencies.pip]
+[dependencies.pip]
uvicorn = "latest"
fastapi = "latest"
requests = "latest"
@@ -313,7 +313,7 @@ tqdm = "latest"
psutil = "latest"
kornia = ">=0.7.1"
-[cerebrium.dependencies.apt]
+[dependencies.apt]
git = "latest"
```
diff --git a/v4/examples/deploy-a-vision-language-model-with-sglang.mdx b/v4/examples/deploy-a-vision-language-model-with-sglang.mdx
index f507c1f6..d70c4d8a 100644
--- a/v4/examples/deploy-a-vision-language-model-with-sglang.mdx
+++ b/v4/examples/deploy-a-vision-language-model-with-sglang.mdx
@@ -72,25 +72,25 @@ While we use flashinfer as our backend here, other options like flash attention
Update your cerebrium.toml with:
```toml
-[cerebrium.deployment]
+[deployment]
name = "7-vision-language-sglang"
python_version = "3.11"
docker_base_image_url = "nvidia/cuda:12.8.0-devel-ubuntu22.04"
deployment_initialization_timeout = 860
-[cerebrium.hardware]
+[hardware]
cpu = 6.0
memory = 60.0
compute = "ADA_L40"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
-[cerebrium.build]
+[build]
use_uv = true
-[cerebrium.dependencies.pip]
+[dependencies.pip]
transformers = "latest"
huggingface_hub = "latest"
pydantic = "latest"
@@ -101,10 +101,10 @@ torch = "latest"
"sgl-kernel" = "latest"
"flashinfer-python" = "latest"
-[cerebrium.dependencies.apt]
+[dependencies.apt]
libnuma-dev = "latest"
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 8000
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
```
diff --git a/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx b/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx
index 6874dcf5..4cd36407 100644
--- a/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx
+++ b/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx
@@ -388,7 +388,7 @@ The Dockerfile uses Nvidia's official Triton container with TensorRT-LLM pre-ins
We now configure our container and autoscaling environment in `cerebrium.toml`. This file defines the hardware resources and scaling behavior:
```toml
-[cerebrium.deployment]
+[deployment]
name = "tensorrt-triton-demo"
python_version = "3.12"
disable_auth = true
@@ -396,7 +396,7 @@ include = ['./*', 'cerebrium.toml']
exclude = ['.*']
deployment_initialization_timeout = 830
-[cerebrium.hardware]
+[hardware]
cpu = 4.0
memory = 40.0
compute = "AMPERE_A10"
@@ -404,14 +404,14 @@ gpu_count = 1
provider = "aws"
region = "us-east-1"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 300
replica_concurrency = 128
scaling_metric = "concurrency_utilization"
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 8000
healthcheck_endpoint = "/v2/health/live"
readycheck_endpoint = "/v2/health/ready"
diff --git a/v4/examples/gpt-oss.mdx b/v4/examples/gpt-oss.mdx
index 5b3e6b12..5168c0f5 100644
--- a/v4/examples/gpt-oss.mdx
+++ b/v4/examples/gpt-oss.mdx
@@ -27,7 +27,7 @@ In this tutorial, we will show the most simple variation of deploying this model
2. Edit your toml file with the following settings
```
-[cerebrium.deployment]
+[deployment]
name = "7-openai-gpt-oss"
python_version = "3.12"
docker_base_image_url = "nvidia/cuda:12.8.1-devel-ubuntu22.04"
@@ -42,14 +42,14 @@ pre_build_commands = [
"uv pip install huggingface_hub[hf_transfer]==0.34"
]
-[cerebrium.hardware]
+[hardware]
cpu = 8.0
memory = 18.0
compute = "HOPPER_H100"
provider = "aws"
region = "us-east-1"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 30
@@ -57,7 +57,6 @@ replica_concurrency = 32
scaling_metric = "concurrency_utilization"
-scaling
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 8000
-entrypoint = ["sh", "-c", "export HF_HUB_ENABLE_HF_TRANSFER=1 && export VLLM_USER_V1=1 && vllm serve openai/gpt-oss-20b --enforce-eager"]
+entrypoint = ["sh", "-c", "export HF_HUB_ENABLE_HF_TRANSFER=1 && export VLLM_USE_V1=1 && vllm serve openai/gpt-oss-20b --enforce-eager"]
```
diff --git a/v4/examples/high-throughput-embeddings.mdx b/v4/examples/high-throughput-embeddings.mdx
index ffe131c3..539afc0c 100644
--- a/v4/examples/high-throughput-embeddings.mdx
+++ b/v4/examples/high-throughput-embeddings.mdx
@@ -36,7 +36,7 @@ docker login -u your-dockerhub-username
Now that you are logged in, you can add the following to your cerebrium.toml
```
-[cerebrium.deployment]
+[deployment]
name = "1-high-throughput"
python_version = "3.11"
docker_base_image_url = "michaelf34/infinity:0.0.77"
@@ -48,20 +48,20 @@ exclude = ['.*']
Depending on your hardware type and model(s) you select you will have different autoscaling criteria. You can define this with the following sections in your cerebrium.toml:
```
-[cerebrium.hardware]
+[hardware]
cpu = 6.0
memory = 12.0
compute = "AMPERE_A10"
region = "us-east-1"
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 2
cooldown = 30
replica_concurrency = 500
scaling_metric = "concurrency_utilization"
-[cerebrium.dependencies.pip]
+[dependencies.pip]
numpy = "latest"
"infinity-emb[all]" = "0.0.77"
optimum = ">=1.24.0,<2.0.0"
@@ -193,7 +193,7 @@ async def classify(sentences: list[str] = Body(...), model_index: int = Body(3))
Now you have a multi-purpose embedding server! Let us update our cerebrium.toml to point it to our FastAPI server. Add the following section:
```
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 5000
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "5000"]
healthcheck_endpoint = "/health"
diff --git a/v4/examples/langchain-langsmith.mdx b/v4/examples/langchain-langsmith.mdx
index 39724e05..c073d157 100644
--- a/v4/examples/langchain-langsmith.mdx
+++ b/v4/examples/langchain-langsmith.mdx
@@ -144,7 +144,7 @@ Set up Cerebrium:
Add these pip packages to your `cerebrium.toml`:
```
-[cerebrium.dependencies.pip]
+[dependencies.pip]
pydantic = "latest"
langchain = "latest"
pytz = "latest" ##this is used for timezones
diff --git a/v4/examples/langchain.mdx b/v4/examples/langchain.mdx
index 6d863d59..5d0bf041 100644
--- a/v4/examples/langchain.mdx
+++ b/v4/examples/langchain.mdx
@@ -17,10 +17,10 @@ First, create your project:
cerebrium init 1-langchain-QA
```
-Add these Python packages to the `[cerebrium.dependencies.pip]` section of your `cerebrium.toml` file:
+Add these Python packages to the `[dependencies.pip]` section of your `cerebrium.toml` file:
```toml
-[cerebrium.dependencies.pip]
+[dependencies.pip]
pytube = "latest" # For audio downloading
langchain = "latest"
faiss-gpu = "latest"
@@ -30,10 +30,10 @@ transformers = ">=4.35.0"
sentence_transformers = ">=2.2.0"
```
-Whisper requires ffmpeg and other Linux packages. Add them to the `[cerebrium.dependencies]` section:
+Whisper requires ffmpeg and other Linux packages. Add them to the `[dependencies]` section:
```toml
-[cerebrium.dependencies]
+[dependencies]
apt = [ "ffmpeg", "libopenblas-base", "libomp-dev"]
```
@@ -148,35 +148,35 @@ Configure your compute and environment settings in `cerebrium.toml`:
```toml
-[cerebrium.build]
+[build]
predict_data = "{\"prompt\": \"Here is some example predict data for your cerebrium.toml which will be used to test your predict function on build.\"}"
force_rebuild = false
disable_animation = false
log_level = "INFO"
disable_confirmation = false
-[cerebrium.deployment]
+[deployment]
name = "langchain-qa"
python_version = "3.10"
include = ["./*", "main.py"]
exclude = ["./.*", "./__*"]
-[cerebrium.hardware]
+[hardware]
gpu = "AMPERE_A5000"
cpu = 2
memory = 16.0
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
cooldown = 60
-[cerebrium.dependencies.apt]
+[dependencies.apt]
ffmpeg = "latest"
"libopenblas-base" = "latest"
"libomp-dev" = "latest"
-[cerebrium.dependencies.pip]
+[dependencies.pip]
pytube = "latest" # For audio downloading
langchain = "latest"
faiss-gpu = "latest"
@@ -185,7 +185,7 @@ openai-whisper = "latest"
transformers = ">=4.35.0"
sentence_transformers = ">=2.2.0"
-[cerebrium.dependencies.conda]
+[dependencies.conda]
```
diff --git a/v4/examples/livekit-outbound-agent.mdx b/v4/examples/livekit-outbound-agent.mdx
index d7a1c05c..013fa15e 100644
--- a/v4/examples/livekit-outbound-agent.mdx
+++ b/v4/examples/livekit-outbound-agent.mdx
@@ -387,7 +387,7 @@ CMD ["python3", "main.py", "start"]
In our `cerebrium.toml`, edit the values with the following contents:
```
-[cerebrium.deployment]
+[deployment]
name = "outbound-livekit-agent"
python_version = "3.11"
docker_base_image_url = ""
@@ -395,21 +395,21 @@ disable_auth = false
include = ['./*', 'main.py', 'cerebrium.toml']
exclude = ['.*']
-[cerebrium.hardware]
+[hardware]
cpu = 2
memory = 8.0
compute = "CPU"
-[cerebrium.scaling]
+[scaling]
min_replicas = 1
max_replicas = 5
cooldown = 30
replica_concurrency = 1
-[cerebrium.dependencies.paths]
-pip = "requirements.txt"
+[dependencies.pip]
+_file_relative_path = "requirements.txt"
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 8600
dockerfile_path = "./Dockerfile"
```
diff --git a/v4/examples/mistral-vllm.mdx b/v4/examples/mistral-vllm.mdx
index 17df4c4d..b95ee62f 100644
--- a/v4/examples/mistral-vllm.mdx
+++ b/v4/examples/mistral-vllm.mdx
@@ -23,10 +23,10 @@ First, create your project:
cerebrium init 1-faster-inference-with-vllm
```
-Add these Python packages to the `[cerebrium.dependencies.pip]` section in your `cerebrium.toml` file:
+Add these Python packages to the `[dependencies.pip]` section in your `cerebrium.toml` file:
```toml
-[cerebrium.dependencies.pip]
+[dependencies.pip]
sentencepiece = "latest"
torch = ">=2.0.0"
vllm = "latest"
@@ -103,7 +103,7 @@ Configure your compute and environment settings in `cerebrium.toml`:
```toml
-[cerebrium.build]
+[build]
predict_data = "{\"prompt\": \"Here is some example predict data for your config.yaml which will be used to test your predict function on build.\"}"
hide_public_endpoint = false
disable_animation = false
@@ -113,14 +113,14 @@ disable_predict = false
log_level = "INFO"
disable_confirmation = false
-[cerebrium.deployment]
+[deployment]
name = "1-faster-inference-with-vllm"
python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./example_exclude"]
docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "AMPERE_A10"
@@ -128,12 +128,12 @@ cpu = 2
memory = 16.0
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 60
-[cerebrium.dependencies.pip]
+[dependencies.pip]
huggingface-hub = "latest"
sentencepiece = "latest"
torch = ">=2.0.0"
@@ -142,9 +142,9 @@ transformers = ">=4.35.0"
accelerate = "latest"
xformers = "latest"
-[cerebrium.dependencies.conda]
+[dependencies.conda]
-[cerebrium.dependencies.apt]
+[dependencies.apt]
ffmpeg = "latest"
```
diff --git a/v4/examples/openai-compatible-endpoint-vllm.mdx b/v4/examples/openai-compatible-endpoint-vllm.mdx
index 1451c108..c12fd736 100644
--- a/v4/examples/openai-compatible-endpoint-vllm.mdx
+++ b/v4/examples/openai-compatible-endpoint-vllm.mdx
@@ -19,12 +19,12 @@ In your IDE, run the following command to create our Cerebrium starter project:
Add the following pip packages and hardware requirements to your `cerebrium.toml` to create your deployment environment:
```toml
-[cerebrium.hardware]
+[hardware]
cpu = 2
memory = 12.0
compute = "AMPERE_A10"
-[cerebrium.dependencies.pip]
+[dependencies.pip]
vllm = "latest"
pydantic = "latest"
```
diff --git a/v4/examples/realtime-voice-agents.mdx b/v4/examples/realtime-voice-agents.mdx
index f051a838..80fdeb82 100644
--- a/v4/examples/realtime-voice-agents.mdx
+++ b/v4/examples/realtime-voice-agents.mdx
@@ -38,24 +38,24 @@ For our LLM we deploy an OpenAI-compatible Llama-3 endpoint using the vLLM framew
Run `cerebrium init llama-llm` and add the following code to your cerebrium.toml:
```
-[cerebrium.deployment]
+[deployment]
name = "llama-llm"
python_version = "3.11"
docker_base_image_url = "debian:bookworm-slim"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = [".*"]
-[cerebrium.hardware]
+[hardware]
cpu = 4
memory = 12.0
compute = "ADA_L40"
-[cerebrium.scaling]
+[scaling]
min_replicas = 1
max_replicas = 5
cooldown = 60
-[cerebrium.dependencies.pip]
+[dependencies.pip]
vllm = "latest"
pydantic = "latest"
```
@@ -169,30 +169,29 @@ In your IDE, run the following command to create our pipecat-agent: `cerebrium i
Add the following pip packages to your `cerebrium.toml` to create your deployment environment:
```
-[cerebrium.deployment]
# This file was automatically generated by Cerebrium as a starting point for your project.
# You can edit it as you wish.
# If you would like to learn more about your Cerebrium config, please visit https://docs.cerebrium.ai/cerebrium/environments/config-files#config-file-example
-[cerebrium.deployment]
+[deployment]
name = "pipecat-agent"
python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./example_exclude"]
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "CPU"
cpu = 6
memory = 12.0
-[cerebrium.scaling]
+[scaling]
min_replicas = 1 # Note: This incurs a constant cost since at least one instance is always running.
max_replicas = 2
cooldown = 180
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = ">=2.0.0"
"pipecat-ai[silero, daily, openai, deepgram, cartesia]" = "==0.0.67"
aiohttp = ">=3.9.4"
diff --git a/v4/examples/sdxl.mdx b/v4/examples/sdxl.mdx
index 5ecf164b..dd8cf8cd 100644
--- a/v4/examples/sdxl.mdx
+++ b/v4/examples/sdxl.mdx
@@ -27,13 +27,13 @@ Configure your compute and environment settings in `cerebrium.toml`:
```toml
-[cerebrium.deployment]
+[deployment]
name = "3-sdxl-refiner"
python_version = "3.10"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./.*", "./__*"]
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "AMPERE_A10"
@@ -41,21 +41,21 @@ cpu = 2
memory = 16.0
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 60
-[cerebrium.dependencies.pip]
+[dependencies.pip]
accelerate = "latest"
transformers = ">=4.35.0"
safetensors = "latest"
opencv-python = "latest"
diffusers = "latest"
-[cerebrium.dependencies.conda]
+[dependencies.conda]
-[cerebrium.dependencies.apt]
+[dependencies.apt]
ffmpeg = "latest"
```
diff --git a/v4/examples/streaming-falcon-7B.mdx b/v4/examples/streaming-falcon-7B.mdx
index 096d2711..457f9493 100644
--- a/v4/examples/streaming-falcon-7B.mdx
+++ b/v4/examples/streaming-falcon-7B.mdx
@@ -23,10 +23,10 @@ First, create your project:
cerebrium init 5-streaming-endpoint
```
-Add the following packages to the `[cerebrium.dependencies.pip]` section of your `cerebrium.toml` file:
+Add the following packages to the `[dependencies.pip]` section of your `cerebrium.toml` file:
```toml
-[cerebrium.dependencies.pip]
+[dependencies.pip]
peft = "git+https://github.com/huggingface/peft.git"
transformers = "git+https://github.com/huggingface/transformers.git"
accelerate = "git+https://github.com/huggingface/accelerate.git"
@@ -128,7 +128,7 @@ The function receives inputs from our request object and uses `TextIteratorStrea
Configure your compute and environment settings in `cerebrium.toml`:
```toml
-[cerebrium.build]
+[build]
predict_data = "{\"prompt\": \"Here is some example predict data for your config.yaml which will be used to test your predict function on build.\"}"
hide_public_endpoint = false
disable_animation = false
@@ -138,14 +138,14 @@ disable_predict = false
log_level = "INFO"
disable_confirmation = false
-[cerebrium.deployment]
+[deployment]
name = "5-streaming-endpoint"
python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./example_exclude"]
docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "AMPERE_A10"
@@ -153,12 +153,12 @@ cpu = 2
memory = 16.0
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 60
-[cerebrium.dependencies.pip]
+[dependencies.pip]
peft = "git+https://github.com/huggingface/peft.git"
transformers = "git+https://github.com/huggingface/transformers.git"
accelerate = "git+https://github.com/huggingface/accelerate.git"
@@ -167,9 +167,9 @@ sentencepiece = "latest"
pydantic = "latest"
torch = "2.1.0"
-[cerebrium.dependencies.conda]
+[dependencies.conda]
-[cerebrium.dependencies.apt]
+[dependencies.apt]
```
diff --git a/v4/examples/transcribe-whisper.mdx b/v4/examples/transcribe-whisper.mdx
index acfaca5a..bec7c569 100644
--- a/v4/examples/transcribe-whisper.mdx
+++ b/v4/examples/transcribe-whisper.mdx
@@ -17,10 +17,10 @@ First, create your project:
cerebrium init 1-whisper-transcription
```
-Add the following packages to the `[cerebrium.dependencies.pip]` section of your `cerebrium.toml` file:
+Add the following packages to the `[dependencies.pip]` section of your `cerebrium.toml` file:
```toml
-[cerebrium.dependencies.pip]
+[dependencies.pip]
accelerate = "latest"
transformers = ">=4.35.0"
openai-whisper = "latest"
@@ -106,14 +106,14 @@ The `predict` function, which runs only on inference requests, creates an audio
Configure your compute and environment settings in `cerebrium.toml`:
```toml
-[cerebrium.deployment]
+[deployment]
name = "1-whisper-transcription"
python_version = "3.11"
include = ["./*", "main.py", "cerebrium.toml"]
exclude = ["./example_exclude"]
docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "AMPERE_A10"
@@ -121,20 +121,20 @@ cpu = 3
memory = 12.0
gpu_count = 1
-[cerebrium.scaling]
+[scaling]
min_replicas = 0
max_replicas = 5
cooldown = 60
-[cerebrium.dependencies.pip]
+[dependencies.pip]
accelerate = "latest"
transformers = ">=4.35.0"
openai-whisper = "latest"
pydantic = "latest"
-[cerebrium.dependencies.conda]
+[dependencies.conda]
-[cerebrium.dependencies.apt]
+[dependencies.apt]
"ffmpeg" = "latest"
```
diff --git a/v4/examples/twilio-voice-agent.mdx b/v4/examples/twilio-voice-agent.mdx
index 7125da6a..aed73605 100644
--- a/v4/examples/twilio-voice-agent.mdx
+++ b/v4/examples/twilio-voice-agent.mdx
@@ -26,7 +26,7 @@ Set up Cerebrium:
Add these pip packages to your `cerebrium.toml`:
```
-[cerebrium.dependencies.pip]
+[dependencies.pip]
torch = ">=2.0.0"
"pipecat-ai[silero, daily, openai, deepgram, cartesia, twilio]" = "0.0.47"
aiohttp = ">=3.9.4"
@@ -99,7 +99,7 @@ Replace the stream URL with your deployment's base endpoint, using your project
Configure Cerebrium to run the FastAPI server by adding this to `cerebrium.toml`:
```
-[cerebrium.runtime.custom]
+[runtime.custom]
port = 8765
entrypoint = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8765"]
healthcheck_endpoint = "/health"
@@ -263,14 +263,14 @@ For scaling criteria, use Cerebrium's `replica_concurrency` setting to spawn new
To make the two updates above you can update your cerebrium.toml to contain the following:
```
-[cerebrium.hardware]
+[hardware]
region = "us-east-1"
provider = "aws"
compute = "CPU"
cpu = 10
memory = 8.0
-[cerebrium.scaling]
+[scaling]
min_replicas = 1
max_replicas = 3
cooldown = 30
diff --git a/v4/examples/wandb-sweep.mdx b/v4/examples/wandb-sweep.mdx
index 30465087..36c10a47 100644
--- a/v4/examples/wandb-sweep.mdx
+++ b/v4/examples/wandb-sweep.mdx
@@ -114,17 +114,17 @@ Add this configuration:
```
#existing configuration
-[cerebrium.hardware]
+[hardware]
cpu = 6
memory = 30.0
compute = "ADA_L40"
-[cerebrium.scaling]
+[scaling]
#existing configuration
response_grace_period = 3600
-[cerebrium.dependencies.paths]
-pip = "requirements.txt"
+[dependencies.pip]
+_file_relative_path = "requirements.txt"
```
Install the dependencies locally: