inclusionAI · sammcj · Dec 13, 2025
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,43 @@
+# Git
+.git
+.gitignore
+
+# Python (bare patterns only match at the context root; **/ matches at any depth)
+**/__pycache__
+**/*.py[cod]
+**/*$py.class
+**/*.so
+.Python
+**/*.egg-info
+.eggs
+**/*.egg
+.venv
+venv
+ENV
+
+# IDE
+.idea
+.vscode
+**/*.swp
+**/*.swo
+
+# Build artifacts
+build
+dist
+
+# Testing
+**/.pytest_cache
+.coverage
+htmlcov
+
+# Documentation and assets (not needed at runtime)
+assets
+*.md
+!README.md
+
+# Tests (optional - include if you want to run tests in container)
+# tests
+
+# OS files
+**/.DS_Store
+**/Thumbs.db
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,59 @@
+# dInfer - Inference framework for diffusion LLMs
+# https://github.com/inclusionAI/dInfer
+#
+# Build:
+#   docker build -t dinfer .
+#
+# Run benchmark with sample prompt (single GPU):
+#   docker run --gpus '"device=0"' -v /path/to/models:/models dinfer \
+#     python benchmarks/benchmark.py --model_name /models/LLaDA-8B-Instruct \
+#     --model_type llada --gpu 0
+#
+# Run benchmark (multi-GPU with tensor parallelism):
+#   docker run --gpus all --ipc=host -v /path/to/models:/models dinfer \
+#     python benchmarks/benchmark.py --model_name /models/LLaDA-MoE-7B-A1B-Instruct \
+#     --model_type llada_moe --gpu 0,1,2,3 --use_tp
+#
+# Convert MoE model to FusedMoE format:
+#   docker run --gpus '"device=0"' -v /path/to/models:/models dinfer \
+#     python -m tools.transfer --input /models/LLaDA-MoE-7B-A1B-Instruct \
+#     --output /models/LLaDA-MoE-7B-A1B-Instruct-fused
+#
+# Interactive Python session:
+#   docker run -it --gpus all --ipc=host -v /path/to/models:/models dinfer python
+#
+# Note: OpenAI-compatible API serving is not currently supported.
+# Use the Python API (dinfer.DiffusionLLMServing) for programmatic inference.
+
+# Nightly is a moving tag; pin one for reproducible builds, e.g. --build-arg VLLM_TAG=v0.12.0
+ARG VLLM_TAG=nightly
+FROM vllm/vllm-openai:${VLLM_TAG}
+LABEL org.opencontainers.image.source="https://github.com/inclusionAI/dInfer" \
+      org.opencontainers.image.description="dInfer - Inference framework for diffusion LLMs" \
+      org.opencontainers.image.licenses="Apache-2.0"
+
+WORKDIR /app
+
+# Package sources plus the script directories that are invoked from /app
+COPY setup.py ./
+COPY python/ ./python/
+COPY tools/ ./tools/
+COPY benchmarks/ ./benchmarks/
+COPY evaluations/ ./evaluations/
+
+# --no-deps stops pip from installing dInfer's declared requirements (the base image
+# already provides vllm); the remaining runtime packages are then installed unpinned.
+RUN pip install --no-cache-dir --no-deps . \
+    && pip install --no-cache-dir scipy tqdm hf_transfer sglang
+
+# Optional extras for the evaluation scripts (not installed by this image):
+#   pip install accelerate evaluate datasets lm_eval
+
+# Faster model downloads via hf_transfer; silence the tokenizers parallelism warning
+ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
+    TOKENIZERS_PARALLELISM=false
+
+# Reset the ENTRYPOINT inherited from vllm/vllm-openai (the vLLM API server) so that CMD and
+# `docker run dinfer <command>` run as given; by default, print the benchmark's help text.
+ENTRYPOINT []
+CMD ["python", "benchmarks/benchmark.py", "--help"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -0,0 +1,44 @@
+# dInfer Docker Compose example
+#
+# Prerequisites:
+#   - NVIDIA Container Toolkit installed
+#   - Docker Compose v2.x with GPU support
+#
+# Example usage:
+#
+#   - Run benchmark with sample prompt:
+#      docker compose run --rm dinfer python benchmarks/benchmark.py \
+#        --model_name /models/LLaDA-MoE-7B-A1B-Instruct --model_type llada_moe \
+#        --gpu 0,1,2,3 --use_tp
+#
+#   - Convert a MoE model to FusedMoE format:
+#      docker compose run --rm dinfer python -m tools.transfer \
+#        --input /models/LLaDA-MoE-7B-A1B-Instruct \
+#        --output /models/LLaDA-MoE-7B-A1B-Instruct-fused
+#
+#   - Interactive Python session:
+#      docker compose run --rm dinfer python
+
+services:
+  &name dinfer:  # anchor the service name so the two fields below can reuse it
+    hostname: *name
+    container_name: *name
+    build:
+      context: .  # build the checkout that contains this file, not a remote fork
+      dockerfile: Dockerfile
+    image: dinfer:latest
+    stdin_open: true  # with tty, keeps `docker compose run ... python` interactive
+    tty: true
+    volumes:
+      - ./models:/models  # model directory referenced as /models in the usage examples
+    environment:
+      - HF_HUB_ENABLE_HF_TRANSFER=1
+      - TOKENIZERS_PARALLELISM=false
+    ipc: host  # same setting as --ipc=host in the Dockerfile's multi-GPU run example
+    deploy:
+      resources:
+        reservations:
+          devices:  # request every NVIDIA GPU on the host
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]