From f3725180f77c981519376fc6e56fd82af9fdb4ca Mon Sep 17 00:00:00 2001 From: Sam Date: Sat, 13 Dec 2025 11:02:33 +1100 Subject: [PATCH] feat: add docker support, gitignore --- .dockerignore | 43 ++++++++++++ .gitignore | 160 ++++++++++++++++++++++++++++++++++++++++++++ Dockerfile | 59 ++++++++++++++++ docker-compose.yaml | 44 ++++++++++++ 4 files changed, 306 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 docker-compose.yaml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..09f43f7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,43 @@ +# Git +.git +.gitignore + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info +.eggs +*.egg +.venv +venv +ENV + +# IDE +.idea +.vscode +*.swp +*.swo + +# Build artifacts +build +dist + +# Testing +.pytest_cache +.coverage +htmlcov + +# Documentation and assets (not needed at runtime) +assets +*.md +!README.md + +# Tests (optional - include if you want to run tests in container) +# tests + +# OS files +.DS_Store +Thumbs.db diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..68bc17f --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8dc0447 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,59 @@ +# dInfer - Inference framework for diffusion LLMs +# https://github.com/inclusionAI/dInfer +# +# Build: +# docker build -t dinfer . 
+# +# Run benchmark with sample prompt (single GPU): +# docker run --gpus '"device=0"' -v /path/to/models:/models dinfer \ +# python benchmarks/benchmark.py --model_name /models/LLaDA-8B-Instruct \ +# --model_type llada --gpu 0 +# +# Run benchmark (multi-GPU with tensor parallelism): +# docker run --gpus all --ipc=host -v /path/to/models:/models dinfer \ +# python benchmarks/benchmark.py --model_name /models/LLaDA-MoE-7B-A1B-Instruct \ +# --model_type llada_moe --gpu 0,1,2,3 --use_tp +# +# Convert MoE model to FusedMoE format: +# docker run --gpus '"device=0"' -v /path/to/models:/models dinfer \ +# python -m tools.transfer --input /models/LLaDA-MoE-7B-A1B-Instruct \ +# --output /models/LLaDA-MoE-7B-A1B-Instruct-fused +# +# Interactive Python session: +# docker run -it --gpus all --ipc=host -v /path/to/models:/models dinfer python +# +# Note: OpenAI-compatible API serving is not currently supported. +# Use the Python API (dinfer.DiffusionLLMServing) for programmatic inference. + +FROM vllm/vllm-openai:v0.12.0 +# Or use :nightly for the latest (unpinned) build + +LABEL org.opencontainers.image.source="https://github.com/inclusionAI/dInfer" +LABEL org.opencontainers.image.description="dInfer - Inference framework for diffusion LLMs" +LABEL org.opencontainers.image.licenses="Apache-2.0" + +WORKDIR /app + +# Copy project files +COPY setup.py . +COPY python/ python/ +COPY tools/ tools/ +COPY benchmarks/ benchmarks/ +COPY evaluations/ evaluations/ + +# Install dInfer without pinned dependencies (base image provides vllm) +# Then install compatible versions of remaining dependencies +RUN pip install --no-cache-dir --no-deps . 
&& \ + pip install --no-cache-dir scipy tqdm hf_transfer sglang + +# For running evaluations, install additional dependencies: +# pip install accelerate evaluate datasets lm_eval + +# Enable HuggingFace transfer for faster model downloads +ENV HF_HUB_ENABLE_HF_TRANSFER=1 + +# Disable tokenizers parallelism warning +ENV TOKENIZERS_PARALLELISM=false + +# Default to running a benchmark help command +CMD ["python", "benchmarks/benchmark.py", "--help"] diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..04900c5 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,44 @@ +# dInfer Docker Compose example +# +# Prerequisites: +# - NVIDIA Container Toolkit installed +# - Docker Compose v2.x with GPU support +# +# Example usage: +# +# - Run benchmark with sample prompt: +# docker compose run --rm dinfer python benchmarks/benchmark.py \ +# --model_name /models/LLaDA-MoE-7B-A1B-Instruct --model_type llada_moe \ +# --gpu 0,1,2,3 --use_tp +# +# - Convert a MoE model to FusedMoE format: +# docker compose run --rm dinfer python -m tools.transfer \ +# --input /models/LLaDA-MoE-7B-A1B-Instruct \ +# --output /models/LLaDA-MoE-7B-A1B-Instruct-fused +# +# - Interactive Python session: +# docker compose run --rm dinfer python + +services: + &name dinfer: + hostname: *name + container_name: *name + build: + context: . + dockerfile: Dockerfile + image: dinfer:latest + stdin_open: true + tty: true + volumes: + - ./models:/models + environment: + - HF_HUB_ENABLE_HF_TRANSFER=1 + - TOKENIZERS_PARALLELISM=false + ipc: host + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu]