diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..927c579
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,92 @@
+FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
+
+# Build arg for GPU architectures - specify which CUDA compute capabilities to compile for
+# Common values:
+#   7.0  - Tesla V100
+#   7.5  - RTX 2060, 2070, 2080, Titan RTX
+#   8.0  - A100, A800 (Ampere data center)
+#   8.6  - RTX 3060, 3070, 3080, 3090 (Ampere consumer)
+#   8.9  - RTX 4070, 4080, 4090 (Ada Lovelace)
+#   9.0  - H100, H800 (Hopper data center)
+#   12.0 - RTX 5070, 5080, 5090 (Blackwell, sm_120)
+#
+# Examples:
+#   RTX 3060: --build-arg CUDA_ARCHITECTURES="8.6"
+#   RTX 4090: --build-arg CUDA_ARCHITECTURES="8.9"
+#   Multiple: --build-arg CUDA_ARCHITECTURES="8.0;8.6;8.9"
+#
+# Note: including 8.9 or 9.0 may cause compilation issues on some setups.
+# The default covers 8.0 and 8.6 for broad Ampere compatibility.
+ARG CUDA_ARCHITECTURES="8.0;8.6"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies
+RUN apt update && \
+    apt install -y \
+        python3 python3-pip git wget curl cmake ninja-build \
+        libgl1 libglib2.0-0 ffmpeg && \
+    apt clean
+
+WORKDIR /workspace
+
+COPY requirements.txt .
+
+# Upgrade pip first
+RUN pip install --upgrade pip setuptools wheel
+
+# Install Python requirements
+RUN pip install -r requirements.txt
+
+# Install PyTorch with CUDA support
+RUN pip install --extra-index-url https://download.pytorch.org/whl/cu124 \
+    torch==2.6.0+cu124 torchvision==0.21.0+cu124
+
+# Install SageAttention from git (patch GPU detection)
+ENV TORCH_CUDA_ARCH_LIST="${CUDA_ARCHITECTURES}"
+ENV FORCE_CUDA="1"
+ENV MAX_JOBS="1"
+
+COPY <
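
Since compiling for the wrong architecture only fails at run time, it can save a long build to first confirm the host driver can actually drive the CUDA 12.4 base image. A sketch, using the matching `base` tag of the same image line:

    # report the installed driver version
    nvidia-smi --query-gpu=driver_version --format=csv,noheader

    # smoke-test GPU passthrough against the CUDA 12.4.1 base image
    docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi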

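After a build, one way to sanity-check that the installed PyTorch wheel matches the requested architectures is `torch.cuda.get_arch_list()`, which lists the SM targets the binary ships kernels for. A sketch; the Dockerfile tail is truncated above, so whether entrypoint.sh is registered as the image ENTRYPOINT is an assumption, and `--entrypoint` sidesteps it either way:

    docker run --rm --gpus all --entrypoint python3 deepbeepmeep/wan2gp \
        -c "import torch; print(torch.cuda.get_arch_list()); print(torch.cuda.get_device_name(0))"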

diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100755
index 0000000..9af052d
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+export HOME=/home/user
+export PYTHONUNBUFFERED=1
+export HF_HOME=/home/user/.cache/huggingface
+
+export OMP_NUM_THREADS=$(nproc)
+export MKL_NUM_THREADS=$(nproc)
+export OPENBLAS_NUM_THREADS=$(nproc)
+export NUMEXPR_NUM_THREADS=$(nproc)
+
+export TORCH_ALLOW_TF32_CUBLAS=1
+export TORCH_ALLOW_TF32_CUDNN=1
+
+# Disable audio warnings in Docker
+export SDL_AUDIODRIVER=dummy
+export PULSE_RUNTIME_PATH=/tmp/pulse-runtime
+
+# ═══════════════════════════ CUDA DEBUG CHECKS ═══════════════════════════
+
+echo "πŸ” CUDA Environment Debug Information:"
+echo "═══════════════════════════════════════════════════════════════════════"
+
+# Check CUDA driver on host (if accessible)
+if command -v nvidia-smi >/dev/null 2>&1; then
+    echo "βœ… nvidia-smi available"
+    echo "πŸ“Š GPU Information:"
+    nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free --format=csv,noheader,nounits 2>/dev/null || echo "❌ nvidia-smi failed to query GPU"
+    echo "πŸƒ Running Processes:"
+    nvidia-smi --query-compute-apps=pid,name,used_memory --format=csv,noheader,nounits 2>/dev/null || echo "ℹ️ No running CUDA processes"
+else
+    echo "❌ nvidia-smi not available in container"
+fi
+
+# Check CUDA runtime libraries
+echo ""
+echo "πŸ”§ CUDA Runtime Check:"
+if ls /usr/local/cuda*/lib*/libcudart.so* >/dev/null 2>&1; then
+    echo "βœ… CUDA runtime libraries found:"
+    ls /usr/local/cuda*/lib*/libcudart.so* 2>/dev/null
+else
+    echo "❌ CUDA runtime libraries not found"
+fi
+
+# Check CUDA device files
+echo ""
+echo "πŸ–₯️ CUDA Device Files:"
+if ls /dev/nvidia* >/dev/null 2>&1; then
+    echo "βœ… NVIDIA device files found:"
+    ls -la /dev/nvidia* 2>/dev/null
+else
+    echo "❌ No NVIDIA device files found - Docker may not have GPU access"
+fi
+
+# Check CUDA environment variables
+echo ""
+echo "🌍 CUDA Environment Variables:"
+echo "   CUDA_HOME: ${CUDA_HOME:-not set}"
+echo "   CUDA_ROOT: ${CUDA_ROOT:-not set}"
+echo "   CUDA_PATH: ${CUDA_PATH:-not set}"
+echo "   LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}"
+echo "   TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-not set}"
+echo "   CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"
+
+# Check PyTorch CUDA availability
+echo ""
+echo "🐍 PyTorch CUDA Check:"
+python3 -c "
+import sys
+try:
+    import torch
+    print('βœ… PyTorch imported successfully')
+    print(f'   Version: {torch.__version__}')
+    print(f'   CUDA available: {torch.cuda.is_available()}')
+    if torch.cuda.is_available():
+        print(f'   CUDA version: {torch.version.cuda}')
+        print(f'   cuDNN version: {torch.backends.cudnn.version()}')
+        print(f'   Device count: {torch.cuda.device_count()}')
+        for i in range(torch.cuda.device_count()):
+            props = torch.cuda.get_device_properties(i)
+            print(f'   Device {i}: {props.name} (SM {props.major}.{props.minor}, {props.total_memory//1024//1024}MB)')
+    else:
+        print('❌ CUDA not available to PyTorch')
+        print('   This could mean:')
+        print('   - CUDA runtime not properly installed')
+        print('   - GPU not accessible to container')
+        print('   - Driver/runtime version mismatch')
+except ImportError as e:
+    print(f'❌ Failed to import PyTorch: {e}')
+except Exception as e:
+    print(f'❌ PyTorch CUDA check failed: {e}')
+" 2>&1
+
+# Check for common CUDA issues
+echo ""
+echo "🩺 Common Issue Diagnostics:"
+
+# Check if running with proper Docker flags
+if [ ! -e /dev/nvidia0 ] && [ ! -e /dev/nvidiactl ]; then
+    echo "❌ No NVIDIA device nodes - container likely missing --gpus all or --runtime=nvidia"
+fi
+
+# Check CUDA library paths
+if [ -z "$LD_LIBRARY_PATH" ] || ! echo "$LD_LIBRARY_PATH" | grep -q cuda; then
+    echo "⚠️ LD_LIBRARY_PATH may not include CUDA libraries"
+fi
+
+# Check permissions on device files
+if ls /dev/nvidia* >/dev/null 2>&1; then
+    if ! ls -la /dev/nvidia* | grep -q "rw-rw-rw-\|rw-r--r--"; then
+        echo "⚠️ NVIDIA device files may have restrictive permissions"
+    fi
+fi
+
+echo "═══════════════════════════════════════════════════════════════════════"
+echo "πŸš€ Starting application..."
+echo ""
+
+exec su -p user -c "python3 wgp.py --listen $*"
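
Because the final exec forwards `$*`, anything placed after the image name at `docker run` time lands on wgp.py's command line after `--listen`. For example, with the flags run-docker-cuda-deb.sh itself uses below:

    docker run --rm -it --gpus all -p 7860:7860 deepbeepmeep/wan2gp \
        --profile 2 --attention sage
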
diff --git a/run-docker-cuda-deb.sh b/run-docker-cuda-deb.sh
new file mode 100755
index 0000000..b35e9cc
--- /dev/null
+++ b/run-docker-cuda-deb.sh
@@ -0,0 +1,210 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ───────────────────────── helpers ─────────────────────────
+
+install_nvidia_smi_if_missing() {
+    if command -v nvidia-smi &>/dev/null; then
+        return
+    fi
+
+    echo "⚠️ nvidia-smi not found. Installing nvidia-utils…"
+    if [ "$EUID" -ne 0 ]; then
+        SUDO='sudo'
+    else
+        SUDO=''
+    fi
+
+    $SUDO apt-get update
+    $SUDO apt-get install -y nvidia-utils-535 || $SUDO apt-get install -y nvidia-utils
+
+    if ! command -v nvidia-smi &>/dev/null; then
+        echo "❌ Failed to install nvidia-smi. Cannot detect GPU architecture."
+        exit 1
+    fi
+    echo "βœ… nvidia-smi installed successfully."
+}
+
+detect_gpu_name() {
+    install_nvidia_smi_if_missing
+    nvidia-smi --query-gpu=name --format=csv,noheader,nounits | head -1
+}
+
+map_gpu_to_arch() {
+    local name="$1"
+    case "$name" in
+        *"RTX 50"* | *"5090"* | *"5080"* | *"5070"*) echo "12.0" ;;
+        *"H100"* | *"H800"*) echo "9.0" ;;
+        *"RTX 40"* | *"4090"* | *"4080"* | *"4070"* | *"4060"*) echo "8.9" ;;
+        *"RTX 30"* | *"3090"* | *"3080"* | *"3070"* | *"3060"*) echo "8.6" ;;
+        *"A100"* | *"A800"* | *"A40"*) echo "8.0" ;;
+        *"Tesla V100"*) echo "7.0" ;;
+        *"RTX 20"* | *"2080"* | *"2070"* | *"2060"* | *"Titan RTX"*) echo "7.5" ;;
+        *"GTX 16"* | *"1660"* | *"1650"*) echo "7.5" ;;
+        *"GTX 10"* | *"1080"* | *"1070"* | *"1060"*) echo "6.1" ;;
+        *"Tesla P100"*) echo "6.0" ;;
+        # Kepler (sm_35/sm_37) is not supported by CUDA 12.x; these builds will fail
+        *"Tesla K80"*) echo "3.7" ;;
+        *"Tesla K40"*) echo "3.5" ;;
+        *)
+            # write errors to stderr so they are not captured by $(map_gpu_to_arch ...)
+            echo "❌ Unknown GPU model: $name" >&2
+            echo "Please update the map_gpu_to_arch function for this model." >&2
+            exit 1
+            ;;
+    esac
+}
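+
+# Optional fallback (sketch, not wired in): on recent drivers the compute
+# capability can be read directly instead of matched by model name; the
+# compute_cap query field is absent on older driver branches.
+detect_arch_from_driver() {
+    nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1
+}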
+
+get_gpu_vram() {
+    install_nvidia_smi_if_missing
+    # Get VRAM in MB, convert to GB
+    local vram_mb
+    vram_mb=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -1)
+    echo $((vram_mb / 1024))
+}
+
+map_gpu_to_profile() {
+    local name="$1"
+    local vram_gb="$2"
+
+    # WanGP profile descriptions from the actual UI:
+    #   Profile 1: HighRAM_HighVRAM   - 48GB+ RAM, 24GB+ VRAM (fastest for short videos, RTX 3090/4090)
+    #   Profile 2: HighRAM_LowVRAM    - 48GB+ RAM, 12GB+ VRAM (recommended, most versatile)
+    #   Profile 3: LowRAM_HighVRAM    - 32GB+ RAM, 24GB+ VRAM (RTX 3090/4090 with limited RAM)
+    #   Profile 4: LowRAM_LowVRAM     - 32GB+ RAM, 12GB+ VRAM (default, little VRAM or longer videos)
+    #   Profile 5: VerylowRAM_LowVRAM - 16GB+ RAM, 10GB+ VRAM (fail safe, slow but works)
+
+    case "$name" in
+        # High-end data center GPUs with 24GB+ VRAM - Profile 1 (HighRAM_HighVRAM)
+        *"RTX 50"* | *"5090"* | *"A100"* | *"A800"* | *"H100"* | *"H800"*)
+            if [ "$vram_gb" -ge 24 ]; then
+                echo "1"  # HighRAM_HighVRAM - fastest for short videos
+            else
+                echo "2"  # HighRAM_LowVRAM - most versatile
+            fi
+            ;;
+        # High-end consumer GPUs (RTX 3090/4090) - Profile 3 or 2
+        *"RTX 40"* | *"4090"* | *"RTX 30"* | *"3090"*)
+            if [ "$vram_gb" -ge 24 ]; then
+                echo "3"  # LowRAM_HighVRAM - good for limited-RAM systems
+            else
+                echo "2"  # HighRAM_LowVRAM - most versatile
+            fi
+            ;;
+        # Mid-range GPUs (RTX 3070/3080/4070/4080) - Profile 2 recommended
+        *"4080"* | *"4070"* | *"3080"* | *"3070"* | *"RTX 20"* | *"2080"* | *"2070"*)
+            if [ "$vram_gb" -ge 12 ]; then
+                echo "2"  # HighRAM_LowVRAM - recommended for these GPUs
+            else
+                echo "4"  # LowRAM_LowVRAM - default for little VRAM
+            fi
+            ;;
+        # Lower-end GPUs with 6-12GB VRAM - Profile 4 or 5
+        *"4060"* | *"3060"* | *"2060"* | *"GTX 16"* | *"1660"* | *"1650"*)
+            if [ "$vram_gb" -ge 10 ]; then
+                echo "4"  # LowRAM_LowVRAM - default
+            else
+                echo "5"  # VerylowRAM_LowVRAM - fail safe
+            fi
+            ;;
+        # Older / low-VRAM GPUs - Profile 5 (fail safe)
+        *"GTX 10"* | *"1080"* | *"1070"* | *"1060"* | *"Tesla"*)
+            echo "5"  # VerylowRAM_LowVRAM - fail safe
+            ;;
+        *)
+            echo "4"  # LowRAM_LowVRAM - default fallback
+            ;;
+    esac
+}
+
+# ───────────────────────── main ────────────────────────────
+
+echo "πŸ”§ NVIDIA CUDA Setup Check:"
+
+# NVIDIA driver check
+if command -v nvidia-smi &>/dev/null; then
+    DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | head -1)
+    echo "βœ… NVIDIA Driver: $DRIVER_VERSION"
+
+    # Quick CUDA 12.4 compatibility check
+    if [[ "$DRIVER_VERSION" =~ ^([0-9]+) ]]; then
+        MAJOR=${BASH_REMATCH[1]}
+        if [ "$MAJOR" -lt 520 ]; then
+            echo "⚠️ Driver $DRIVER_VERSION may not support CUDA 12.4 (need 520+)"
+        fi
+    fi
+else
+    echo "❌ nvidia-smi not found - no NVIDIA drivers"
+    exit 1
+fi
+
+GPU_NAME=$(detect_gpu_name)
+echo "πŸ” Detected GPU: $GPU_NAME"
+
+VRAM_GB=$(get_gpu_vram)
+echo "🧠 Detected VRAM: ${VRAM_GB}GB"
+
+CUDA_ARCH=$(map_gpu_to_arch "$GPU_NAME")
+echo "πŸš€ Using CUDA architecture: $CUDA_ARCH"
+
+PROFILE=$(map_gpu_to_profile "$GPU_NAME" "$VRAM_GB")
+echo "βš™οΈ Selected profile: $PROFILE"
+
+docker build --build-arg CUDA_ARCHITECTURES="$CUDA_ARCH" -t deepbeepmeep/wan2gp .
+
+# sudo helper for later commands
+if [ "$EUID" -ne 0 ]; then
+    SUDO='sudo'
+else
+    SUDO=''
+fi
+
+# Ensure the NVIDIA container runtime is available
+if ! docker info 2>/dev/null | grep -q 'Runtimes:.*nvidia'; then
+    echo "⚠️ NVIDIA Docker runtime not found. Installing nvidia-docker2…"
+    $SUDO apt-get update
+    $SUDO apt-get install -y curl ca-certificates gnupg
+    curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | $SUDO apt-key add -
+    distribution=$(
+        . /etc/os-release
+        echo $ID$VERSION_ID
+    )
+    curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list |
+        $SUDO tee /etc/apt/sources.list.d/nvidia-docker.list
+    $SUDO apt-get update
+    $SUDO apt-get install -y nvidia-docker2
+    echo "πŸ”„ Restarting Docker service…"
+    $SUDO systemctl restart docker
+    echo "βœ… NVIDIA Docker runtime installed."
+else
+    echo "βœ… NVIDIA Docker runtime found."
+fi
+
+# Quick NVIDIA runtime test
+echo "πŸ§ͺ Testing NVIDIA runtime..."
+if timeout 15s docker run --rm --gpus all --runtime=nvidia nvidia/cuda:12.4.1-runtime-ubuntu22.04 nvidia-smi >/dev/null 2>&1; then
+    echo "βœ… NVIDIA runtime working"
+else
+    echo "❌ NVIDIA runtime test failed - check driver/runtime compatibility"
+fi
+
+# Prepare cache dirs & volume mounts
+cache_dirs=(numba matplotlib huggingface torch)
+cache_mounts=()
+for d in "${cache_dirs[@]}"; do
+    mkdir -p "$HOME/.cache/$d"
+    chmod 700 "$HOME/.cache/$d"
+    cache_mounts+=(-v "$HOME/.cache/$d:/home/user/.cache/$d")
+done
+
+echo "πŸ”§ Optimization settings:"
+echo "   Profile: $PROFILE"
+
+# Run the container
+docker run --rm -it \
+    --name wan2gp \
+    --gpus all \
+    --runtime=nvidia \
+    -p 7860:7860 \
+    -v "$(pwd):/workspace" \
+    "${cache_mounts[@]}" \
+    deepbeepmeep/wan2gp \
+    --profile "$PROFILE" \
+    --attention sage \
+    --compile \
+    --perc-reserved-mem-max 1
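
With both files in place, usage on a Debian/Ubuntu host reduces to running the script from the repository root (it needs sudo rights only if nvidia-smi or the NVIDIA runtime must be installed):

    chmod +x run-docker-cuda-deb.sh
    ./run-docker-cuda-deb.sh

    # the web UI is then reachable on the published port from the
    # docker run flags above: http://localhost:7860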