Wan2.1/entrypoint.sh

#!/usr/bin/env bash
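# Runtime environment: run out of /home/user, keep Python output unbuffered,
# and keep the Hugging Face cache under the user's home directory.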
export HOME=/home/user
export PYTHONUNBUFFERED=1
export HF_HOME=/home/user/.cache/huggingface
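# Size CPU thread pools (OpenMP/MKL/OpenBLAS/NumExpr) to the available core count.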
export OMP_NUM_THREADS=$(nproc)
export MKL_NUM_THREADS=$(nproc)
export OPENBLAS_NUM_THREADS=$(nproc)
export NUMEXPR_NUM_THREADS=$(nproc)
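# Intended to opt in to TF32 matrix math on Ampere+ GPUs (speed over full FP32 precision).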
export TORCH_ALLOW_TF32_CUBLAS=1
export TORCH_ALLOW_TF32_CUDNN=1
# Disable audio warnings in Docker
export SDL_AUDIODRIVER=dummy
export PULSE_RUNTIME_PATH=/tmp/pulse-runtime
# ═══════════════════════════ CUDA DEBUG CHECKS ═══════════════════════════
echo "🔍 CUDA Environment Debug Information:"
echo "═══════════════════════════════════════════════════════════════════════"
# Check CUDA driver on host (if accessible)
if command -v nvidia-smi >/dev/null 2>&1; then
    echo "✅ nvidia-smi available"
    echo "📊 GPU Information:"
    nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free --format=csv,noheader,nounits 2>/dev/null || echo "❌ nvidia-smi failed to query GPU"
    echo "🏃 Running Processes:"
    nvidia-smi --query-compute-apps=pid,name,used_memory --format=csv,noheader,nounits 2>/dev/null || echo " No running CUDA processes"
else
    echo "❌ nvidia-smi not available in container"
fi
# Check CUDA runtime libraries
echo ""
echo "🔧 CUDA Runtime Check:"
if ls /usr/local/cuda*/lib*/libcudart.so* >/dev/null 2>&1; then
    echo "✅ CUDA runtime libraries found:"
    ls /usr/local/cuda*/lib*/libcudart.so* 2>/dev/null
else
    echo "❌ CUDA runtime libraries not found"
fi
# Check CUDA devices
echo ""
echo "🖥️ CUDA Device Files:"
if ls /dev/nvidia* >/dev/null 2>&1; then
    echo "✅ NVIDIA device files found:"
    ls -la /dev/nvidia* 2>/dev/null
else
    echo "❌ No NVIDIA device files found - Docker may not have GPU access"
fi
# Check CUDA environment variables
echo ""
echo "🌍 CUDA Environment Variables:"
echo " CUDA_HOME: ${CUDA_HOME:-not set}"
echo " CUDA_ROOT: ${CUDA_ROOT:-not set}"
echo " CUDA_PATH: ${CUDA_PATH:-not set}"
echo " LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}"
echo " TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-not set}"
echo " CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"
# Check PyTorch CUDA availability
echo ""
echo "🐍 PyTorch CUDA Check:"
python3 -c "
import sys
try:
    import torch
    print('✅ PyTorch imported successfully')
    print(f' Version: {torch.__version__}')
    print(f' CUDA available: {torch.cuda.is_available()}')
    if torch.cuda.is_available():
        print(f' CUDA version: {torch.version.cuda}')
        print(f' cuDNN version: {torch.backends.cudnn.version()}')
        print(f' Device count: {torch.cuda.device_count()}')
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            print(f' Device {i}: {props.name} (SM {props.major}.{props.minor}, {props.total_memory//1024//1024}MB)')
    else:
        print('❌ CUDA not available to PyTorch')
        print(' This could mean:')
        print(' - CUDA runtime not properly installed')
        print(' - GPU not accessible to container')
        print(' - Driver/runtime version mismatch')
except ImportError as e:
    print(f'❌ Failed to import PyTorch: {e}')
except Exception as e:
    print(f'❌ PyTorch CUDA check failed: {e}')
" 2>&1
# Check for common CUDA issues
echo ""
echo "🩺 Common Issue Diagnostics:"
# Check if running with proper Docker flags
if [ ! -e /dev/nvidia0 ] && [ ! -e /dev/nvidiactl ]; then
    echo "❌ No NVIDIA device nodes - container likely missing --gpus all or --runtime=nvidia"
fi
# Check CUDA library paths
if [ -z "$LD_LIBRARY_PATH" ] || ! echo "$LD_LIBRARY_PATH" | grep -q cuda; then
    echo "⚠️ LD_LIBRARY_PATH may not include CUDA libraries"
fi
# Check permissions on device files
if ls /dev/nvidia* >/dev/null 2>&1; then
    if ! ls -la /dev/nvidia* | grep -q "rw-rw-rw-\|rw-r--r--"; then
        echo "⚠️ NVIDIA device files may have restrictive permissions"
    fi
fi
echo "═══════════════════════════════════════════════════════════════════════"
echo "🚀 Starting application..."
echo ""
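# Drop privileges to the 'user' account and launch the app; any arguments passed
# to this entrypoint are appended to the command line after --listen.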
exec su -p user -c "python3 wgp.py --listen $*"