# NOTE: pip reads one requirement specifier per line, and everything after a
# '#' is a comment. Keep each package on its own line so it is actually installed.

# Core AI stack
diffusers==0.34.0
transformers==4.53.1
tokenizers>=0.20.3
accelerate>=1.1.1
tqdm
imageio
imageio-ffmpeg
einops
sentencepiece
open_clip_torch>=2.29.0

# Video & media
moviepy==1.0.3
av
ffmpeg-python
pygame>=2.1.0
sounddevice>=0.4.0
soundfile
mutagen
pyloudnorm
librosa==0.11.0
speechbrain==1.0.3

# UI & interaction
gradio==5.29.0
dashscope
loguru

# Vision & segmentation
opencv-python>=4.9.0.80
segment-anything
rembg[gpu]==2.0.65
onnxruntime-gpu
decord
timm

# Config & orchestration
omegaconf
hydra-core
easydict
pydantic==2.10.6

# Math & modeling
torchdiffeq>=0.2.5
tensordict>=0.6.1
mmgp==3.6.0
peft==0.15.0
matplotlib

# Utilities
ftfy
piexif
pynvml
misaki

# Optional / commented out
# transformers==4.46.3  # for llamallava pre-patch
# rembg==2.0.65  # non-GPU fallback
# huggingface_hub[hf_xet]  # slows down everything
# num2words
# spacy