mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 14:43:01 +02:00
Add local AI music generator using Meta's MusicGen
Features: - Generate music from text prompts using open-source MusicGen model - Support for small/medium/large models (500MB to 6.5GB) - CUDA, Apple Silicon MPS, and CPU support - Interactive mode with example prompts - Setup script that handles venv and GPU detection Usage: cd python_pkg/music_gen && ./setup.sh python music_generator.py 'upbeat electronic dance music'
This commit is contained in:
parent
55dac7965f
commit
bfef1a532b
@ -96,7 +96,7 @@ repos:
|
||||
- types-requests
|
||||
- types-PyYAML
|
||||
- types-python-dateutil
|
||||
exclude: ^(Bash/|\.venv/)
|
||||
exclude: ^(Bash/|\.venv/|python_pkg/music_gen/)
|
||||
|
||||
# ===========================================================================
|
||||
# PYLINT - Comprehensive Python linter
|
||||
|
||||
@ -68,6 +68,13 @@ unfixable = []
|
||||
"python_pkg/screen_locker/screen_lock.py" = [
|
||||
"FBT003", # Boolean positional values in tkinter API calls
|
||||
]
|
||||
# Music generator - CLI script with intentional patterns
|
||||
"python_pkg/music_gen/music_generator.py" = [
|
||||
"T201", # print() is intentional for CLI feedback
|
||||
"PLC0415", # Late imports for dependency checking
|
||||
"C901", # Complex interactive mode is acceptable
|
||||
"PLR0912", # Too many branches in interactive mode
|
||||
]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google" # Use Google docstring convention
|
||||
@ -127,6 +134,7 @@ color_output = true
|
||||
exclude = [
|
||||
"Bash/ffmpeg-build/",
|
||||
".venv/",
|
||||
"python_pkg/music_gen/", # Uses dynamic imports from transformers
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
|
||||
98
python_pkg/music_gen/README.md
Normal file
98
python_pkg/music_gen/README.md
Normal file
@ -0,0 +1,98 @@
|
||||
# MusicGen - Local AI Music Generator
|
||||
|
||||
Generate music from text prompts using Meta's open-source MusicGen model.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# 1. Run the setup script (creates venv, installs dependencies)
|
||||
cd python_pkg/music_gen
|
||||
./setup.sh
|
||||
|
||||
# 2. Activate the virtual environment
|
||||
source venv/bin/activate
|
||||
|
||||
# 3. Generate music!
|
||||
python music_generator.py "upbeat electronic dance music with synths"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Single Generation
|
||||
|
||||
```bash
|
||||
# Basic usage
|
||||
python music_generator.py "jazz piano with soft drums"
|
||||
|
||||
# Set duration (in seconds, max ~30 recommended)
|
||||
python music_generator.py --duration 20 "epic orchestral soundtrack"
|
||||
|
||||
# Use smaller/faster model
|
||||
python music_generator.py --model small "rock guitar riff"
|
||||
|
||||
# Use larger/better quality model (needs 16GB+ VRAM)
|
||||
python music_generator.py --model large "ambient electronic"
|
||||
```
|
||||
|
||||
### Interactive Mode
|
||||
|
||||
```bash
|
||||
python music_generator.py --interactive
|
||||
```
|
||||
|
||||
In interactive mode:
|
||||
|
||||
- Type prompts to generate music
|
||||
- `:d 15` - Set duration to 15 seconds
|
||||
- `:h` - Show example prompts (enter a listed number to use that prompt)
|
||||
- `:q` - Quit
|
||||
|
||||
## Model Sizes
|
||||
|
||||
| Model | Size | VRAM | Quality | Speed |
|
||||
| ------ | ------ | ----- | ------- | ------ |
|
||||
| small | ~500MB | ~4GB | Good | Fast |
|
||||
| medium | ~3.3GB | ~8GB | Better | Medium |
|
||||
| large | ~6.5GB | ~16GB | Best | Slow |
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.10+
|
||||
- 8GB+ RAM (16GB recommended)
|
||||
- GPU recommended (CUDA or Apple Silicon MPS)
|
||||
- Works on CPU but much slower
|
||||
|
||||
## Output
|
||||
|
||||
Generated audio files are saved to `./output/` as WAV files with timestamps.
|
||||
|
||||
## Example Prompts
|
||||
|
||||
- "upbeat electronic dance music with heavy bass"
|
||||
- "calm acoustic guitar melody with soft percussion"
|
||||
- "epic orchestral soundtrack with dramatic strings"
|
||||
- "lo-fi hip hop beats for studying"
|
||||
- "80s synthwave with retro vibes"
|
||||
- "jazz piano trio with upright bass"
|
||||
- "ambient electronic music for relaxation"
|
||||
- "rock guitar riff with drums"
|
||||
- "classical piano sonata in minor key"
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Out of Memory
|
||||
|
||||
- Try `--model small` for lower VRAM usage
|
||||
- Reduce duration with `--duration 5`
|
||||
- Close other GPU applications
|
||||
|
||||
### Slow Generation
|
||||
|
||||
- Make sure GPU is detected (check output at startup)
|
||||
- Use `--model small` for faster generation
|
||||
- Reduce duration
|
||||
|
||||
### No Sound / Corrupted File
|
||||
|
||||
- Make sure scipy is installed: `pip install scipy`
|
||||
- Try a different audio player (VLC recommended)
|
||||
1
python_pkg/music_gen/__init__.py
Normal file
1
python_pkg/music_gen/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""AI Music Generator package using MusicGen."""
|
||||
324
python_pkg/music_gen/music_generator.py
Executable file
324
python_pkg/music_gen/music_generator.py
Executable file
@ -0,0 +1,324 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Local AI music generator using Meta's MusicGen.
|
||||
|
||||
Generates music from text prompts using the open-source MusicGen model.
|
||||
First run will download the model (~3.3GB for medium, ~500MB for small).
|
||||
|
||||
Usage:
|
||||
python music_generator.py "upbeat electronic dance music with synths"
|
||||
python music_generator.py --duration 15 "calm acoustic guitar melody"
|
||||
python music_generator.py --model small "jazz piano solo"
|
||||
python music_generator.py --interactive # Interactive mode
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Suppress warnings for cleaner output
|
||||
warnings.filterwarnings("ignore", category=FutureWarning)
|
||||
warnings.filterwarnings("ignore", category=UserWarning)
|
||||
|
||||
|
||||
def check_dependencies() -> bool:
    """Check that every third-party package the generator imports is available.

    Each package is imported by name; any that raises ImportError is
    collected and reported together with a ready-to-paste ``pip install``
    command. scipy is included because ``generate_music`` imports
    ``scipy.io.wavfile`` to write its output; without this check a missing
    scipy would only surface after the model had already been loaded.

    Returns:
        True when all required packages import successfully, False otherwise
        (after printing installation hints).
    """
    import importlib

    required = ("torch", "torchaudio", "transformers", "scipy")

    missing = []
    for package in required:
        try:
            importlib.import_module(package)
        except ImportError:
            missing.append(package)

    if missing:
        print("Missing dependencies. Install with:")
        print(f" pip install {' '.join(missing)}")
        print("\nOr run the full setup:")
        print(" pip install torch torchaudio transformers scipy")
        return False
    return True
|
||||
|
||||
|
||||
def get_device() -> str:
    """Select the torch device to run on and print which one was chosen.

    CUDA is preferred when ``torch.cuda.is_available()``; otherwise the MPS
    backend is used when torch exposes it and reports it as available;
    otherwise the CPU is used.

    Returns:
        One of ``"cuda"``, ``"mps"`` or ``"cpu"``.
    """
    import torch

    if torch.cuda.is_available():
        name = torch.cuda.get_device_name(0)
        # total_memory is divided by 1024**3 to report GiB.
        total_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
        print(f"Using CUDA GPU: {name} ({total_gib:.1f}GB VRAM)")
        return "cuda"

    # The hasattr guard covers torch builds without an ``mps`` backend module.
    has_mps_backend = hasattr(torch.backends, "mps")
    if has_mps_backend and torch.backends.mps.is_available():
        print("Using Apple Silicon (MPS)")
        return "mps"

    print("Using CPU (this will be slow)")
    return "cpu"
|
||||
|
||||
|
||||
def load_model(
    model_size: str = "medium",
) -> tuple:  # type: ignore[type-arg]
    """Load a MusicGen checkpoint and its processor, placed on the best device.

    The checkpoint name is built as ``facebook/musicgen-<model_size>`` and
    loaded with ``from_pretrained``; the model is then moved to the device
    returned by ``get_device()``.

    Args:
        model_size: One of 'small', 'medium', or 'large'
            - small: ~500MB, fastest, lower quality
            - medium: ~3.3GB, good balance (recommended)
            - large: ~6.5GB, best quality, needs more VRAM

    Returns:
        Tuple of (model, processor)
    """
    from transformers import AutoProcessor, MusicgenForConditionalGeneration

    checkpoint = f"facebook/musicgen-{model_size}"
    print(f"\nLoading MusicGen {model_size} model...")
    print("(First run will download the model, this may take a while)")

    target_device = get_device()

    text_processor = AutoProcessor.from_pretrained(checkpoint)
    network = MusicgenForConditionalGeneration.from_pretrained(checkpoint).to(
        target_device
    )

    print(f"Model loaded successfully on {target_device}!")
    return network, text_processor
|
||||
|
||||
|
||||
def generate_music(
    prompt: str,
    model: object,
    processor: object,
    duration_seconds: int = 10,
    output_dir: Path | None = None,
) -> Path:
    """Generate music from a text prompt and save it as a WAV file.

    Args:
        prompt: Text description of the music to generate
        model: The MusicGen model
        processor: The MusicGen processor
        duration_seconds: Length of audio to generate (max ~30s recommended)
        output_dir: Directory to save output (defaults to an ``output``
            directory next to this file); created, including missing
            parents, if it does not exist

    Returns:
        Path to the generated audio file

    Raises:
        ValueError: If ``duration_seconds`` is less than 1.
    """
    # Validate before the heavy imports and any generation work so a bad
    # duration fails fast with a clear message.
    if duration_seconds < 1:
        msg = f"duration_seconds must be at least 1, got {duration_seconds}"
        raise ValueError(msg)

    import scipy.io.wavfile
    import torch

    if output_dir is None:
        output_dir = Path(__file__).parent / "output"
    # parents=True so a nested user-supplied directory (e.g. out/run1) works.
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"\nGenerating {duration_seconds}s of music...")
    print(f"Prompt: {prompt!r}")

    # Run on whichever device the model's weights already live on.
    device = next(model.parameters()).device

    # Prepare inputs
    inputs = processor(
        text=[prompt],
        padding=True,
        return_tensors="pt",
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Calculate tokens needed for duration
    # MusicGen generates ~50 tokens per second of audio
    max_new_tokens = int(duration_seconds * 50)

    # Generate
    with torch.no_grad():
        audio_values = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
        )

    # Get sample rate from model config
    sample_rate = model.config.audio_encoder.sampling_rate

    # Convert to numpy and save; [0, 0] takes the first item of the first
    # two axes of the generated tensor.
    audio_data = audio_values[0, 0].cpu().numpy()

    # Create filename with timestamp and sanitized prompt
    timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
    safe_prompt = "".join(c if c.isalnum() or c in " -_" else "" for c in prompt[:30])
    # Fall back to a fixed label when nothing filename-safe survives.
    safe_prompt = safe_prompt.strip().replace(" ", "_") or "untitled"
    filename = f"{timestamp}_{safe_prompt}.wav"
    output_path = output_dir / filename

    scipy.io.wavfile.write(output_path, sample_rate, audio_data)

    print(f"\nSaved to: {output_path}")
    print(f"Duration: {len(audio_data) / sample_rate:.1f}s")

    return output_path
|
||||
|
||||
|
||||
def interactive_mode(model: object, processor: object) -> None:
    """Run a prompt loop that generates one audio file per entered prompt.

    Recognized input (commands are case-insensitive):
        - ``:q`` / ``:quit`` / ``quit`` / ``exit``: leave the loop
        - ``:h`` / ``:help`` / ``help``: list the numbered example prompts
        - ``:d <seconds>``: set the duration, clamped to 1-30 seconds
        - a decimal number: use the example prompt with that number
        - anything else: passed to ``generate_music`` as the prompt

    EOF or Ctrl-C at the input prompt also exits. RuntimeError, ValueError
    and OSError raised during generation are reported and the loop continues.

    Args:
        model: The MusicGen model
        processor: The MusicGen processor
    """
    print("\n" + "=" * 60)
    print("INTERACTIVE MODE")
    print("=" * 60)
    print("Enter prompts to generate music. Commands:")
    print(" :q or :quit - Exit")
    print(" :d <seconds> - Set duration (e.g., ':d 15')")
    print(" :h or :help - Show example prompts")
    print("=" * 60)

    duration = 10

    example_prompts = [
        "upbeat electronic dance music with heavy bass",
        "calm acoustic guitar melody with soft percussion",
        "epic orchestral soundtrack with dramatic strings",
        "lo-fi hip hop beats for studying",
        "80s synthwave with retro vibes",
        "jazz piano trio with upright bass",
        "ambient electronic music for relaxation",
        "rock guitar riff with drums",
        "classical piano sonata in minor key",
        "tropical house with steel drums",
    ]

    while True:
        try:
            prompt = input(f"\n[{duration}s] Enter prompt: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break

        if not prompt:
            continue

        command = prompt.lower()

        if command in (":q", ":quit", "quit", "exit"):
            print("Exiting...")
            break

        if command in (":h", ":help", "help"):
            print("\nExample prompts:")
            for i, ex in enumerate(example_prompts, 1):
                print(f" {i}. {ex}")
            continue

        # Match a bare ':d' too, so a forgotten argument yields the usage
        # hint instead of being sent to the model as a music prompt.
        if command == ":d" or command.startswith(":d "):
            try:
                requested = int(prompt[2:].strip())
            except ValueError:
                print("Invalid duration. Use ':d <number>' e.g., ':d 15'")
            else:
                duration = max(1, min(30, requested))  # Clamp to 1-30
                print(f"Duration set to {duration}s")
            continue

        # Check if user entered a number to use example prompt.
        # isdecimal() (not isdigit()) so characters like '²', which int()
        # rejects, are not treated as numbers.
        if prompt.isdecimal():
            idx = int(prompt) - 1
            if 0 <= idx < len(example_prompts):
                prompt = example_prompts[idx]
                print(f"Using: {prompt}")
            else:
                print(f"Invalid number. Enter 1-{len(example_prompts)}")
                continue

        try:
            generate_music(prompt, model, processor, duration_seconds=duration)
        except (RuntimeError, ValueError, OSError) as e:
            print(f"Error generating music: {e}")
|
||||
|
||||
|
||||
def main() -> None:
    """Parse command-line arguments, load the model and generate music.

    Exits with status 1 when neither a prompt nor ``--interactive`` is given
    or when a required dependency is missing, and with argparse's usage
    error (status 2) when ``--duration`` is not a positive number. In
    interactive mode the loop is started; otherwise one clip is generated
    for the given prompt.
    """
    parser = argparse.ArgumentParser(
        description="Generate music from text prompts using MusicGen",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s "upbeat electronic dance music"
%(prog)s --duration 20 "calm piano melody"
%(prog)s --model small "jazz guitar solo"
%(prog)s --interactive

Model sizes:
small - ~500MB, fastest, lower quality
medium - ~3.3GB, good balance (default)
large - ~6.5GB, best quality, needs 16GB+ VRAM
""",
    )

    parser.add_argument(
        "prompt",
        nargs="?",
        help="Text description of music to generate",
    )
    parser.add_argument(
        "-d",
        "--duration",
        type=int,
        default=10,
        help="Duration in seconds (default: 10, max recommended: 30)",
    )
    parser.add_argument(
        "-m",
        "--model",
        choices=["small", "medium", "large"],
        default="medium",
        help="Model size (default: medium)",
    )
    parser.add_argument(
        "-i",
        "--interactive",
        action="store_true",
        help="Run in interactive mode",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        help="Output directory (default: ./output)",
    )

    args = parser.parse_args()

    if not args.prompt and not args.interactive:
        parser.print_help()
        print("\nError: Either provide a prompt or use --interactive mode")
        sys.exit(1)

    # Reject a nonsensical duration now, before the expensive dependency
    # check and model load.
    if args.duration < 1:
        parser.error("--duration must be at least 1 second")

    # Check dependencies
    if not check_dependencies():
        sys.exit(1)

    # Load model
    model, processor = load_model(args.model)

    if args.interactive:
        interactive_mode(model, processor)
    else:
        generate_music(
            args.prompt,
            model,
            processor,
            duration_seconds=args.duration,
            output_dir=args.output,
        )


if __name__ == "__main__":
    main()
|
||||
73
python_pkg/music_gen/setup.sh
Executable file
73
python_pkg/music_gen/setup.sh
Executable file
@ -0,0 +1,73 @@
|
||||
#!/bin/bash
# Setup script for local AI music generation using MusicGen
# Run this script to install all dependencies

set -e

# Work from the script's own directory so that venv/ and output/ are created
# next to music_generator.py no matter where the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "========================================"
echo " MusicGen Local Setup"
echo "========================================"

# Check Python version (the README lists Python 3.10+ as a requirement)
if ! command -v python3 &> /dev/null; then
    echo "Error: python3 not found on PATH." >&2
    exit 1
fi
python_version=$(python3 --version 2>&1 | grep -oE '[0-9]+\.[0-9]+' | head -n 1)
echo "Python version: $python_version"
if ! python3 -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)'; then
    echo "Error: Python 3.10 or newer is required." >&2
    exit 1
fi

# Detect if we're in a virtual environment
if [ -z "$VIRTUAL_ENV" ]; then
    echo ""
    echo "No virtual environment detected."
    echo "Creating one in ./venv..."
    python3 -m venv venv
    source venv/bin/activate
    echo "Virtual environment activated: $VIRTUAL_ENV"
else
    echo "Using existing virtual environment: $VIRTUAL_ENV"
fi

# Upgrade pip
# "python -m pip" ensures packages go into the active environment's
# interpreter rather than whichever "pip" happens to be first on PATH.
echo ""
echo "Upgrading pip..."
python -m pip install --upgrade pip

# Detect GPU
echo ""
echo "Detecting GPU..."

if command -v nvidia-smi &> /dev/null; then
    echo "NVIDIA GPU detected!"
    # Informational only; do not abort setup if the query fails.
    nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || true
    echo ""
    echo "Installing PyTorch with CUDA support..."
    python -m pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu121
elif [[ "$(uname)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
    echo "Apple Silicon detected!"
    echo "Installing PyTorch with MPS support..."
    python -m pip install torch torchaudio
else
    echo "No GPU detected, using CPU (generation will be slower)"
    python -m pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
fi

# Install transformers and other dependencies
echo ""
echo "Installing transformers and other dependencies..."
python -m pip install transformers scipy

# Create output directory
mkdir -p output

echo ""
echo "========================================"
echo " Setup Complete!"
echo "========================================"
echo ""
echo "To activate the virtual environment:"
echo " source venv/bin/activate"
echo ""
echo "Usage examples:"
echo " python music_generator.py 'upbeat electronic dance music'"
echo " python music_generator.py --duration 15 'calm acoustic guitar'"
echo " python music_generator.py --interactive"
echo ""
echo "Model will be downloaded on first run (~3.3GB for medium model)"
echo ""
|
||||
Loading…
Reference in New Issue
Block a user