testsAndMisc-archive/python_pkg/music_gen/music_generator.py
Krzysztof kuhy Rudnicki 8f2fbd2311 refactor: enforce 500-line limit on all Python source files
Split 18+ Python files that exceeded 500 lines into smaller modules
with helper files (prefixed with _). All functions are re-exported
from the original modules to maintain backward compatibility with
test patches and external imports.

Files split:
- moviepy_showcase.py (1212 -> 302 + 3 helpers)
- anki_generator.py (1174 -> 473 + 4 helpers)
- test_analyze_chess_game.py (1152 -> 361 + 2 parts)
- poker_modifier_app.py (1024 -> 263 + 2 helpers)
- transcribe_fw.py (1007 -> 342 + 3 helpers)
- music_generator.py (1002 -> 319 + 2 helpers)
- translator.py (951 -> 442 + 2 helpers)
- cinema_planner.py (893 -> 369 + 2 helpers)
- lichess_bot/main.py (757 -> 495 + _game_logic.py)
- test_translator.py (725 -> 289 + part2 + conftest)
- test_lichess_api.py (680 -> 475 + part2)
- learning_pipe.py (668 -> 375 + 2 helpers)
- cache.py (655 -> 360 + _cache_decks.py)
- analyze_chess_game.py (632 -> 463 + _move_analysis.py)
- visualize_q02.py (609 -> 371 + helper)
- repo_explorer.py (602 -> 347 + 2 helpers)
- keyboard_coop/main.py (515 -> 416 + _dictionary.py)
- scanning.py (501 -> 314 + _enforce_loop.py)

All tests pass: 144 lichess_bot (100% branch coverage), 243 others.
No new lint errors introduced.
2026-03-17 22:47:42 +01:00

320 lines
9.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""Local AI music generator using Meta's MusicGen.

Generates music from text prompts using the open-source MusicGen model.
The first run downloads the model (~3.3GB for medium, ~500MB for small).
Speech (--speech) and full songs (--song) are generated with Bark.

Usage:
    python music_generator.py "upbeat electronic dance music with synths"
    python music_generator.py --duration 15 "calm acoustic guitar melody"
    python music_generator.py --model small "jazz piano solo"
    python music_generator.py --interactive  # Interactive mode
"""
from __future__ import annotations
import argparse
from pathlib import Path
import sys
import warnings
from python_pkg.music_gen._music_generation import (
CROSSFADE_DURATION,
SEGMENT_DURATION,
VRAM_THRESHOLD_LARGE,
VRAM_THRESHOLD_MEDIUM,
_calculate_segment_duration,
_generate_long_audio,
crossfade_audio,
generate_music,
generate_segment,
get_device,
get_vram_gb,
load_model,
select_model_size,
)
from python_pkg.music_gen._music_speech import (
BARK_MAX_CHARS,
BARK_VOICES,
_generate_instrumental_for_song,
_generate_vocals_for_song,
_mix_audio,
_resample_audio,
_split_into_sentences,
generate_song,
generate_speech,
)
# Silence FutureWarning and UserWarning so the command-line output stays
# readable. These filters are installed when the module is imported.
# NOTE(review): importing this module from another program therefore changes
# that program's warning filters too - confirm this is intended.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# Names exported by ``from <this module> import *``. Besides the three
# functions defined in this file (check_dependencies, interactive_mode, main)
# the list repeats every name imported above from the _music_generation and
# _music_speech helper modules - including the underscore-prefixed helpers -
# so code that still imports them from this module keeps working.
__all__ = [
"BARK_MAX_CHARS",
"BARK_VOICES",
"CROSSFADE_DURATION",
"SEGMENT_DURATION",
"VRAM_THRESHOLD_LARGE",
"VRAM_THRESHOLD_MEDIUM",
"_calculate_segment_duration",
"_generate_instrumental_for_song",
"_generate_long_audio",
"_generate_vocals_for_song",
"_mix_audio",
"_resample_audio",
"_split_into_sentences",
"check_dependencies",
"crossfade_audio",
"generate_music",
"generate_segment",
"generate_song",
"generate_speech",
"get_device",
"get_vram_gb",
"interactive_mode",
"load_model",
"main",
"select_model_size",
]
def check_dependencies(*, include_bark: bool = False) -> bool:
    """Report whether every package needed for generation is installed.

    ``torch``, ``transformers`` and ``scipy`` are looked up with
    ``importlib.util.find_spec``. When at least one package cannot be
    found, installation hints are printed to stdout.

    Args:
        include_bark: Also require the ``bark`` package.

    Returns:
        ``True`` when nothing is missing, ``False`` otherwise.
    """
    import importlib.util

    # Each entry: (module name to look up, argument to give ``pip install``).
    requirements = [
        ("torch", "torch"),
        ("transformers", "transformers"),
        ("scipy", "scipy"),
    ]
    if include_bark:
        # Bark's install target is its Git repository URL, not a package name.
        requirements.append(("bark", "git+https://github.com/suno-ai/bark.git"))

    not_installed = [
        pip_target
        for module_name, pip_target in requirements
        if importlib.util.find_spec(module_name) is None
    ]
    if not not_installed:
        return True

    print("Missing dependencies. Install with:")
    print(f" pip install {' '.join(not_installed)}")
    print("\nFor CUDA support:")
    print(" pip install torch --index-url https://download.pytorch.org/whl/cu121")
    print(" pip install transformers scipy")
    if include_bark:
        print("\nFor Bark vocals:")
        print(" pip install git+https://github.com/suno-ai/bark.git")
    return False
def interactive_mode(model: object, processor: object) -> None:
    """Run a prompt loop that generates one clip per line the user enters.

    Lines are read with ``input()`` until the user quits or stdin is closed
    (EOF / Ctrl-C). A line is interpreted as follows:

    * ``:q`` / ``:quit`` / ``quit`` / ``exit`` - leave the loop.
    * ``:h`` / ``:help`` / ``help`` - list the built-in example prompts.
    * ``:d <seconds>`` - set the clip duration, clamped to 1-30 seconds.
    * a number - use the example prompt with that 1-based index.
    * anything else - passed to ``generate_music`` as the text prompt.

    Errors raised by ``generate_music`` (``RuntimeError``, ``ValueError``,
    ``OSError``) are reported and the loop continues.

    Args:
        model: Passed through unchanged to ``generate_music``.
        processor: Passed through unchanged to ``generate_music``.
    """
    print("\n" + "=" * 60)
    print("INTERACTIVE MODE")
    print("=" * 60)
    print("Enter prompts to generate music. Commands:")
    print(" :q or :quit - Exit")
    print(" :d <seconds> - Set duration (e.g., ':d 15')")
    print(" :h or :help - Show example prompts")
    print("=" * 60)
    duration = 10
    example_prompts = [
        "upbeat electronic dance music with heavy bass",
        "calm acoustic guitar melody with soft percussion",
        "epic orchestral soundtrack with dramatic strings",
        "lo-fi hip hop beats for studying",
        "80s synthwave with retro vibes",
        "jazz piano trio with upright bass",
        "ambient electronic music for relaxation",
        "rock guitar riff with drums",
        "classical piano sonata in minor key",
        "tropical house with steel drums",
    ]
    while True:
        try:
            prompt = input(f"\n[{duration}s] Enter prompt: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break
        if not prompt:
            continue
        command = prompt.lower()
        if command in (":q", ":quit", "quit", "exit"):
            print("Exiting...")
            break
        if command in (":h", ":help", "help"):
            print("\nExample prompts:")
            for i, ex in enumerate(example_prompts, 1):
                print(f" {i}. {ex}")
            continue
        # ``prompt`` is non-empty here, so split() always yields a keyword.
        # Matching on the first word (case-insensitively, like the other
        # commands) means a bare ":d" with the argument forgotten is reported
        # as an invalid duration instead of being sent to the model as a
        # music prompt.
        keyword, *argument = prompt.split(maxsplit=1)
        if keyword.lower() == ":d":
            try:
                duration = int(argument[0] if argument else "")
                duration = max(1, min(30, duration))  # Clamp to 1-30
                print(f"Duration set to {duration}s")
            except ValueError:
                print("Invalid duration. Use ':d <number>' e.g., ':d 15'")
            continue
        # A bare number selects an example prompt. isdecimal() is used rather
        # than isdigit() because isdigit() also accepts characters such as
        # "²" that int() rejects, which would raise an uncaught ValueError.
        if prompt.isdecimal():
            idx = int(prompt) - 1
            if 0 <= idx < len(example_prompts):
                prompt = example_prompts[idx]
                print(f"Using: {prompt}")
            else:
                print(f"Invalid number. Enter 1-{len(example_prompts)}")
                continue
        try:
            generate_music(prompt, model, processor, duration_seconds=duration)
        except (RuntimeError, ValueError, OSError) as e:
            print(f"Error generating music: {e}")
def main() -> None:
    """Parse the command line and run the selected generation mode.

    Modes, in order of precedence:

    * ``--song``: ``generate_song`` with the prompt and the ``--music`` style.
    * ``--speech``: ``generate_speech`` with the prompt.
    * otherwise MusicGen: a model is selected and loaded, then either
      ``interactive_mode`` (``--interactive``) or a single ``generate_music``
      call is run.

    The process exits with status 1 when no usable prompt was given or when
    ``check_dependencies`` reports a missing package.
    """
    parser = argparse.ArgumentParser(
        description="Generate music or speech from text prompts",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Music generation (MusicGen):
%(prog)s "upbeat electronic dance music"
%(prog)s --duration 60 "calm piano melody"
%(prog)s --model small "jazz guitar solo"
%(prog)s --interactive
# Speech/vocals generation (Bark):
%(prog)s --speech "Hello, how are you today?"
%(prog)s --speech --voice v2/en_speaker_3 "Welcome!"
%(prog)s --speech "♪ La la la, I love to sing ♪"
# Full song with vocals over music:
%(prog)s --song "♪ Hello world, this is my song ♪" --music "upbeat pop"
Model sizes for MusicGen (auto-selected based on VRAM if not specified):
small - ~500MB, fastest, lower quality (3GB+ VRAM)
medium - ~3.3GB, good balance (8GB+ VRAM)
large - ~6.5GB, best quality (12GB+ VRAM)
Bark voices: v2/en_speaker_0 to v2/en_speaker_9
Bark tokens: [laughter] [laughs] [sighs] [music] [gasps] ♪ (singing)
""",
    )
    parser.add_argument(
        "prompt",
        nargs="?",
        help="Text description of music/speech to generate",
    )
    parser.add_argument(
        "-d",
        "--duration",
        type=int,
        default=10,
        help="Duration in seconds (default: 10, any length supported)",
    )
    parser.add_argument(
        "-m",
        "--model",
        choices=["small", "medium", "large"],
        default=None,
        help="MusicGen model size (auto-select based on VRAM by default)",
    )
    parser.add_argument(
        "-i",
        "--interactive",
        action="store_true",
        help="Run in interactive mode (MusicGen only)",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        help="Output directory (default: ./output)",
    )
    parser.add_argument(
        "-s",
        "--speech",
        action="store_true",
        help="Generate speech/vocals using Bark instead of music",
    )
    parser.add_argument(
        "-v",
        "--voice",
        default="v2/en_speaker_6",
        help="Bark voice preset (default: v2/en_speaker_6)",
    )
    parser.add_argument(
        "--song",
        action="store_true",
        help="Generate a full song with vocals over instrumental",
    )
    parser.add_argument(
        "--music",
        type=str,
        default="upbeat pop instrumental backing track",
        help="Music style for --song mode (default: upbeat pop)",
    )
    args = parser.parse_args()
    use_bark = args.speech or args.song

    if not args.prompt and not args.interactive:
        parser.print_help()
        print("\nError: Either provide a prompt or use --interactive mode")
        sys.exit(1)
    if not args.prompt and use_bark:
        # --interactive only applies to MusicGen. Without this guard,
        # "--interactive --speech" (or --song) with no prompt would pass the
        # check above and hand None to the Bark generators.
        parser.print_help()
        print(
            "\nError: --speech and --song require a prompt "
            "(--interactive is MusicGen only)"
        )
        sys.exit(1)

    # Check dependencies (Bark is only required for the speech/song modes).
    if not check_dependencies(include_bark=use_bark):
        sys.exit(1)

    if args.song:
        # Full song generation mode (vocals + instrumental)
        generate_song(
            args.prompt,
            args.music,
            voice=args.voice,
            output_dir=args.output,
        )
    elif args.speech:
        # Bark speech generation mode
        generate_speech(
            args.prompt,
            voice=args.voice,
            output_dir=args.output,
        )
    else:
        # MusicGen music generation mode
        model_size = select_model_size(args.model)
        model, processor = load_model(model_size)
        if args.interactive:
            # interactive_mode takes only the model and processor, so
            # --duration and --output are not forwarded to it.
            interactive_mode(model, processor)
        else:
            generate_music(
                args.prompt,
                model,
                processor,
                duration_seconds=args.duration,
                output_dir=args.output,
            )


if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()