Интеграция AudioCraft для генерации аудио
AudioCraft — фреймворк Meta, включающий MusicGen (музыка), AudioGen (звуковые эффекты и ambient) и EnCodec (аудиокодек). Это единая библиотека для всех задач аудиогенерации.
AudioGen — звуковые эффекты
import io

import torch
import torchaudio
from audiocraft.models import AudioGen, MusicGen
# Load the pretrained AudioGen checkpoint once at module import;
# generate_sound_effect() reuses this shared instance on every call.
sfx_model = AudioGen.get_pretrained("facebook/audiogen-medium")
def generate_sound_effect(description: str, duration: float = 3.0) -> bytes:
    """Generate a sound effect from a text prompt and return it as WAV bytes.

    Args:
        description: Text prompt describing the desired sound.
        duration: Requested clip length, passed to the model as ``duration``.

    Returns:
        The first generated clip serialized as an in-memory WAV file.
    """
    # NOTE: this reconfigures the shared module-level model, so the settings
    # stay in effect for whoever uses ``sfx_model`` next.
    sfx_model.set_generation_params(
        duration=duration,
        temperature=1.0,
        top_k=250,
    )
    wav = sfx_model.generate(descriptions=[description])

    buf = io.BytesIO()
    # Take the sample rate from the model instead of hard-coding 16000 so the
    # WAV header stays correct if the checkpoint is ever swapped.
    torchaudio.save(
        buf,
        wav[0].cpu(),
        sample_rate=sfx_model.sample_rate,
        format="wav",
    )
    return buf.getvalue()
# Example prompts: short sound-effect name -> text description for the generator.
SFX_LIBRARY: dict[str, str] = dict(
    ui_click="mouse click sound, sharp, digital",
    notification="notification bell, pleasant, soft ping",
    error="error sound, buzzer, short negative tone",
    success="success chime, positive, ascending tones",
    typing="keyboard typing, mechanical, office ambient",
    door_open="door opening, wooden, slight creak",
    water="water flowing in a stream, peaceful, nature",
    crowd="office crowd noise, ambient, background conversation",
    thunder="distant thunder, rumbling, storm approaching",
)
EnCodec — сжатие аудио
from audiocraft.models import EnCodec
# Encoder/decoder used for audio compression; loaded once at module import.
# NOTE(review): compress_audio() reads `.audio_codes` / `.audio_scales` from the
# encode() result, which looks like the Hugging Face transformers EncodecModel
# interface — confirm that `audiocraft.models.EnCodec` exists and exposes it.
encodec_model = EnCodec.get_pretrained("facebook/encodec_24khz")
def compress_audio(audio_bytes: bytes, bandwidth: float = 6.0) -> bytes:
    """Round-trip audio through the EnCodec model and return it as MP3 bytes.

    The input is decoded with torchaudio, downmixed to mono, resampled to
    24 kHz, encoded at the requested bandwidth, decoded back to a waveform
    and serialized as MP3.

    Args:
        audio_bytes: Encoded audio in a container torchaudio can load.
        bandwidth: Target codec bandwidth in kbps. Values listed by the
            original author: 1.5, 3, 6, 12, 24. Per the original notes,
            6 kbps gives good speech quality and 24 kbps is the
            highest-quality setting for music.

    Returns:
        MP3-encoded bytes of the reconstructed 24 kHz waveform.
    """
    # Sample rate implied by the "encodec_24khz" checkpoint name; the output
    # below is written at this rate, so the input must match it.
    target_sr = 24000

    wav, sr = torchaudio.load(io.BytesIO(audio_bytes))

    # Presumably the 24 kHz checkpoint is mono — TODO confirm. Averaging the
    # channels keeps stereo input from failing on a channel-count mismatch.
    if wav.shape[0] > 1:
        wav = wav.mean(dim=0, keepdim=True)

    # Without resampling, audio recorded at another rate would be saved with
    # a 24 kHz header and play back at the wrong speed and pitch.
    if sr != target_sr:
        wav = torchaudio.functional.resample(
            wav, orig_freq=sr, new_freq=target_sr
        )

    with torch.no_grad():
        # NOTE(review): `bandwidth=` and the `.audio_codes` / `.audio_scales`
        # fields follow the Hugging Face EncodecModel interface — confirm the
        # object in `encodec_model` provides it.
        encoded = encodec_model.encode(wav.unsqueeze(0), bandwidth=bandwidth)
        decoded = encodec_model.decode(
            encoded.audio_codes, encoded.audio_scales
        )[0]

    buf = io.BytesIO()
    torchaudio.save(
        buf,
        decoded.squeeze(0).cpu(),
        sample_rate=target_sr,
        format="mp3",
    )
    return buf.getvalue()
Комбинированный пайплайн для игр
class GameAudioGenerator:
    """Generate background music and sound effects for a game scene.

    Holds one MusicGen model for music and one AudioGen model for sound
    effects; both are loaded when an instance is created.
    """

    # Prompt presets per scene type: one music prompt plus a list of SFX
    # prompts. Kept at class level so the table is built once, not per call.
    SCENE_CONFIGS = {
        "battle": {
            "music": "intense battle music, drums, brass, aggressive, fast tempo",
            "sfx": ["sword clash metal sound", "arrow whoosh", "battle cry distant"],
        },
        "forest": {
            "music": "peaceful forest ambient, acoustic guitar, birds, nature",
            "sfx": ["birds chirping", "leaves rustling wind", "footsteps on grass"],
        },
        "menu": {
            "music": "epic main menu theme, orchestral, majestic, loopable",
            "sfx": ["menu button click", "confirmation chime"],
        },
    }
    # Scene used when an unknown scene_type is requested.
    DEFAULT_SCENE = "menu"
    # Durations passed to set_generation_params() for music and SFX clips.
    MUSIC_DURATION = 60
    SFX_DURATION = 2

    def __init__(self):
        """Load the pretrained music and sound-effect models."""
        self.music_gen = MusicGen.get_pretrained("facebook/musicgen-medium")
        self.sfx_gen = AudioGen.get_pretrained("facebook/audiogen-medium")

    async def generate_game_scene_audio(self, scene_type: str) -> dict:
        """Generate the music track and sound effects for one scene type.

        Args:
            scene_type: Key into ``SCENE_CONFIGS`` ("battle", "forest",
                "menu"). Unknown values fall back to the "menu" preset.

        Returns:
            A dict with ``"music"`` (the first generated music clip, moved
            to CPU) and ``"sfx"`` (a dict mapping each SFX prompt to its
            generated clip, moved to CPU).

        Note:
            Nothing is awaited here: generation runs synchronously and
            occupies the event loop until it finishes.
        """
        config = self.SCENE_CONFIGS.get(
            scene_type, self.SCENE_CONFIGS[self.DEFAULT_SCENE]
        )

        self.music_gen.set_generation_params(duration=self.MUSIC_DURATION)
        music_wav = self.music_gen.generate([config["music"]])

        # Generate all SFX prompts in one batched call instead of one model
        # invocation per prompt; pass a copy so the preset list is never
        # handed to the model directly.
        sfx_prompts = list(config["sfx"])
        self.sfx_gen.set_generation_params(duration=self.SFX_DURATION)
        sfx_batch = self.sfx_gen.generate(sfx_prompts)
        sfx_results = {
            prompt: clip.cpu() for prompt, clip in zip(sfx_prompts, sfx_batch)
        }

        return {
            "music": music_wav[0].cpu(),
            "sfx": sfx_results,
        }
Сроки: AudioCraft API с MusicGen + AudioGen — 1–2 дня. Интеграция в игровой движок или видеоредактор — дополнительно 1 неделя.







