Files
PyTaiko/libs/audio.py
2025-05-02 00:22:05 -04:00

761 lines
26 KiB
Python

import io
import os
import queue
import time
import wave
from threading import Lock, Thread
import pyray as ray
from numpy import (
abs as np_abs,
)
from numpy import (
column_stack,
float32,
frombuffer,
int16,
int32,
mean,
uint8,
zeros,
)
from numpy import (
max as np_max,
)
os.environ["SD_ENABLE_ASIO"] = "1"
import sounddevice as sd
from pydub import AudioSegment
from scipy import signal
from libs.utils import get_config
def resample(data, orig_sr, target_sr):
ratio = target_sr / orig_sr
if ratio == 1.0:
return data
if len(data.shape) == 1:
resampled_data = signal.resample_poly(data, target_sr, orig_sr)
else:
num_channels = data.shape[1]
resampled_channels = []
for ch in range(num_channels):
channel_data = data[:, ch]
resampled_channel = signal.resample_poly(channel_data, target_sr, orig_sr)
resampled_channels.append(resampled_channel)
resampled_data = column_stack(resampled_channels)
return resampled_data
def get_np_array(sample_width, raw_data):
if sample_width == 1:
# 8-bit samples are unsigned
data = frombuffer(raw_data, dtype=uint8)
return (data.astype(float32) - 128) / 128.0
elif sample_width == 2:
# 16-bit samples are signed
data = frombuffer(raw_data, dtype=int16)
return data.astype(float32) / 32768.0
elif sample_width == 3:
# 24-bit samples handling
data = zeros(len(raw_data) // 3, dtype=int32)
for i in range(len(data)):
data[i] = int.from_bytes(raw_data[i*3:i*3+3], byteorder='little', signed=True)
return data.astype(float32) / (2**23)
elif sample_width == 4:
# 32-bit samples are signed
data = frombuffer(raw_data, dtype=int32)
return data.astype(float32) / (2**31)
else:
raise ValueError(f"Unsupported sample width: {sample_width}")
class Sound:
def __init__(self, file_path, data=None, target_sample_rate=48000):
self.file_path = file_path
self.data = data
self.channels = 0
self.sample_rate = target_sample_rate
self.position = 0
self.is_playing = False
self.is_paused = False
self.volume = 1.0
self.pan = 0.5 # 0.0 = left, 0.5 = center, 1.0 = right
if file_path:
self.load()
def load(self):
"""Load and prepare the sound file data"""
if self.file_path.endswith('.ogg'):
audio = AudioSegment.from_ogg(self.file_path)
wav_io = io.BytesIO()
audio.export(wav_io, format="wav")
wav_io.seek(0)
file_path = wav_io
else:
file_path = self.file_path
with wave.open(file_path, 'rb') as wf:
# Get file properties
self.channels = wf.getnchannels()
sample_width = wf.getsampwidth()
original_sample_rate = wf.getframerate()
frames = wf.getnframes()
# Read all frames from the file
raw_data = wf.readframes(frames)
data = get_np_array(sample_width, raw_data)
# Reshape for multi-channel audio
if self.channels > 1:
data = data.reshape(-1, self.channels)
# Resample if needed
if original_sample_rate != self.sample_rate:
print(f"Resampling {self.file_path} from {original_sample_rate}Hz to {self.sample_rate}Hz")
data = resample(data, original_sample_rate, self.sample_rate)
self.data = data
def play(self):
self.position = 0
self.is_playing = True
self.is_paused = False
def stop(self):
self.is_playing = False
self.is_paused = False
self.position = 0
def pause(self):
if self.is_playing:
self.is_paused = True
self.is_playing = False
def resume(self):
if self.is_paused:
self.is_playing = True
self.is_paused = False
def get_frames(self, num_frames):
"""Get the next num_frames of audio data, applying volume, pitch, and pan"""
if self.data is None:
return
if not self.is_playing:
# Return silence if not playing
if self.channels == 1:
return zeros(num_frames, dtype=float32)
else:
return zeros((num_frames, self.channels), dtype=float32)
# Calculate how many frames we have left
frames_left = len(self.data) - self.position
if self.channels > 1:
frames_left = self.data.shape[0] - self.position
if frames_left <= 0:
# We've reached the end of the sound
self.is_playing = False
if self.channels == 1:
return zeros(num_frames, dtype=float32)
else:
return zeros((num_frames, self.channels), dtype=float32)
# Get the actual frames to return
frames_to_get = min(num_frames, frames_left)
if self.channels == 1:
output = zeros(num_frames, dtype=float32)
output[:frames_to_get] = self.data[self.position:self.position+frames_to_get]
else:
output = zeros((num_frames, self.channels), dtype=float32)
output[:frames_to_get] = self.data[self.position:self.position+frames_to_get]
self.position += frames_to_get
output *= self.volume
# Apply pan for stereo output
if self.channels == 2 and self.pan != 0.5:
# pan=0: full left, pan=0.5: center, pan=1: full right
left_vol = min(1.0, 2.0 * (1.0 - self.pan))
right_vol = min(1.0, 2.0 * self.pan)
output[:, 0] *= left_vol
output[:, 1] *= right_vol
return output
class Music:
def __init__(self, file_path, data=None, file_type=None, target_sample_rate=48000):
self.file_path = file_path
self.file_type = file_type
self.data = data
self.target_sample_rate = target_sample_rate
self.sample_rate = target_sample_rate
self.channels = 0
self.position = 0 # In frames
self.is_playing = False
self.is_paused = False
self.volume = 1.0
self.pan = 0.5 # Center
self.total_frames = 0
self.valid = False
self.wave_file = None
self.file_buffer_size = int(target_sample_rate * 5) # 5 seconds buffer
self.buffer = None
self.buffer_position = 0
# Thread-safe updates
self.lock = Lock()
self.load_from_file()
def load_from_file(self):
"""Load music from file"""
if self.file_path.endswith('.ogg'):
audio = AudioSegment.from_ogg(self.file_path)
wav_io = io.BytesIO()
audio.export(wav_io, format="wav")
wav_io.seek(0)
file_path = wav_io
else:
file_path = self.file_path
try:
# Keep the file open for streaming
self.wave_file = wave.open(file_path, 'rb')
# Get file properties
self.channels = self.wave_file.getnchannels()
self.sample_width = self.wave_file.getsampwidth()
self.sample_rate = self.wave_file.getframerate()
self.total_frames = self.wave_file.getnframes()
# Initialize buffer with some initial data
self._fill_buffer()
self.valid = True
print(f"Music loaded: {self.channels} channels, {self.sample_rate}Hz, {self.total_frames} frames")
except Exception as e:
print(f"Error loading music file: {e}")
if self.wave_file:
self.wave_file.close()
self.wave_file = None
self.valid = False
def _fill_buffer(self):
"""Fill the streaming buffer from file"""
if not self.wave_file:
return False
# Read a chunk of frames from file
try:
frames_to_read = min(self.file_buffer_size, self.total_frames - self.position)
if frames_to_read <= 0:
return False
raw_data = self.wave_file.readframes(frames_to_read)
data = get_np_array(self.sample_width, raw_data)
# Reshape for multi-channel audio
if self.channels > 1:
data = data.reshape(-1, self.channels)
if self.sample_rate != self.target_sample_rate:
print(f"Resampling {self.file_path} from {self.sample_rate}Hz to {self.target_sample_rate}Hz")
data = resample(data, self.sample_rate, self.target_sample_rate)
self.buffer = data
self.buffer_position = 0
return True
except Exception as e:
print(f"Error filling buffer: {e}")
return False
def update(self):
"""Update music stream buffers"""
if not self.is_playing or self.is_paused:
return
with self.lock:
# Check if we need to refill the buffer
if self.buffer is None:
raise Exception("buffer is None")
if self.wave_file and self.buffer_position >= len(self.buffer):
if not self._fill_buffer():
self.is_playing = False
def play(self):
"""Start playing the music stream"""
with self.lock:
# Reset position if at the end
if self.wave_file and self.position >= self.total_frames:
self.wave_file.rewind()
self.position = 0
self.buffer_position = 0
self._fill_buffer()
self.is_playing = True
self.is_paused = False
def stop(self):
"""Stop playing the music stream"""
with self.lock:
self.is_playing = False
self.is_paused = False
self.position = 0
self.buffer_position = 0
if self.wave_file:
self.wave_file.rewind()
self._fill_buffer()
def pause(self):
"""Pause the music playback"""
with self.lock:
if self.is_playing:
self.is_paused = True
self.is_playing = False
def resume(self):
"""Resume the music playback"""
with self.lock:
if self.is_paused:
self.is_playing = True
self.is_paused = False
def seek(self, position_seconds):
"""Seek to a specific position in seconds"""
with self.lock:
# Convert seconds to frames
frame_position = int(position_seconds * self.sample_rate)
# Clamp position to valid range
frame_position = max(0, min(frame_position, self.total_frames - 1))
# Update file position if streaming from file
if self.wave_file:
self.wave_file.setpos(frame_position)
self._fill_buffer()
self.position = frame_position
self.buffer_position = 0
def get_time_length(self):
"""Get the total length of the music in seconds"""
return self.total_frames / self.sample_rate
def get_time_played(self):
"""Get the current playback position in seconds"""
return (self.position + self.buffer_position) / self.sample_rate
def get_frames(self, num_frames):
"""Get the next num_frames of music data, applying volume, pitch, and pan"""
if not self.is_playing:
# Return silence if not playing
if self.channels == 1:
return zeros(num_frames, dtype=float32)
else:
return zeros((num_frames, self.channels), dtype=float32)
with self.lock:
if self.buffer is None:
raise Exception("buffer is None")
# Check if we need more data
if self.buffer_position >= len(self.buffer):
# If no more data available and streaming from file
if self.wave_file and not self._fill_buffer():
self.is_playing = False
if self.channels == 1:
return zeros(num_frames, dtype=float32)
else:
return zeros((num_frames, self.channels), dtype=float32)
# Calculate how many frames we have left in buffer
frames_left_in_buffer = len(self.buffer) - self.buffer_position
if self.channels > 1:
frames_left_in_buffer = self.buffer.shape[0] - self.buffer_position
frames_to_get = min(num_frames, frames_left_in_buffer)
if self.channels == 1:
output = zeros(num_frames, dtype=float32)
output[:frames_to_get] = self.buffer[self.buffer_position:self.buffer_position+frames_to_get]
else:
output = zeros((num_frames, self.channels), dtype=float32)
output[:frames_to_get] = self.buffer[self.buffer_position:self.buffer_position+frames_to_get]
# Update buffer position
self.buffer_position += frames_to_get
self.position += frames_to_get
# Apply volume
output *= self.volume
# Apply pan for stereo output
if self.channels == 2 and self.pan != 0.5:
# pan=0: full left, pan=0.5: center, pan=1: full right
left_vol = min(1.0, 2.0 * (1.0 - self.pan))
right_vol = min(1.0, 2.0 * self.pan)
output[:, 0] *= left_vol
output[:, 1] *= right_vol
return output
def __del__(self):
"""Cleanup when the music object is deleted"""
if self.wave_file:
try:
self.wave_file.close()
except Exception:
raise Exception("unable to close music stream")
class ASIOEngine:
def __init__(self):
self.target_sample_rate = 48000
self.buffer_size = get_config()["audio"]["asio_buffer"]
self.sounds = {}
self.music_streams = {}
self.stream = None
self.device_id = None
self.running = False
self.sound_queue = queue.Queue()
self.music_queue = queue.Queue()
self.master_volume = 1.0
self.output_channels = 2 # Default to stereo
self.audio_device_ready = False
# Threading for music stream updates
self.update_thread = None
self.update_thread_running = False
def _initialize_asio(self):
"""Set up ASIO device"""
# Find ASIO API and use its default device
hostapis = sd.query_hostapis()
asio_api_index = -1
for i, api in enumerate(hostapis):
if isinstance(api, dict) and 'name' in api and api['name'] == 'ASIO':
asio_api_index = i
break
if asio_api_index is not None:
asio_api = hostapis[asio_api_index]
if isinstance(asio_api, dict) and 'default_output_device' in asio_api:
default_asio_device = asio_api['default_output_device']
else:
raise Exception("Warning: 'default_output_device' key not found in ASIO API info.")
if default_asio_device >= 0:
self.device_id = default_asio_device
device_info = sd.query_devices(self.device_id)
if isinstance(device_info, sd.DeviceList):
raise Exception("Invalid ASIO Device")
print(f"Using default ASIO device: {device_info['name']}")
# Set output channels based on device capabilities
self.output_channels = device_info['max_output_channels']
if self.output_channels > 2:
# Limit to stereo for simplicity
self.output_channels = 2
return True
else:
print("No default ASIO device found, using system default.")
else:
print("ASIO API not found, using system default device.")
# If we get here, use default system device
self.device_id = None
device_info = sd.query_devices(sd.default.device[1])
if isinstance(device_info, sd.DeviceList):
raise Exception("Invalid ASIO Device")
self.output_channels = min(2, device_info['max_output_channels'])
return True
def _audio_callback(self, outdata, frames, time, status):
"""Callback function for the sounddevice stream"""
if status:
print(f"Status: {status}")
# Process any new sound play requests
while not self.sound_queue.empty():
try:
sound_name = self.sound_queue.get_nowait()
if sound_name in self.sounds:
self.sounds[sound_name].play()
except queue.Empty:
break
# Process any new music play requests
while not self.music_queue.empty():
try:
music_name, action, *args = self.music_queue.get_nowait()
if music_name in self.music_streams:
music = self.music_streams[music_name]
if action == 'play':
music.play()
elif action == 'stop':
music.stop()
elif action == 'pause':
music.pause()
elif action == 'resume':
music.resume()
elif action == 'seek' and args:
music.seek(args[0])
except queue.Empty:
break
# Mix all playing sounds and music
output = zeros((frames, self.output_channels), dtype=float32)
# Mix sounds
for sound_name, sound in self.sounds.items():
if sound.is_playing:
sound_data = sound.get_frames(frames)
# If mono sound but stereo output, duplicate to both channels
if sound.channels == 1 and self.output_channels > 1:
sound_data = column_stack([sound_data] * self.output_channels)
# Ensure sound_data matches the output format
if sound.channels > self.output_channels:
# Down-mix if needed
if self.output_channels == 1:
sound_data = mean(sound_data, axis=1)
else:
# Keep only the first output_channels
sound_data = sound_data[:, :self.output_channels]
# Add to the mix (simple additive mixing)
output += sound_data
# Mix music streams
for music_name, music in self.music_streams.items():
if music.is_playing:
music_data = music.get_frames(frames)
# If mono music but stereo output, duplicate to both channels
if music.channels == 1 and self.output_channels > 1:
music_data = column_stack([music_data] * self.output_channels)
# Ensure music_data matches the output format
if music.channels > self.output_channels:
# Down-mix if needed
if self.output_channels == 1:
music_data = mean(music_data, axis=1)
else:
# Keep only the first output_channels
music_data = music_data[:, :self.output_channels]
# Add to the mix
output += music_data
# Apply master volume
output *= self.master_volume
# Apply simple limiter to prevent clipping
max_val = np_max(np_abs(output))
if max_val > 1.0:
output = output / max_val
outdata[:] = output
def _start_update_thread(self):
"""Start a thread to update music streams"""
self.update_thread_running = True
self.update_thread = Thread(target=self._update_music_thread)
self.update_thread.daemon = True
self.update_thread.start()
def _update_music_thread(self):
"""Thread function to update all music streams"""
while self.update_thread_running:
# Update all active music streams
for music_name, music in self.music_streams.items():
if music.is_playing:
music.update()
# Sleep to not consume too much CPU
time.sleep(0.1)
def init_audio_device(self):
if self.audio_device_ready:
return True
try:
# Try to use ASIO if available
self._initialize_asio()
# Set up and start the stream
self.stream = sd.OutputStream(
samplerate=self.target_sample_rate,
channels=self.output_channels,
callback=self._audio_callback,
blocksize=self.buffer_size,
device=self.device_id
)
self.stream.start()
self.running = True
self.audio_device_ready = True
# Start update thread for music streams
self._start_update_thread()
print(f"Audio device initialized with {self.output_channels} channels at {self.target_sample_rate}Hz")
return True
except Exception as e:
print(f"Error initializing audio device: {e}")
self.audio_device_ready = False
return False
def close_audio_device(self):
self.update_thread_running = False
if self.update_thread:
self.update_thread.join(timeout=1.0)
if self.stream:
self.stream.stop()
self.stream.close()
self.stream = None
self.running = False
self.audio_device_ready = False
print("Audio device closed")
return
def is_audio_device_ready(self) -> bool:
return self.audio_device_ready
def set_master_volume(self, volume: float):
self.master_volume = max(0.0, min(1.0, volume))
def get_master_volume(self) -> float:
return self.master_volume
def load_sound(self, fileName: str) -> str | None:
try:
sound = Sound(fileName, self.target_sample_rate)
sound_id = f"sound_{len(self.sounds)}"
self.sounds[sound_id] = sound
print(f"Loaded sound from {fileName} as {sound_id}")
return sound_id
except Exception as e:
print(f"Error loading sound: {e}")
return None
def play_sound(self, sound):
if sound in self.sounds:
self.sound_queue.put(sound)
def stop_sound(self, sound):
if sound in self.sounds:
self.sounds[sound].stop()
def pause_sound(self, sound: str):
if sound in self.sounds:
self.sounds[sound].pause()
def resume_sound(self, sound: str):
if sound in self.sounds:
self.sounds[sound].resume()
def is_sound_playing(self, sound: str) -> bool:
if sound in self.sounds:
return self.sounds[sound].is_playing
return False
def set_sound_volume(self, sound: str, volume: float):
if sound in self.sounds:
self.sounds[sound].volume = max(0.0, min(1.0, volume))
def set_sound_pan(self, sound: str, pan: float):
if sound in self.sounds:
self.sounds[sound].pan = max(0.0, min(1.0, pan))
def load_music_stream(self, fileName: str) -> str | None:
try:
music = Music(file_path=fileName, target_sample_rate=self.target_sample_rate)
music_id = f"music_{len(self.music_streams)}"
self.music_streams[music_id] = music
print(f"Loaded music stream from {fileName} as {music_id}")
return music_id
except Exception as e:
print(f"Error loading music stream: {e}")
return None
def is_music_valid(self, music: str) -> bool:
if music in self.music_streams:
return self.music_streams[music].valid
return False
def unload_music_stream(self, music: str):
if music in self.music_streams:
del self.music_streams[music]
def play_music_stream(self, music: str):
if music in self.music_streams:
self.music_queue.put((music, 'play'))
def is_music_stream_playing(self, music: str) -> bool:
if music in self.music_streams:
return self.music_streams[music].is_playing
return False
def update_music_stream(self, music: str):
if music in self.music_streams:
self.music_streams[music].update()
def stop_music_stream(self, music: str):
if music in self.music_streams:
self.music_queue.put((music, 'stop'))
def pause_music_stream(self, music: str):
if music in self.music_streams:
self.music_queue.put((music, 'pause'))
def resume_music_stream(self, music: str):
if music in self.music_streams:
self.music_queue.put((music, 'resume'))
def seek_music_stream(self, music: str, position: float):
if music in self.music_streams:
self.music_queue.put((music, 'seek', position))
def set_music_volume(self, music: str, volume: float):
if music in self.music_streams:
self.music_streams[music].volume = max(0.0, min(1.0, volume))
def set_music_pan(self, music: str, pan: float):
if music in self.music_streams:
self.music_streams[music].pan = max(0.0, min(1.0, pan))
def get_music_time_length(self, music: str) -> float:
if music in self.music_streams:
return self.music_streams[music].get_time_length()
raise ValueError(f"Music stream {music} not initialized")
def get_music_time_played(self, music: str) -> float:
if music in self.music_streams:
return self.music_streams[music].get_time_played()
raise ValueError(f"Music stream {music} not initialized")
class AudioEngineWrapper:
def __init__(self, host_api):
self.host_api = host_api
if host_api == 'WASAPI':
self._module = ray
elif host_api == 'ASIO':
self._module = ASIOEngine()
else:
raise Exception("Invalid host API passed to wrapper")
def __getattr__(self, name):
try:
return getattr(self._module, name)
except AttributeError:
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}' and '{type(self._module).__name__}' has no attribute '{name}'")
audio = AudioEngineWrapper(get_config()["audio"]["device_type"])
if get_config()["audio"]["device_type"] == 'ASIO':
audio.set_master_volume(0.75)