I've reviewed the Python script you provided and made several refinements to improve its robustness, error handling, and clarity. Below is the updated script.
Key improvements:
- Whisper now uses fp16=True when a CUDA-enabled GPU is detected with PyTorch, otherwise it defaults to fp16=False for CPU.
- Argos Translate initialization uses get_translation_from_codes and guides the user to argospm if automatic setup fails, which is a practical approach given the complexity of Argos Translate's multi-hop translations.
- The synthesizer.close() call has been removed, as it's not a standard part of TTS.api.TTS object cleanup.

Before Running:
Install Dependencies:
pip install TTS openai-whisper argostranslate sounddevice numpy pyaudio torch torchaudio
(Ensure torch and torchaudio are compatible with your system, potentially with a CUDA build if you have an NVIDIA GPU: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu118 for CUDA 11.8; adjust as needed.)
You also need ffmpeg on your system PATH for Whisper: sudo apt update && sudo apt install ffmpeg (Linux) or download from ffmpeg.org.
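Since missing dependencies only surface at runtime, a quick preflight check can save a debugging round-trip. The sketch below is illustrative, not part of the script itself (the check_environment helper and its default package list are my own); it only checks that packages are importable and that ffmpeg is on PATH, without importing the heavy libraries.

```python
# Preflight check: verify optional dependencies are present without importing them.
import importlib.util
import shutil

def check_environment(packages=("torch", "whisper", "TTS", "argostranslate", "sounddevice")):
    """Return a dict mapping each dependency name to True if it is available."""
    report = {name: importlib.util.find_spec(name) is not None for name in packages}
    # Whisper shells out to ffmpeg, so the binary must be on PATH.
    report["ffmpeg"] = shutil.which("ffmpeg") is not None
    return report

if __name__ == "__main__":
    for name, ok in check_environment().items():
        print(f"{name}: {'OK' if ok else 'MISSING'}")
```

Running this before the translator gives a one-glance view of what still needs installing.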
List Audio Devices: Run python your_script_name.py list_devices to see the available input devices.
Set INPUT_DEVICE_INDEX: Edit the script and set the INPUT_DEVICE_INDEX variable to the correct number for your desired input audio device.

Here's the refined script:
# translator_v2.py
import os
import tempfile
import wave
import numpy as np
import sounddevice as sd
import whisper
import argostranslate.package
import argostranslate.translate
from TTS.api import TTS
import time
import sys
import torch # For checking CUDA availability
# --- Configuration ---
# Audio settings
RECORD_SECONDS = 3
INPUT_DEVICE_INDEX = None # IMPORTANT: Set this after running with 'list_devices'
SAMPLE_RATE = 16000
CHANNELS = 1
# Whisper settings
WHISPER_MODEL_NAME = "base" # e.g., "tiny.en", "base.en", "small", "medium"
# If using a multilingual model (not ".en"), Whisper will detect source language.
# Argos Translate settings
TARGET_LANGUAGE_CODE = "es" # Target language for translation (e.g., "es", "fr", "de")
# Coqui TTS settings
TTS_MODELS = {
"en": "tts_models/en/ljspeech/tacotron2-DDC",
"es": "tts_models/es/css10/vits",
"fr": "tts_models/fr/css10/vits",
"de": "tts_models/de/css10/vits",
# Add other languages and their corresponding Coqui TTS models
# Find models using `tts --list_models` in your terminal
}
USE_GPU_TTS = False # Set to True if you have a compatible GPU for Coqui TTS
# --- Global Variables ---
whisper_model = None
argos_translator = None
coqui_tts = None
is_whisper_en_model = ".en" in WHISPER_MODEL_NAME
def list_audio_devices():
"""Lists available audio input devices."""
print("Available audio input devices:")
try:
devices = sd.query_devices()
found_input_device = False
for i, device in enumerate(devices):
if device['max_input_channels'] > 0:
print(f" Index {i}: {device['name']} (Input Channels: {device['max_input_channels']})")
found_input_device = True
if not found_input_device:
print(" No input devices found. Ensure microphone or virtual cable is connected and configured.")
except Exception as e:
print(f"Error querying audio devices: {e}")
print("Ensure sounddevice and PortAudio (or equivalent) are correctly installed.")
print("\nSet INPUT_DEVICE_INDEX in the script to the desired input device number.")
def initialize_whisper():
"""Initializes the Whisper model."""
global whisper_model
print(f"Loading Whisper model: {WHISPER_MODEL_NAME}...")
try:
whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
print("Whisper model loaded successfully.")
except Exception as e:
print(f"Error loading Whisper model ({WHISPER_MODEL_NAME}): {e}")
print("Ensure PyTorch is installed correctly and the model name is valid.")
print("You might need to install ffmpeg: `sudo apt install ffmpeg` or download from ffmpeg.org.")
sys.exit(1)
def initialize_argos_translate(source_lang_code, target_lang_code):
"""Initializes Argos Translate for the given language pair."""
global argos_translator
print(f"Initializing Argos Translate for {source_lang_code} -> {target_lang_code}...")
try:
# Attempt to get the translation directly
argos_translator = argostranslate.translate.get_translation_from_codes(source_lang_code, target_lang_code)
print(f"Argos Translate for {source_lang_code} -> {target_lang_code} loaded successfully.")
return
except Exception: # pylint: disable=broad-except
print(f"Direct translation {source_lang_code} -> {target_lang_code} not found. Checking packages...")
try:
argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()
# Try to find and install the required package(s)
# This is a simplified attempt for direct packages. Multi-hop might require manual setup.
package_to_install = next(
(pkg for pkg in available_packages if pkg.from_code == source_lang_code and pkg.to_code == target_lang_code),
None
)
if package_to_install:
installed_argos_packages = argostranslate.package.get_installed_packages()
is_pkg_already_installed = any(
p.from_code == package_to_install.from_code and \
p.to_code == package_to_install.to_code and \
p.package_version == package_to_install.package_version
for p in installed_argos_packages
)
if not is_pkg_already_installed:
print(f"Attempting to download and install Argos Translate package: {package_to_install.from_code}_to_{package_to_install.to_code}")
download_path = package_to_install.download()
argostranslate.package.install_from_path(download_path)
print("Package installed. Re-attempting to load translation...")
argos_translator = argostranslate.translate.get_translation_from_codes(source_lang_code, target_lang_code)
print(f"Argos Translate for {source_lang_code} -> {target_lang_code} loaded after package installation.")
return
else:
print(f"Package {package_to_install.from_code}_to_{package_to_install.to_code} seems already installed, but direct load failed.")
print(f"Could not automatically find or install a direct Argos Translate package for {source_lang_code} -> {target_lang_code}.")
except Exception as e_pkg: # pylint: disable=broad-except
print(f"Error during Argos Translate package handling: {e_pkg}")
print(f"Failed to initialize Argos Translate for {source_lang_code} -> {target_lang_code}.")
print("Please install the required language models manually using 'argospm'.")
print(f"E.g., `argospm update && argospm install translate-{source_lang_code}_{target_lang_code}`")
print("Translation will be skipped for this pair if not resolved.")
argos_translator = None # Ensure it's None if setup failed
def initialize_coqui_tts(language_code):
"""Initializes Coqui TTS for the target language."""
global coqui_tts
model_name = TTS_MODELS.get(language_code)
if not model_name:
print(f"No Coqui TTS model specified in TTS_MODELS for language code: {language_code}.")
print("TTS will not be available for this language.")
coqui_tts = None
return
print(f"Initializing Coqui TTS with model: {model_name} for language: {language_code} (GPU: {USE_GPU_TTS})...")
try:
coqui_tts = TTS(model_name=model_name, progress_bar=False, gpu=USE_GPU_TTS)
print("Coqui TTS initialized successfully.")
except Exception as e:
print(f"Error initializing Coqui TTS model {model_name}: {e}")
print("Ensure PyTorch is installed correctly and the TTS model name is valid.")
print("Models are usually downloaded on first use. Check network connection if it's the first time.")
coqui_tts = None
def record_audio(device_index, duration, samplerate, channels):
"""Records audio from the specified device."""
print(f"Recording for {duration} seconds from device index {device_index}...")
try:
recording = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=channels, device=device_index, dtype='int16')
sd.wait()
print("Recording finished.")
return recording
except Exception as e:
print(f"Error during audio recording: {e}")
if device_index is None:
print("INPUT_DEVICE_INDEX is not set. Cannot record.")
return None
def save_to_temp_wav(recording_data, samplerate, channels):
"""Saves audio data to a temporary WAV file."""
if recording_data is None:
return None
try:
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav', prefix='rec_')
with wave.open(temp_file.name, 'wb') as wf:
wf.setnchannels(channels)
wf.setsampwidth(2) # 16-bit audio
wf.setframerate(samplerate)
wf.writeframes(recording_data.tobytes())
return temp_file.name
except Exception as e:
print(f"Error saving temporary WAV file: {e}")
return None
def transcribe_audio(audio_path):
"""Transcribes audio using Whisper."""
if not whisper_model or not audio_path:
return None, None
print("Transcribing audio...")
try:
use_fp16 = torch.cuda.is_available() # Use fp16 if CUDA GPU is available
print(f"Whisper using fp16: {use_fp16}")
result = whisper_model.transcribe(audio_path, fp16=use_fp16)
text = result["text"].strip()
detected_language = result["language"]
print(f"Transcription: '{text}' (Detected language: {detected_language})")
return text, detected_language
except Exception as e:
print(f"Error during transcription: {e}")
return None, None
def translate_text_with_argos(text_to_translate, source_lang_code, target_lang_code_val):
"""Translates text using Argos Translate, re-initializing if necessary."""
global argos_translator
if not text_to_translate or not source_lang_code or not target_lang_code_val:
return None
# Check if translator needs re-initialization (if source language changed or not initialized)
current_translator_source = None
current_translator_target = None
if argos_translator and hasattr(argos_translator, 'from_lang') and hasattr(argos_translator, 'to_lang'):
current_translator_source = argos_translator.from_lang.code
current_translator_target = argos_translator.to_lang.code
if not argos_translator or \
current_translator_source != source_lang_code or \
current_translator_target != target_lang_code_val:
print(f"Re-initializing Argos Translate for {source_lang_code} -> {target_lang_code_val} (current: {current_translator_source}->{current_translator_target})")
initialize_argos_translate(source_lang_code, target_lang_code_val)
if not argos_translator: # Check again after attempting initialization
print(f"Argos Translate not available for {source_lang_code} -> {target_lang_code_val}. Skipping translation.")
return None
print(f"Translating '{text_to_translate}' from {source_lang_code} to {target_lang_code_val}...")
try:
translated_text = argos_translator.translate(text_to_translate)
print(f"Translation: '{translated_text}'")
return translated_text
except Exception as e:
print(f"Error during Argos translation: {e}")
return None
def speak_text_with_coqui(text_to_speak, language_code_for_tts):
"""Speaks text using Coqui TTS, re-initializing if necessary."""
global coqui_tts
if not text_to_speak or not language_code_for_tts:
return
expected_model_name = TTS_MODELS.get(language_code_for_tts)
if not expected_model_name:
print(f"No Coqui TTS model specified for language code: {language_code_for_tts}. Cannot speak.")
return
if not coqui_tts or coqui_tts.model_name != expected_model_name:
print(f"Coqui TTS needs (re)initialization for {language_code_for_tts} (expected model: {expected_model_name}, current: {coqui_tts.model_name if coqui_tts else 'None'}).")
initialize_coqui_tts(language_code_for_tts)
if not coqui_tts: # Check again after attempting initialization
print("Coqui TTS not available. Cannot speak text.")
return
print(f"Speaking '{text_to_speak}' in {language_code_for_tts}...")
try:
# For most models in TTS_MODELS, a simple call is enough.
# If a model is multi-speaker/multi-lingual, you might need:
# wav_data = coqui_tts.tts(text=text_to_speak, speaker=coqui_tts.speakers[0], language=coqui_tts.languages[0])
wav_data = coqui_tts.tts(text=text_to_speak)
audio_data_np = np.array(wav_data, dtype=np.float32)
output_sample_rate = coqui_tts.synthesizer.output_sample_rate
sd.play(audio_data_np, samplerate=output_sample_rate)
sd.wait()
print("Playback finished.")
except Exception as e:
print(f"Error during Coqui TTS speech synthesis or playback: {e}")
if "speaker_id" in str(e).lower() or "language_id" in str(e).lower() or "speaker_idx" in str(e).lower():
print("This Coqui TTS model might require specifying speaker_idx and/or language_idx.")
print("E.g., `wav_data = coqui_tts.tts(text=text_to_speak, speaker_idx='p225', language_idx='en')` (example values).")
print("Check the model card or `tts --list_models` for details on available speakers/languages for the model.")
def main_loop():
"""Main real-time translation loop."""
global is_whisper_en_model # Access the global flag
if INPUT_DEVICE_INDEX is None:
print("CRITICAL ERROR: INPUT_DEVICE_INDEX is not set.")
print("Please run the script with the 'list_devices' argument first, identify your input device,")
print("then edit the script to set the INPUT_DEVICE_INDEX variable at the top.")
return
# Initializations
initialize_whisper()
# Initial Argos Translate and Coqui TTS setup.
# Argos will be re-initialized dynamically if the detected source language changes.
# Coqui is for the fixed TARGET_LANGUAGE_CODE.
initial_source_lang_for_argos = "en" # Default for the first Argos init; re-initialized dynamically once Whisper detects the actual source language
initialize_argos_translate(initial_source_lang_for_argos, TARGET_LANGUAGE_CODE)
initialize_coqui_tts(TARGET_LANGUAGE_CODE)
print("\nStarting real-time translation loop. Press Ctrl+C to stop.")
try:
while True:
print(f"\n--- Loop Start (Listening for {RECORD_SECONDS}s) ---")
loop_start_time = time.time()
# 1. Record audio
recording = record_audio(INPUT_DEVICE_INDEX, RECORD_SECONDS, SAMPLE_RATE, CHANNELS)
if recording is None:
print("Skipping iteration due to recording error.")
time.sleep(1)
continue
temp_wav_path = save_to_temp_wav(recording, SAMPLE_RATE, CHANNELS)
if not temp_wav_path:
print("Skipping iteration due to WAV saving error.")
continue
# 2. Transcribe
transcribed_text, detected_source_lang_code = transcribe_audio(temp_wav_path)
if os.path.exists(temp_wav_path): # Ensure cleanup even if transcription fails partially
os.remove(temp_wav_path)
if not transcribed_text:
print("No transcription obtained. Skipping rest of loop.")
continue
# Determine actual source language for translation
# If Whisper model is English-only, source is 'en'. Otherwise, use detected.
actual_source_lang_for_translation = "en" if is_whisper_en_model else detected_source_lang_code
if not actual_source_lang_for_translation:
print("Could not determine source language from Whisper. Skipping translation.")
continue
# 3. Translate
translated_text = translate_text_with_argos(transcribed_text, actual_source_lang_for_translation, TARGET_LANGUAGE_CODE)
if not translated_text:
print("No translation obtained. Skipping TTS.")
continue
# 4. Speak translated text
speak_text_with_coqui(translated_text, TARGET_LANGUAGE_CODE) # TTS uses the target language
loop_end_time = time.time()
print(f"Loop completed in {loop_end_time - loop_start_time:.2f} seconds.")
except KeyboardInterrupt:
print("\nUser interrupted. Stopping real-time translation.")
except Exception as e_main: # pylint: disable=broad-except
print(f"\nAn unexpected error occurred in the main loop: {e_main}")
finally:
print("Exiting application.")
if __name__ == "__main__":
if len(sys.argv) > 1 and sys.argv[1].lower() == "list_devices":
list_audio_devices()
else:
# --- IMPORTANT ---
# SET YOUR INPUT_DEVICE_INDEX near the top of the script after running:
# python your_script_name.py list_devices
# Example: INPUT_DEVICE_INDEX = 2 (replace 2 with your device's actual index)
# --- ----------- ---
if INPUT_DEVICE_INDEX is None: # Final check before starting main loop
print("ERROR: INPUT_DEVICE_INDEX is not set at the top of the script.")
print("Run `python translator_v2.py list_devices`, then edit the script to set INPUT_DEVICE_INDEX.")
else:
main_loop()
This script should provide a more stable base for your real-time audio translation system. Remember that achieving very low latency (1-3 seconds) consistently will depend heavily on your hardware (CPU/GPU), the chosen model sizes, and the efficiency of the audio routing on your OS.
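On the latency point, tuning is much easier once you can see where the time actually goes in each loop iteration. A minimal sketch of per-stage timing (the stage_timer helper is hypothetical, not part of the script above; the time.sleep calls stand in for the real pipeline stages):

```python
import time
from contextlib import contextmanager

@contextmanager
def stage_timer(name, timings):
    """Record the wall-clock duration of a pipeline stage into `timings`."""
    start = time.perf_counter()
    try:
        yield
    finally:
        timings[name] = time.perf_counter() - start

# Usage inside the main loop: wrap each stage, then inspect the breakdown.
timings = {}
with stage_timer("record", timings):
    time.sleep(0.01)  # stand-in for record_audio(...)
with stage_timer("transcribe", timings):
    time.sleep(0.01)  # stand-in for whisper_model.transcribe(...)
print({name: f"{seconds:.2f}s" for name, seconds in timings.items()})
```

A breakdown like this tells you whether to shrink the Whisper model, shorten RECORD_SECONDS, or move TTS to the GPU first.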
I'll create a comprehensive GUI application for the real-time audio translation system using tkinter. This will provide an intuitive interface for managing all the translation settings and monitoring the process.
I've created a comprehensive GUI application for the real-time audio translation system. Here are the key features:
Install Dependencies: pip install sounddevice numpy openai-whisper argostranslate TTS torch torchaudio (tkinter ships with Python itself; on some Linux distributions you may need sudo apt install python3-tk).
Run the GUI: python audio_translator_gui.py
Configure Audio:
Configure Models:
The GUI supports translation to: Spanish, French, German, Italian, Portuguese, Russian, Chinese, Japanese, Korean, and Arabic (depending on available Argos Translate packages).
Language packages can also be installed manually with argospm for better reliability.

This GUI provides a user-friendly interface for the complex audio translation pipeline while maintaining all the functionality of the command-line version!
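One design point worth making explicit: a tkinter GUI stays responsive only if the heavy record/transcribe/translate/speak loop runs in a background thread, with status passed back through a queue, because tkinter widgets must only be touched from the main thread. A sketch of that pattern (all names here are illustrative, not taken from the GUI script):

```python
import queue
import threading

def pipeline_worker(status_queue, stop_event):
    """Background worker: runs the translation loop and reports progress
    via the queue, never touching any GUI widgets directly."""
    while not stop_event.is_set():
        # ... record, transcribe, translate, speak ...
        status_queue.put("Transcribed one chunk")
        stop_event.wait(0.05)  # stand-in for one loop iteration's duration

# In the GUI thread: start the worker, then poll the queue periodically
# from the Tk event loop, e.g. root.after(100, poll_status_queue).
status_queue = queue.Queue()
stop_event = threading.Event()
worker = threading.Thread(target=pipeline_worker,
                          args=(status_queue, stop_event), daemon=True)
worker.start()
stop_event.set()   # the GUI's Stop button would do this
worker.join(timeout=1)
```

The Event gives the Stop button a clean shutdown path, and the Queue keeps all widget updates on the main thread.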
Microsoft Windows [Version 10.0.19045.2965]
(c) Microsoft Corporation. All rights reserved.
C:\Users\Divyansh>npx shadcn@latest add "https://v0.dev/chat/b/b_yHxOpcrXSmB?token=eyJhbGciOiJkaXIiLCJlbmMiOiJBMjU2R0NNIn0..ZYu5zX-EOo-oF--v.k0FIUDj9UFizYqHWe4xIpYOlRjwlyFpIr-E6h11SDNeyA9GSFgAqYhTMSYA.wXCIGxVrUrrPfqUOVIVtsA"
Need to install the following packages:
shadcn@2.6.0
Ok to proceed? (y) y
npm warn deprecated node-domexception@1.0.0: Use your platform's native DOMException instead
Something went wrong. Please check the error below for more details.
If the problem persists, please open an issue on GitHub.
EPERM: operation not permitted, scandir 'C:\Users\Divyansh\Application Data'