import logging
import os
import subprocess
import tempfile
from datetime import timedelta

import srt
import whisper
from moviepy.editor import VideoFileClip
from transformers import MarianMTModel, MarianTokenizer

# Step 1: Extract Audio from Video
def extract_audio(video_path, audio_path):
    """Write the audio track of ``video_path`` to ``audio_path``.

    This is best-effort: any failure is logged and swallowed, so callers
    that need the audio should check that ``audio_path`` exists afterwards.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the audio file to create.
    """
    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                # Report the real cause instead of an AttributeError on None.
                logging.error("Error extracting audio: video has no audio track.")
                return
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the clip's underlying readers even when writing fails.
            video.close()
        logging.info("Audio extracted successfully.")
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")

# Step 2: Transcribe Audio to Text using Whisper
def transcribe_audio(audio_path):
    """Transcribe ``audio_path`` with the Whisper "base" model.

    Returns the ``"segments"`` entry of the transcription result. If
    anything goes wrong the error is logged and an empty list is returned.
    """
    try:
        transcriber = whisper.load_model("base")
        output = transcriber.transcribe(audio_path)
        logging.info("Transcription completed.")
        segments = output["segments"]
    except Exception as err:
        logging.error(f"Error during transcription: {err}")
        segments = []
    return segments
# Step 3: Translate Transcribed Text
def translate_text(text, model, tokenizer):
    """Translate ``text`` using the given ``model`` / ``tokenizer`` pair.

    The text is tokenized (with truncation enabled and ``max_length=512``),
    passed to ``model.generate``, and the first generated sequence is
    decoded with special tokens skipped.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=512,
    )
    generated = model.generate(**encoded)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Step 4: Generate Subtitles with Translation
def generate_subtitles(transcribed_segments, model, tokenizer):
    """Build one translated ``srt.Subtitle`` per transcribed segment.

    Each segment is read through its ``"start"``, ``"end"`` (seconds) and
    ``"text"`` keys; the text is translated with ``translate_text`` and the
    subtitles are numbered from 1 in input order.
    """
    return [
        srt.Subtitle(
            index=position,
            start=timedelta(seconds=entry["start"]),
            end=timedelta(seconds=entry["end"]),
            content=translate_text(entry["text"], model, tokenizer),
        )
        for position, entry in enumerate(transcribed_segments, start=1)
    ]

# Step 5: Write Subtitles to SRT File
def write_subtitles_to_srt(subtitles, output_srt_path):
    """Serialize ``subtitles`` with ``srt.compose`` and write them to a file.

    Args:
        subtitles: Subtitle objects accepted by ``srt.compose``.
        output_srt_path: Destination path; the file is overwritten.
    """
    # Compose before opening so a composition error cannot leave behind a
    # truncated, empty file.
    srt_text = srt.compose(subtitles)
    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent the translated (non-ASCII) text.
    with open(output_srt_path, "w", encoding="utf-8") as f:
        f.write(srt_text)

# Step 6: Add Subtitles to Video
def _escape_ffmpeg_filter_value(value):
    """Escape ``value`` for use as an option value inside an ffmpeg filtergraph."""
    # ffmpeg parses the string twice: first as a filtergraph description
    # (special characters: \ ' [ ] , ;) and then as a filter option value
    # (special characters: \ : '). Escape for the inner level first, then for
    # the outer one.
    for specials in ("\\:'", "\\'[],;"):
        value = "".join("\\" + ch if ch in specials else ch for ch in value)
    return value


def add_subtitles_to_video(video_path, subtitle_path, output_video_path):
    """Burn the subtitles in ``subtitle_path`` into ``video_path`` using ffmpeg.

    The audio stream is copied unchanged. Failures (ffmpeg exiting with a
    non-zero status, or the ffmpeg executable not being found) are logged and
    swallowed rather than raised.

    Args:
        video_path: Input video file.
        subtitle_path: Subtitle file passed to ffmpeg's ``subtitles`` filter.
        output_video_path: Path of the video file to create.
    """
    # Paths containing ':' or '\' (e.g. Windows drive paths), quotes or commas
    # would otherwise be misparsed as filter syntax.
    subtitle_filter = f"subtitles={_escape_ffmpeg_filter_value(subtitle_path)}"
    command = [
        'ffmpeg',
        '-i', video_path,
        '-vf', subtitle_filter,
        '-c:a', 'copy',
        output_video_path,
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        logging.error(f"Error adding subtitles to video: {e}")
    except FileNotFoundError as e:
        # Raised when the ffmpeg executable itself cannot be found.
        logging.error(f"Error adding subtitles to video: {e}")
    else:
        logging.info(f"Final video with subtitles saved to {output_video_path}")

# Main function
def main(local_video_path, output_srt_path, output_video_path):
    """Run the full pipeline: extract, transcribe, translate, subtitle.

    Args:
        local_video_path: Path of the source video. If it does not exist an
            error is logged and nothing else happens.
        output_srt_path: Where the translated SRT file is written.
        output_video_path: Where the video with burned-in subtitles is written.
    """
    video_file = local_video_path
    if not os.path.exists(video_file):
        logging.error("No valid video file found.")
        return

    # Keep the intermediate audio in a private temporary directory so that an
    # unrelated "audio.wav" in the working directory is never overwritten or
    # deleted, and so that it is removed even if a step raises.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_file = os.path.join(work_dir, "audio.wav")
        extract_audio(video_file, audio_file)

        # extract_audio only logs its failures; stop here instead of feeding
        # a missing file to the rest of the pipeline.
        if not os.path.exists(audio_file):
            logging.error("Audio extraction produced no file; aborting.")
            return

        transcribed_segments = transcribe_audio(audio_file)

    model_name = "Helsinki-NLP/opus-mt-en-zh"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    subtitles = generate_subtitles(transcribed_segments, model, tokenizer)

    write_subtitles_to_srt(subtitles, output_srt_path)
    print(f"Subtitles saved to {output_srt_path}")

    add_subtitles_to_video(video_file, output_srt_path, output_video_path)

# Usage
if __name__ == "__main__":
    # The root logger defaults to WARNING, which would hide every
    # logging.info progress message emitted by the pipeline steps.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    local_video_path = "cardifftestvideo.mp4"
    output_srt_path = "chinese_translated_video.srt"
    output_video_path = "chinese_translated_video.mp4"
    main(local_video_path, output_srt_path, output_video_path)
