TEXT TO SPEECH HUMAN VOICE (GOOGLE COLAB)

 



# ================== STEP 1: INSTALL DEPENDENCIES ==================

!pip install --upgrade pip setuptools wheel

!pip install torch==2.2.1 transformers==4.40.2 \

    git+https://github.com/suno-ai/bark.git \

    moviepy==1.0.3 soundfile==0.12.1 pandas==2.2.2 pydub==0.25.1 numpy==1.26.4 gdown




# ================== STEP 2: IMPORTS & MOUNT DRIVE ==================
# Mount Google Drive at /content/drive first, so the /content/drive/MyDrive
# paths used below are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): numpy, torch and pandas are not referenced anywhere in the
# visible part of this script; confirm before relying on them.
import os
import numpy as np
import torch
import pandas as pd
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from moviepy.editor import AudioFileClip, ImageClip
from pydub import AudioSegment

# Output folder on Drive; exist_ok=True makes re-running this cell harmless
# when the folder already exists.
output_dir = "/content/drive/MyDrive/bark_outputs"
os.makedirs(output_dir, exist_ok=True)

# Load Bark models up front, before the generation loop starts.
# NOTE(review): presumably fetches model weights on first run - confirm.
preload_models()
print("✅ Bark models loaded successfully")




# ================== STEP 3: READ GOOGLE DOC FROM DRIVE ==================
# Place your Google Doc in Drive as a txt file first
# For example: "/content/drive/MyDrive/speech.txt"
txt_path = "/content/drive/MyDrive/speech.txt"

# Fail early with an actionable message instead of a bare open() traceback.
if not os.path.exists(txt_path):
    raise FileNotFoundError("❌ Please export your Google Doc as a .txt file and put it in Drive.")

# "utf-8-sig" decodes plain UTF-8 and additionally drops a leading byte-order
# mark when one is present (exported text files commonly start with one).
# str.strip() does not remove U+FEFF, so with plain "utf-8" the mark would be
# passed to the speech model as part of the first line.
with open(txt_path, "r", encoding="utf-8-sig") as f:
    # Keep only lines that still have content after trimming whitespace.
    lines = [stripped for stripped in (raw.strip() for raw in f) if stripped]

print(f"✅ Loaded {len(lines)} lines from your Google Doc")




# ================== STEP 4: GENERATE AUDIO & VIDEO ==================
# For every text line: synthesize speech with Bark, save it as WAV, convert the
# WAV to MP3, and render an MP4 showing one static image for the clip's length.
# Outputs are written to output_dir as output_<i>.wav / .mp3 / .mp4.
BACKGROUND_IMAGE_URL = "https://i.imgur.com/Oj1g1qz.png"

# The background is the same for every line, so it is fetched once (lazily, on
# the first iteration) rather than downloaded again for each line.
background = None

for i, text in enumerate(lines):
    print(f"\n🎙 Generating voice for line {i}: {text[:60]}...")

    # Build all three paths from one stem instead of str.replace(".wav", ...),
    # which would also rewrite a ".wav" occurring elsewhere in the path.
    base_path = f"{output_dir}/output_{i}"
    wav_path = f"{base_path}.wav"
    mp3_path = f"{base_path}.mp3"
    mp4_path = f"{base_path}.mp4"

    # Bark TTS audio
    audio_array = generate_audio(text)
    write_wav(wav_path, SAMPLE_RATE, audio_array)

    # Convert to MP3
    AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")

    # Create MP4 with static image
    if background is None:
        background = ImageClip(BACKGROUND_IMAGE_URL)
    duration = len(audio_array) / SAMPLE_RATE  # seconds = samples / sample rate
    audio_clip = AudioFileClip(wav_path)
    try:
        # set_duration/set_audio return new clips; `background` stays reusable.
        video = background.set_duration(duration).set_audio(audio_clip)
        video.write_videofile(mp4_path, fps=1, audio_codec="aac", verbose=False, logger=None)
    finally:
        # Release the audio reader even if rendering fails, so a long run does
        # not accumulate open readers (one per processed line).
        audio_clip.close()

    print(f"✅ Saved: {mp3_path} and {mp4_path}")

print("\n🎉 All audio/video files saved to:", output_dir)



PREREQUISITES - MAKE SURE YOUR GOOGLE DOCUMENT IS NOT PRIVATE


Comments

Popular Posts