TEXT TO SPEECH HUMAN VOICE (GOOGLE COLAB)
# ================== STEP 1: INSTALL DEPENDENCIES ==================
# Notebook shell commands (the leading "!" hands the line to the shell).
# First upgrade the packaging tools themselves.
!pip install --upgrade pip setuptools wheel
# Then install the libraries. Bark is installed straight from its GitHub
# repository; every other package except gdown is pinned to an exact version.
# NOTE(review): scipy is imported in STEP 2 but is not installed here —
# presumably it is preinstalled in the runtime or pulled in by Bark; confirm.
!pip install torch==2.2.1 transformers==4.40.2 \
git+https://github.com/suno-ai/bark.git \
moviepy==1.0.3 soundfile==0.12.1 pandas==2.2.2 pydub==0.25.1 numpy==1.26.4 gdown
# ================== STEP 2: IMPORTS & MOUNT DRIVE ==================
# Mount Google Drive at /content/drive first: the input text file and the
# output folder used later in this script both live under that mount point.
from google.colab import drive
drive.mount('/content/drive')
import os
import numpy as np
import torch
import pandas as pd
# Bark supplies the TTS entry points and the sample rate of the audio it returns.
from bark import SAMPLE_RATE, generate_audio, preload_models
# Aliased so the WAV writer reads clearly at the call site.
from scipy.io.wavfile import write as write_wav
from moviepy.editor import AudioFileClip, ImageClip
from pydub import AudioSegment
# Folder on the mounted Drive that receives every generated file.
# Creating it is a no-op when it already exists.
output_dir = "/content/drive/MyDrive/bark_outputs"
os.makedirs(name=output_dir, exist_ok=True)

# Load the Bark models once, before any audio is requested.
preload_models()
print("✅ Bark models loaded successfully")
# ================== STEP 3: READ GOOGLE DOC FROM DRIVE ==================
# Export the Google Doc as a plain-text (.txt) file and place it in Drive,
# for example at "/content/drive/MyDrive/speech.txt".
txt_path = "/content/drive/MyDrive/speech.txt"
if not os.path.exists(txt_path):
    raise FileNotFoundError("❌ Please export your Google Doc as a .txt file and put it in Drive.")

# Build `lines`: one entry per non-blank line, surrounding whitespace removed.
# "utf-8-sig" decodes ordinary UTF-8 exactly like "utf-8" but also drops a
# leading byte-order mark when the export contains one. With plain "utf-8" the
# BOM would stay attached to the first line (str.strip() does not remove
# U+FEFF) and end up in the text handed to the TTS model.
with open(txt_path, "r", encoding="utf-8-sig") as f:
    lines = [stripped for stripped in (raw.strip() for raw in f) if stripped]
print(f"✅ Loaded {len(lines)} lines from your Google Doc")
# ================== STEP 4: GENERATE AUDIO & VIDEO ==================
# For every line of text this writes three files into `output_dir`:
#   output_<i>.wav  - the audio array returned by Bark
#   output_<i>.mp3  - the same audio re-encoded as MP3
#   output_<i>.mp4  - a static image shown for the length of the audio

# The background frame is identical for every video, so load it once here
# instead of fetching the URL again on each iteration.
background_url = "https://i.imgur.com/Oj1g1qz.png"
background = ImageClip(background_url)

for i, text in enumerate(lines):
    print(f"\nš Generating voice for line {i}: {text[:60]}...")

    # Bark TTS audio
    audio_array = generate_audio(text)
    wav_path = f"{output_dir}/output_{i}.wav"
    write_wav(wav_path, SAMPLE_RATE, audio_array)

    # Convert to MP3. export() hands back the open output file, so close it
    # explicitly rather than leaving that to garbage collection.
    mp3_path = wav_path.replace(".wav", ".mp3")
    AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3").close()

    # Create MP4 with static image, shown for exactly the audio's duration
    # (number of samples divided by samples per second).
    mp4_path = wav_path.replace(".wav", ".mp4")
    duration = len(audio_array) / SAMPLE_RATE
    audio_clip = AudioFileClip(wav_path)
    try:
        video = background.set_duration(duration).set_audio(audio_clip)
        video.write_videofile(mp4_path, fps=1, audio_codec="aac", verbose=False, logger=None)
    finally:
        # Release the audio reader even if encoding fails; otherwise one
        # reader stays open for every processed line.
        audio_clip.close()

    print(f"✅ Saved: {mp3_path} and {mp4_path}")

print("\nš All audio/video files saved to:", output_dir)
PREREQUISITES - MAKE SURE YOUR GOOGLE DOCUMENT IS NOT PRIVATE
Comments
Post a Comment