Simple text-to-speech code in Python
1. Install edge-tts and pydub:
pip install edge-tts pydub
import asyncio
import edge_tts
from pydub import AudioSegment
import os
# ------------------------
# Settings
# ------------------------
# Files: the text to read aloud and the MP3 that main() exports.
input_file = "script.txt"
output_file = "oput.mp3"

# Voice name handed to edge_tts.Communicate for every chunk.
voice = "en-US-GuyNeural"

# Chunking: size limit (in characters) passed to split_text, and the length
# (in milliseconds) of the silence appended after each synthesized chunk.
chunk_size = 200
pause_duration = 70
# ------------------------
def split_text(text, size):
    """Split *text* into chunks of at most *size* characters.

    Each chunk preferably ends at the last period found within the first
    *size* characters.  When there is no period, the split falls back to the
    last whitespace character so words are not cut in half; only a single
    unbroken run longer than *size* is cut mid-word.

    Args:
        text: The text to split.  Leading/trailing whitespace is ignored.
        size: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of non-empty, whitespace-stripped chunks in original order.
        Empty or whitespace-only input yields an empty list.

    Raises:
        ValueError: If *size* is smaller than 1 (the loop could never make
            progress toward a chunk that fits).
    """
    if size < 1:
        raise ValueError("size must be a positive integer")

    chunks = []
    text = text.strip()
    while len(text) > size:
        period_at = text.rfind(".", 0, size)
        if period_at != -1:
            # Keep the period with the sentence it terminates.
            end = period_at + 1
        else:
            # Look one character past the limit: if text[size] is whitespace
            # the first `size` characters already form whole words.
            window = text[:size + 1]
            end = max(window.rfind(ws) for ws in (" ", "\t", "\n", "\r"))
            if end <= 0:
                # No natural break at all: hard cut at the limit.
                end = size
        piece = text[:end].strip()
        if piece:
            chunks.append(piece)
        text = text[end:].strip()
    if text:
        chunks.append(text)
    return chunks
async def main():
    """Read ``input_file``, synthesize it with edge-tts and export one MP3.

    The text is split by ``split_text`` using ``chunk_size``; each chunk is
    saved by edge-tts (voice ``voice``) to a temporary ``chunk_<i>.mp3`` in
    the current directory, loaded with pydub, and followed by
    ``pause_duration`` ms of silence.  The concatenated audio is exported to
    ``output_file``.

    Prints an error and returns early when the input file is missing or
    contains no text.  Temporary chunk files are removed even if synthesis,
    decoding or export raises.
    """
    # Read the text file (EAFP: avoids the exists()/open() race).
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Error: {input_file} not found!")
        return

    # Drop blank chunks so nothing empty is sent to the TTS service.
    chunks = [chunk for chunk in split_text(text, chunk_size) if chunk.strip()]
    if not chunks:
        print(f"Error: {input_file} contains no text to convert!")
        return

    pause = AudioSegment.silent(duration=pause_duration)
    combined = AudioSegment.empty()
    temp_files = []
    try:
        for i, chunk in enumerate(chunks):
            temp_file = f"chunk_{i}.mp3"
            # Register before writing so a partially written file is
            # still cleaned up if save() fails midway.
            temp_files.append(temp_file)
            communicate = edge_tts.Communicate(chunk, voice)
            await communicate.save(temp_file)
            combined += AudioSegment.from_mp3(temp_file)
            combined += pause  # add pause
            print(f"Processed chunk {i+1}/{len(chunks)}")

        # Write the merged audio as one MP3
        combined.export(output_file, format="mp3")
        print(f"\nDone! Saved as {output_file}, ready for YouTube.")
    finally:
        # Cleanup temp files, whether or not the run succeeded
        for temp_file in temp_files:
            if os.path.exists(temp_file):
                os.remove(temp_file)
# Run the async function only when executed as a script, so importing this
# module (e.g. to reuse split_text) does not start a synthesis run.
if __name__ == "__main__":
    asyncio.run(main())
Comments
Post a Comment