Code for YouTube Video Transcription Summarization with Python Tutorial


View on GitHub
import os
import re
import nltk
import pytube
import youtube_transcript_api
from youtube_transcript_api import YouTubeTranscriptApi
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from heapq import nlargest
from urllib.parse import urlparse, parse_qs
import textwrap
from colorama import Fore, Back, Style, init
from openai import OpenAI

# Initialize colorama for cross-platform colored terminal output.
# autoreset=True restores the default style after every print call, so each
# print only needs to prefix the colour it wants.
init(autoreset=True)

# Download necessary NLTK data (tokenizer models and the stopword lists).
# NOTE(review): both 'punkt_tab' and 'punkt' are fetched, presumably so the
# tokenizers work across NLTK releases that look for either name - confirm.
nltk.download('punkt_tab', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# Initialize the OpenAI-compatible client, pointed at the OpenRouter endpoint.
# The key is read from the OPENROUTER_API_KEY environment variable when it is
# set, so the secret does not have to be written into this file; otherwise the
# placeholder below is used and must be replaced with a real key.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY", "<api_key>"),  # Or add your OpenRouter API key here
)

def extract_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised forms (scheme optional, host matched case-insensitively, a
    leading ``www.`` and any port ignored):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>`` (also ``m.`` and ``music.`` hosts)
    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

    Args:
        youtube_url: The URL as a string, e.g. pasted by the user.

    Returns:
        The video ID as a non-empty string.

    Raises:
        ValueError: If the input is not a string or no video ID can be found
            (unknown host, missing ``v`` parameter, empty path segment, ...).
    """
    if not isinstance(youtube_url, str):
        raise ValueError(f"Could not extract video ID from URL: {youtube_url}")

    cleaned_url = youtube_url.strip()
    parsed_url = urlparse(cleaned_url)

    # A URL typed without a scheme ("youtu.be/abc") parses with an empty
    # netloc and everything in the path; retry with a scheme so the host is
    # recognised.
    if not parsed_url.scheme and not parsed_url.netloc:
        parsed_url = urlparse('https://' + cleaned_url)

    # .hostname is lower-cased and has any port stripped, unlike .netloc.
    host = parsed_url.hostname or ''
    if host.startswith('www.'):
        host = host[4:]

    video_id = ''
    if host == 'youtu.be':
        video_id = parsed_url.path.strip('/').split('/')[0]
    elif host in ('youtube.com', 'm.youtube.com', 'music.youtube.com'):
        if parsed_url.path.rstrip('/') == '/watch':
            # .get avoids a KeyError when the 'v' parameter is absent.
            video_id = parse_qs(parsed_url.query).get('v', [''])[0]
        else:
            segments = parsed_url.path.split('/')
            if len(segments) > 2 and segments[1] in ('embed', 'v', 'shorts', 'live'):
                video_id = segments[2]

    # An empty ID (e.g. "https://youtu.be/") is as useless as no match at all.
    if not video_id:
        raise ValueError(f"Could not extract video ID from URL: {youtube_url}")
    return video_id

def get_transcript(video_id):
    """Get the transcript of a YouTube video as one space-joined string.

    Works with both youtube-transcript-api interfaces: the instance-based
    ``YouTubeTranscriptApi().fetch()`` when the installed release provides it,
    otherwise the legacy static ``YouTubeTranscriptApi.get_transcript()``.

    Args:
        video_id: The YouTube video ID (not the full URL).

    Returns:
        The transcript text. On any failure no exception is raised; instead a
        message starting with ``"Error retrieving transcript: "`` is returned,
        which the summarizers in this file recognise by its ``"Error"`` prefix.
    """
    try:
        if hasattr(YouTubeTranscriptApi, 'fetch'):
            # Newer releases: fetch() returns a transcript object whose
            # to_raw_data() yields the same list of {'text': ...} dicts.
            entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        else:
            entries = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in entries)
    except Exception as e:
        # Best-effort by design: report the problem as text for the caller.
        return f"Error retrieving transcript: {str(e)}."

def summarize_text_nltk(text, num_sentences=5):
    """Build an extractive summary by keeping the highest-scoring sentences.

    Each sentence is scored as the sum of the corpus-wide frequencies of its
    alphanumeric, non-stopword tokens; the ``num_sentences`` best sentences
    are returned in their original order.

    Args:
        text: The text to summarize. Falsy values and strings starting with
            "Error" or "Transcript not available" are returned unchanged.
        num_sentences: How many sentences to keep.

    Returns:
        The summary string, or ``text`` itself when it is a pass-through value
        or has no more than ``num_sentences`` sentences.
    """
    # Pass error/sentinel messages (and empty input) straight through.
    if not text or text.startswith(("Error", "Transcript not available")):
        return text

    sentence_list = sent_tokenize(text)

    # Nothing to cut when the text is already short enough.
    if len(sentence_list) <= num_sentences:
        return text

    # Frequency table over lower-cased content words only.
    english_stopwords = set(stopwords.words('english'))
    word_freq = FreqDist(
        token
        for token in word_tokenize(text.lower())
        if token.isalnum() and token not in english_stopwords
    )

    # Score each sentence; sentences without any counted word get no entry.
    scores = {}
    for position, sentence in enumerate(sentence_list):
        weights = [
            word_freq[token]
            for token in word_tokenize(sentence.lower())
            if token in word_freq
        ]
        if weights:
            scores[position] = sum(weights)

    # Pick the best positions, then restore reading order.
    chosen = sorted(nlargest(num_sentences, scores, key=scores.get))
    return ' '.join(sentence_list[position] for position in chosen)

def summarize_text_ai(text, video_title, num_sentences=5, *,
                      model="mistralai/mistral-small-3.1-24b-instruct:free",
                      max_chars=15000):
    """Summarize text with a chat model reached through the module-level client.

    Args:
        text: Transcript text. Falsy values and strings starting with "Error"
            or "Transcript not available" are returned unchanged.
        video_title: Title inserted into the prompt for context.
        num_sentences: Approximate summary length requested in the prompt.
        model: Model identifier passed to the chat-completions call.
        max_chars: Transcript characters kept before the prompt is built;
            adjust to the model's context window.

    Returns:
        The generated summary, or a message starting with
        ``"Error generating AI summary: "`` if the request fails or the model
        returns no text. No exception is raised.
    """
    if not text or text.startswith(("Error", "Transcript not available")):
        return text

    # Truncate text if it's too long (models often have token limits).
    # Slicing is a no-op for shorter text, so no length check is needed.
    truncated_text = text[:max_chars]

    prompt = f"""Please provide a concise summary of the following YouTube video transcript.
Title: {video_title}

Transcript:
{truncated_text}

Create a clear, informative summary that captures the main points and key insights from the video.
Your summary should be approximately {num_sentences} sentences long.
"""

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ]
        )
        summary = completion.choices[0].message.content
        # The content field may be None or blank; callers call .split() and
        # .startswith() on the result, so always hand back a string.
        if not summary or not summary.strip():
            return "Error generating AI summary: the model returned an empty response"
        return summary
    except Exception as e:
        return f"Error generating AI summary: {str(e)}"

def summarize_youtube_video(youtube_url, num_sentences=5):
    """Main function to summarize a YouTube video's transcription.

    Extracts the video ID, fetches the transcript, looks up the title and
    produces both an AI summary and an NLTK extractive summary.

    Args:
        youtube_url: URL of the video.
        num_sentences: Target number of sentences for both summaries.

    Returns:
        On success a dict with the keys ``video_title``, ``video_id``,
        ``ai_summary``, ``nltk_summary``, ``full_transcript_length``,
        ``nltk_summary_length`` and ``ai_summary_length`` (lengths in words;
        the AI length is 0 when the AI summary is an error message).
        On failure a dict with the single key ``error``. No exception is
        raised.
    """
    try:
        video_id = extract_video_id(youtube_url)
        transcript = get_transcript(video_id)

        # Stop early when there is nothing to summarize: otherwise the error
        # text would be reported as if it were the transcript, and an empty
        # transcript would give a word count of 0 for later percentage maths.
        if not transcript or not transcript.strip():
            return {"error": "Transcript is empty; nothing to summarize."}
        if transcript.startswith("Error retrieving transcript:"):
            return {"error": transcript}

        # Get video title for context; the title is optional, so any failure
        # here falls back to a placeholder instead of aborting.
        try:
            yt = pytube.YouTube(youtube_url)
            video_title = yt.title
        except Exception:
            video_title = "Unknown Title"

        # Generate both summaries
        print(Fore.YELLOW + f"Generating AI summary with {num_sentences} sentences...")
        ai_summary = summarize_text_ai(transcript, video_title, num_sentences)

        print(Fore.YELLOW + f"Generating NLTK summary with {num_sentences} sentences...")
        nltk_summary = summarize_text_nltk(transcript, num_sentences)

        return {
            "video_title": video_title,
            "video_id": video_id,
            "ai_summary": ai_summary,
            "nltk_summary": nltk_summary,
            "full_transcript_length": len(transcript.split()),
            "nltk_summary_length": len(nltk_summary.split()),
            "ai_summary_length": len(ai_summary.split()) if not ai_summary.startswith("Error") else 0
        }
    except Exception as e:
        return {"error": str(e)}

def format_time(seconds):
    """Convert seconds to a readable time format.

    Args:
        seconds: Duration in seconds. Fractions are truncated, so floats do
            not leak decimals into the output.

    Returns:
        ``"Hh Mm Ss"`` when at least an hour, ``"Mm Ss"`` when at least a
        minute, otherwise ``"Ss"``. Negative durations are formatted by
        magnitude with a leading ``-``.
    """
    whole_seconds = int(seconds)
    # divmod on a negative number floors toward -infinity and would give
    # nonsense such as "59m 55s" for -5, so split the sign off first.
    sign = '-' if whole_seconds < 0 else ''
    hours, remainder = divmod(abs(whole_seconds), 3600)
    minutes, secs = divmod(remainder, 60)

    if hours > 0:
        return f"{sign}{hours}h {minutes}m {secs}s"
    elif minutes > 0:
        return f"{sign}{minutes}m {secs}s"
    else:
        return f"{sign}{secs}s"

def format_number(number):
    """Render a number with comma thousands separators, e.g. 1234567 -> '1,234,567'."""
    return f"{number:,}"

def print_boxed_text(text, width=80, title=None, color=Fore.WHITE):
    """Print text in a nice box with optional title.

    Every printed row (top border, content rows, bottom border) is exactly
    ``width`` characters wide, so the box edges line up.

    Args:
        text: The text to show; it is re-wrapped to fit inside the box.
        width: Total width of the box in characters, borders included.
        title: Optional title centred in the top border.
        color: Prefix (normally a colorama colour code) put before each row.
    """
    # Two border characters plus one space of margin on each side.
    inner_width = max(width - 4, 1)
    wrapper = textwrap.TextWrapper(width=inner_width)
    lines = wrapper.fill(text).split('\n')

    # Print top border with optional title
    if title:
        # Only the two corner characters are fixed; the rest is shared
        # between the title and the dashes on either side of it.
        title_space = max(width - 2 - len(title), 0)
        left_padding = title_space // 2
        right_padding = title_space - left_padding
        print(color + '┌' + '─' * left_padding + title + '─' * right_padding + '┐')
    else:
        print(color + '┌' + '─' * (width - 2) + '┐')

    # Print content, padding every row to the same inner width
    for line in lines:
        print(color + '│ ' + line.ljust(inner_width) + ' │')

    # Print bottom border
    print(color + '└' + '─' * (width - 2) + '┘')

def _compression_label(summary_words, transcript_words):
    """Return how much shorter the summary is than the transcript, e.g. '85%', or 'N/A'."""
    if transcript_words <= 0:
        # Avoids dividing by zero when the transcript has no words.
        return "N/A"
    return f"{round((1 - summary_words / transcript_words) * 100)}%"


def _group_sentences(summary, max_chars=150):
    """Group the sentences of ``summary`` into display paragraphs of about ``max_chars`` characters."""
    grouped = []
    current = ""
    for piece in summary.split('. '):
        piece = piece.strip()
        if not piece:
            continue  # e.g. the empty tail left by a trailing ". "
        # Splitting on '. ' removed the full stop; put it back unless the
        # piece already ends a sentence (avoids output like "Really?.").
        if not piece.endswith(('.', '!', '?')):
            piece += '.'

        if not current:
            current = piece
        elif len(current) + len(piece) + 1 <= max_chars:
            current += " " + piece
        else:
            grouped.append(current)
            current = piece

    if current:
        grouped.append(current)
    return grouped


def print_summary_result(result, width=80):
    """Print the summary result in a nicely formatted way.

    Args:
        result: Either a dict with an ``error`` key, which is shown in a red
            box, or a dict with the keys ``video_title``, ``video_id``,
            ``ai_summary``, ``nltk_summary``, ``full_transcript_length``,
            ``nltk_summary_length`` and ``ai_summary_length``.
        width: Width in characters used for rules, centring and wrapping.
    """
    if "error" in result:
        print_boxed_text(f"Error: {result['error']}", width=width, title="ERROR", color=Fore.RED)
        return

    terminal_width = width
    transcript_words = result['full_transcript_length']

    # Print header with video information
    print("\n" + Fore.CYAN + "=" * terminal_width)
    print(Fore.CYAN + Style.BRIGHT + result['video_title'].center(terminal_width))
    print(Fore.CYAN + "=" * terminal_width + "\n")

    # Video metadata section
    print(Fore.YELLOW + Style.BRIGHT + "VIDEO INFORMATION".center(terminal_width))
    print(Fore.YELLOW + "─" * terminal_width)

    # Two-column layout for metadata: ID on the left, short URL on the right
    col_width = terminal_width // 2 - 2
    print(f"{Fore.GREEN}Video ID: {Fore.WHITE}{result['video_id']:<{col_width}}"
          f"{Fore.GREEN}URL: {Fore.WHITE}https://youtu.be/{result['video_id']}")

    print(Fore.YELLOW + "─" * terminal_width + "\n")

    # AI Summary section; a length of 0 marks a failed AI summary
    ai_words = result['ai_summary_length']
    ai_compression = _compression_label(ai_words, transcript_words) if ai_words > 0 else "N/A"
    ai_summary_title = f" AI SUMMARY ({ai_words} words, condensed {ai_compression} from {transcript_words} words) "

    print(Fore.GREEN + Style.BRIGHT + ai_summary_title.center(terminal_width))
    print(Fore.GREEN + "─" * terminal_width)

    # Wrapper shared by both summaries: two-space indent on every line
    wrapper = textwrap.TextWrapper(width=terminal_width - 4,
                                   initial_indent='  ',
                                   subsequent_indent='  ')

    # Split AI summary into paragraphs and print each
    for paragraph in result['ai_summary'].split('\n'):
        if paragraph.strip():  # Skip empty paragraphs
            print(wrapper.fill(paragraph))
            print()  # Empty line between paragraphs

    print(Fore.GREEN + "─" * terminal_width + "\n")

    # NLTK Summary section
    nltk_words = result['nltk_summary_length']
    nltk_compression = _compression_label(nltk_words, transcript_words)
    nltk_summary_title = f" NLTK SUMMARY ({nltk_words} words, condensed {nltk_compression} from {transcript_words} words) "

    print(Fore.MAGENTA + Style.BRIGHT + nltk_summary_title.center(terminal_width))
    print(Fore.MAGENTA + "─" * terminal_width)

    # The extractive summary is one long string; regroup it into short
    # paragraphs so it is readable in the terminal.
    for paragraph in _group_sentences(result['nltk_summary']):
        print(wrapper.fill(paragraph))
        print()  # Empty line between paragraphs

    print(Fore.MAGENTA + "─" * terminal_width + "\n")


if __name__ == "__main__":
    # Get terminal width, limited to a reasonable range (80-120 columns).
    try:
        terminal_width = max(80, min(os.get_terminal_size().columns, 120))
    except (OSError, ValueError):
        # Raised when output is not attached to a terminal (e.g. piped).
        terminal_width = 80  # Default if can't determine

    # Print welcome banner
    print(Fore.CYAN + Style.BRIGHT + "\n" + "=" * terminal_width)
    print(Fore.CYAN + Style.BRIGHT + "YOUTUBE VIDEO SUMMARIZER".center(terminal_width))
    print(Fore.CYAN + Style.BRIGHT + "=" * terminal_width + "\n")

    youtube_url = input(Fore.GREEN + "Enter YouTube video URL: " + Fore.WHITE).strip()

    num_sentences_input = input(Fore.GREEN + "Enter number of sentences for summaries (default 5): " + Fore.WHITE).strip()
    # Blank input means "use the default"; anything that is not a positive
    # whole number also falls back to the default instead of crashing.
    try:
        num_sentences = int(num_sentences_input) if num_sentences_input else 5
        if num_sentences < 1:
            raise ValueError(num_sentences_input)
    except ValueError:
        print(Fore.RED + "Invalid number of sentences; using the default of 5.")
        num_sentences = 5

    print(Fore.YELLOW + "\nFetching and analyzing video transcript... Please wait...\n")

    result = summarize_youtube_video(youtube_url, num_sentences)
    print_summary_result(result, width=terminal_width)

youtube_transcript_summarizer.py