Skip to content

Text Analysis - Combining Strings and Sets

CS 5001/5002 - Strings, Sequences & Sets

Code

#!/usr/bin/env python3
"""
Filename: text_analysis.py
Description: Text Analysis - Combining Strings and Sets
CS 5001/5002 - Strings, Sequences & Sets

This script demonstrates comprehensive text analysis using both sequences
and sets to analyze character frequencies, unique characters, and patterns.
"""

def analyze_text(text):
    """Print a character-level report for ``text`` using sequences and sets.

    The report is written to standard output and covers: the raw text and
    its length, its first and last characters, the unique characters of the
    lowercased text, which vowels and consonants occur in it, per-letter
    frequencies, and the most frequent letter(s).

    Args:
        text: The string to analyze. May be empty, in which case the
            first/last character lines are omitted because there is
            nothing to index.

    Returns:
        None. All results are printed.
    """

    # Sequence analysis
    print(f"Text: '{text}'")
    print(f"Length (with spaces): {len(text)}")
    # Indexing an empty string raises IndexError, so only report the
    # first/last characters when there is at least one character.
    if text:
        print(f"Character at position 0: '{text[0]}'")
        print(f"Character at position -1: '{text[-1]}'")

    # Convert to lowercase so that 'H' and 'h' count as the same character
    text_lower = text.lower()

    # Set analysis - unique characters (includes spaces and punctuation)
    unique_chars = set(text_lower)
    print(f"Unique characters: {sorted(unique_chars)}")
    print(f"Number of unique characters: {len(unique_chars)}")

    # Vowels and consonants
    vowels = set('aeiou')
    consonants = set('bcdfghjklmnpqrstvwxyz')

    # Set intersection keeps only the vowels/consonants present in the text
    text_vowels = unique_chars & vowels
    text_consonants = unique_chars & consonants

    print(f"Vowels in text: {sorted(text_vowels)}")
    print(f"Consonants in text: {sorted(text_consonants)}")

    # Character frequency (using sequences)
    char_freq = {}
    for char in text_lower:
        if char.isalpha():  # Only count letters
            char_freq[char] = char_freq.get(char, 0) + 1

    print(f"Character frequencies: {dict(sorted(char_freq.items()))}")

    # Most common characters (skipped when the text contains no letters)
    if char_freq:
        max_freq = max(char_freq.values())
        # Sort the tied characters: printing a raw set would show them in an
        # order that can change between runs. The braces are emitted by hand
        # so the output keeps its original set-like appearance.
        most_common = sorted(
            char for char, freq in char_freq.items() if freq == max_freq
        )
        shown = ", ".join(repr(char) for char in most_common)
        print(f"Most frequent character(s): {{{shown}}} (appears {max_freq} times)")

def main():
    """Run the text analysis over a fixed set of sample phrases.

    Each report is preceded by a 50-character divider line and followed
    by a blank line.
    """
    samples = (
        "Hello World",
        "Northeastern University",
        "Computer Science and Mathematics",
    )
    divider = "=" * 50

    for sample in samples:
        print(divider)
        analyze_text(sample)
        print()

# Run the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

How to Use

  1. Copy the code above
  2. Save it as a .py file (e.g., text_analysis.py)
  3. Run it with: python3 text_analysis.py (requires Python 3.6 or newer, since the script uses f-strings)

Part of CS 5001/5002 - Strings, Sequences & Sets materials