Text Analysis - Combining Strings and Sets
CS 5001/5002 - Strings, Sequences & Sets
Code
#!/usr/bin/env python3
"""
Filename: text_analysis.py
Description: Text Analysis - Combining Strings and Sets
CS 5001/5002 - Strings, Sequences & Sets
This script demonstrates comprehensive text analysis using both sequences
and sets to analyze character frequencies, unique characters, and patterns.
"""
def analyze_text(text):
    """Print a character-level report on ``text`` using sequences and sets.

    The report covers, in order: the raw text and its length, the first and
    last characters, the unique lowercase characters, which vowels and
    consonants occur, per-letter frequencies, and the most frequent letter(s).

    Args:
        text: The string to analyze. May be empty; the first/last-character
            lines are then omitted instead of raising ``IndexError``.

    Returns:
        None. All results are written to standard output.
    """
    # Sequence analysis
    print(f"Text: '{text}'")
    print(f"Length (with spaces): {len(text)}")
    # Indexing an empty string raises IndexError, so only report the first
    # and last characters when there is at least one character.
    if text:
        print(f"Character at position 0: '{text[0]}'")
        print(f"Character at position -1: '{text[-1]}'")

    # Convert to lowercase so 'A' and 'a' count as the same character
    text_lower = text.lower()

    # Set analysis - unique characters (includes spaces and punctuation)
    unique_chars = set(text_lower)
    print(f"Unique characters: {sorted(unique_chars)}")
    print(f"Number of unique characters: {len(unique_chars)}")

    # Vowels and consonants present, found via set intersection
    vowels = set('aeiou')
    consonants = set('bcdfghjklmnpqrstvwxyz')
    text_vowels = unique_chars & vowels
    text_consonants = unique_chars & consonants
    print(f"Vowels in text: {sorted(text_vowels)}")
    print(f"Consonants in text: {sorted(text_consonants)}")

    # Character frequency (using sequences)
    char_freq = {}
    for char in text_lower:
        if char.isalpha():  # Only count letters
            char_freq[char] = char_freq.get(char, 0) + 1
    print(f"Character frequencies: {dict(sorted(char_freq.items()))}")

    # Most common characters; skipped when the text contains no letters,
    # because max() of an empty sequence would raise ValueError.
    if char_freq:
        max_freq = max(char_freq.values())
        # A set is used so ties are all reported; its display order is
        # not guaranteed when several letters share the top count.
        most_common = {char for char, freq in char_freq.items() if freq == max_freq}
        print(f"Most frequent character(s): {most_common} (appears {max_freq} times)")
def main():
    """Run the text analysis over a fixed set of sample phrases."""
    samples = (
        "Hello World",
        "Northeastern University",
        "Computer Science and Mathematics",
    )
    divider = "=" * 50
    for phrase in samples:
        print(divider)
        analyze_text(phrase)
        print()  # blank line between reports
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
How to Use
- Copy the code above
- Save it as a `.py` file (e.g., `text_analysis.py`)
- Run it with:
python text_analysis.py
Part of CS 5001/5002 - Strings, Sequences & Sets materials