Data Processing - Sequences to Sets and Back
CS 5001/5002 - Strings, Sequences & Sets
Code
#!/usr/bin/env python3
"""
Filename: data_processing.py
Description: Data Processing - Sequences to Sets and Back
CS 5001/5002 - Strings, Sequences & Sets
This script demonstrates practical data processing using sequences and sets
for enrollment analysis and deduplication.
"""
def process_student_data(cs5001_students=None, cs5002_students=None):
    """Print an enrollment analysis for two class rosters using sets.

    Each roster is a sequence of student names that may contain duplicates.
    The rosters are converted to sets to deduplicate them, set operations
    derive the overlapping / exclusive / combined groups, and the results are
    converted back to sorted lists for reporting.

    Args:
        cs5001_students: Iterable of student names enrolled in CS 5001.
            When ``None`` (the default) the built-in sample roster is used.
        cs5002_students: Iterable of student names enrolled in CS 5002.
            When ``None`` (the default) the built-in sample roster is used.
            Names must be hashable and mutually comparable (e.g. all ``str``)
            because they are stored in sets and later sorted.

    Returns:
        None. All results are written to standard output.
    """
    # Student enrollment data (sequences with possible duplicates).
    # ``None`` defaults avoid a shared mutable default argument; copying into
    # a list lets callers pass any iterable (tuple, generator, ...) once.
    cs5001_students = (
        ["Alice", "Bob", "Charlie", "Diana", "Alice", "Eve"]
        if cs5001_students is None
        else list(cs5001_students)
    )
    cs5002_students = (
        ["Bob", "Charlie", "Frank", "Grace", "Alice"]
        if cs5002_students is None
        else list(cs5002_students)
    )

    print("Original enrollment lists (sequences):")
    print(f"CS 5001: {cs5001_students}")
    print(f"CS 5002: {cs5002_students}")

    # Convert to sets for analysis (drops duplicate names).
    cs5001_set = set(cs5001_students)
    cs5002_set = set(cs5002_students)

    # NOTE: sets are unordered, so the element order shown on these lines is
    # arbitrary and may differ between runs.
    print("\nUnique students per class (sets):")
    print(f"CS 5001: {cs5001_set}")
    print(f"CS 5002: {cs5002_set}")

    # Set operations for analysis
    both_classes = cs5001_set & cs5002_set   # intersection
    only_5001 = cs5001_set - cs5002_set      # difference
    only_5002 = cs5002_set - cs5001_set      # difference
    all_students = cs5001_set | cs5002_set   # union

    print("\nEnrollment analysis:")
    print(f"Taking both classes: {both_classes}")
    print(f"Only CS 5001: {only_5001}")
    print(f"Only CS 5002: {only_5002}")
    print(f"All students: {all_students}")

    # Statistics
    # Guard the ratio: with two empty rosters the union is empty and the
    # division would raise ZeroDivisionError.
    if all_students:
        both_percentage = len(both_classes) / len(all_students) * 100
    else:
        both_percentage = 0.0

    print("\nStatistics:")
    print(f"Total unique students: {len(all_students)}")
    print(f"Students in both classes: {len(both_classes)}")
    print(f"Percentage taking both: {both_percentage:.1f}%")

    # Convert back to sorted lists for reporting (deterministic order).
    print("\nSorted lists for reports:")
    print(f"All students (alphabetical): {sorted(all_students)}")
    print(f"Both classes (alphabetical): {sorted(both_classes)}")
def main():
    """Script entry point: run the enrollment data-processing demo."""
    process_student_data()
# Run the demo only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
How to Use
- Copy the code above
- Save it as a `.py` file (e.g., `data_processing.py`)
- Run it with: `python data_processing.py`
Part of CS 5001/5002 - Strings, Sequences & Sets materials