package main import ( "encoding/csv" "encoding/json" "fmt" "log" "os" "regexp" "sort" "strconv" "strings" "time" ) type EmailStats struct { TotalEmails int `json:"total_emails"` TopSenders []SenderInfo `json:"top_senders"` TopDomains []DomainInfo `json:"top_domains"` Categories map[string]int `json:"categories"` SubjectPatterns []PatternInfo `json:"subject_patterns"` TimeAnalysis TimeStats `json:"time_analysis"` } type SenderInfo struct { Email string `json:"email"` Count int `json:"count"` Domain string `json:"domain"` } type DomainInfo struct { Domain string `json:"domain"` Count int `json:"count"` Type string `json:"type"` } type PatternInfo struct { Pattern string `json:"pattern"` Count int `json:"count"` Examples []string `json:"examples"` } type TimeStats struct { EmailsByYear map[string]int `json:"emails_by_year"` EmailsByMonth map[string]int `json:"emails_by_month"` OldestEmail string `json:"oldest_email"` NewestEmail string `json:"newest_email"` } func main() { if len(os.Args) < 2 { fmt.Println("Usage: go run analyze.go [output_format]") fmt.Println(" csv_file: path to CSV file from main.go") fmt.Println(" output_format: json (default) or summary") os.Exit(1) } csvFile := os.Args[1] outputFormat := "json" if len(os.Args) > 2 { outputFormat = strings.ToLower(os.Args[2]) } emails, err := loadEmailsFromCSV(csvFile) if err != nil { log.Fatalf("Error loading CSV: %v", err) } fmt.Printf("Analyzing %d emails...\n", len(emails)) stats := analyzeEmails(emails) switch outputFormat { case "summary": printSummary(stats) case "json": outputJSON(stats) default: fmt.Printf("Unknown output format: %s\n", outputFormat) os.Exit(1) } } func loadEmailsFromCSV(filename string) ([]EmailInfo, error) { file, err := os.Open(filename) if err != nil { return nil, err } defer file.Close() reader := csv.NewReader(file) records, err := reader.ReadAll() if err != nil { return nil, err } var emails []EmailInfo for i, record := range records { if i == 0 && record[0] == "Sender" { continue } if len(record) >= 4 { emails = append(emails, EmailInfo{ Sender: record[0], Subject: record[1], Date: record[2], ID: record[3], }) } } return emails, nil } func analyzeEmails(emails []EmailInfo) EmailStats { stats := EmailStats{ TotalEmails: len(emails), Categories: make(map[string]int), TimeAnalysis: TimeStats{ EmailsByYear: make(map[string]int), EmailsByMonth: make(map[string]int), }, } senderCounts := make(map[string]int) domainCounts := make(map[string]int) patternCounts := make(map[string][]string) var oldestTime, newestTime time.Time for _, email := range emails { // Sender analysis senderCounts[email.Sender]++ // Domain analysis domain := extractDomain(email.Sender) if domain != "" { domainCounts[domain]++ } // Subject pattern analysis patterns := detectSubjectPatterns(email.Subject) for _, pattern := range patterns { patternCounts[pattern] = append(patternCounts[pattern], email.Subject) } // Category analysis category := categorizeEmail(email.Sender, email.Subject) stats.Categories[category]++ // Time analysis if emailTime, err := parseEmailDate(email.Date); err == nil { year := emailTime.Format("2006") month := emailTime.Format("2006-01") stats.TimeAnalysis.EmailsByYear[year]++ stats.TimeAnalysis.EmailsByMonth[month]++ if oldestTime.IsZero() || emailTime.Before(oldestTime) { oldestTime = emailTime stats.TimeAnalysis.OldestEmail = email.Date } if newestTime.IsZero() || emailTime.After(newestTime) { newestTime = emailTime stats.TimeAnalysis.NewestEmail = email.Date } } } // Convert maps to sorted slices stats.TopSenders = sortSenders(senderCounts) stats.TopDomains = sortDomains(domainCounts) stats.SubjectPatterns = sortPatterns(patternCounts) return stats } func extractDomain(email string) string { parts := strings.Split(email, "@") if len(parts) != 2 { // Handle cases like "Name " re := regexp.MustCompile(`<([^@]+@[^>]+)>`) matches := re.FindStringSubmatch(email) if len(matches) > 1 { parts = strings.Split(matches[1], "@") if len(parts) == 2 { return strings.ToLower(strings.TrimSpace(parts[1])) } } return "" } return strings.ToLower(strings.TrimSpace(parts[1])) } func detectSubjectPatterns(subject string) []string { var patterns []string subject = strings.ToLower(subject) // Newsletter patterns if strings.Contains(subject, "newsletter") || strings.Contains(subject, "weekly") || strings.Contains(subject, "monthly") || strings.Contains(subject, "digest") { patterns = append(patterns, "newsletter") } // Automated patterns if strings.HasPrefix(subject, "re:") { patterns = append(patterns, "reply") } if strings.HasPrefix(subject, "fwd:") || strings.HasPrefix(subject, "fw:") { patterns = append(patterns, "forward") } // Notification patterns if strings.Contains(subject, "notification") || strings.Contains(subject, "alert") || strings.Contains(subject, "reminder") { patterns = append(patterns, "notification") } // Commercial patterns if strings.Contains(subject, "sale") || strings.Contains(subject, "deal") || strings.Contains(subject, "offer") || strings.Contains(subject, "discount") || strings.Contains(subject, "%") || strings.Contains(subject, "free") { patterns = append(patterns, "promotional") } // Update patterns if strings.Contains(subject, "update") || strings.Contains(subject, "new version") || strings.Contains(subject, "release") { patterns = append(patterns, "update") } // Receipt/confirmation patterns if strings.Contains(subject, "receipt") || strings.Contains(subject, "confirmation") || strings.Contains(subject, "invoice") || strings.Contains(subject, "payment") { patterns = append(patterns, "transactional") } return patterns } func categorizeEmail(sender, subject string) string { domain := extractDomain(sender) senderLower := strings.ToLower(sender) subjectLower := strings.ToLower(subject) // Social networks socialDomains := []string{"facebook.com", "twitter.com", "linkedin.com", "instagram.com", "tiktok.com", "youtube.com", "reddit.com"} for _, social := range socialDomains { if strings.Contains(domain, social) { return "social" } } // Financial if strings.Contains(subjectLower, "payment") || strings.Contains(subjectLower, "invoice") || strings.Contains(subjectLower, "receipt") || strings.Contains(domain, "bank") || strings.Contains(domain, "paypal") || strings.Contains(domain, "stripe") { return "finance" } // Travel if strings.Contains(domain, "booking") || strings.Contains(domain, "airbnb") || strings.Contains(domain, "hotel") || strings.Contains(domain, "airline") || strings.Contains(subjectLower, "flight") || strings.Contains(subjectLower, "reservation") { return "travel" } // Shopping if strings.Contains(domain, "amazon") || strings.Contains(domain, "ebay") || strings.Contains(subjectLower, "order") || strings.Contains(subjectLower, "shipping") { return "shopping" } // Newsletters/Marketing if strings.Contains(senderLower, "noreply") || strings.Contains(senderLower, "no-reply") || strings.Contains(subjectLower, "newsletter") || strings.Contains(subjectLower, "unsubscribe") { return "newsletters" } // Work-related if strings.Contains(domain, "slack") || strings.Contains(domain, "github") || strings.Contains(domain, "jira") || strings.Contains(domain, "atlassian") { return "work" } return "personal" } func parseEmailDate(dateStr string) (time.Time, error) { formats := []string{ time.RFC1123Z, time.RFC1123, "Mon, 2 Jan 2006 15:04:05 -0700", "2 Jan 2006 15:04:05 -0700", "2006-01-02T15:04:05Z07:00", "2006-01-02 15:04:05", } for _, format := range formats { if t, err := time.Parse(format, dateStr); err == nil { return t, nil } } return time.Time{}, fmt.Errorf("unable to parse date: %s", dateStr) } func sortSenders(senderCounts map[string]int) []SenderInfo { var senders []SenderInfo for email, count := range senderCounts { senders = append(senders, SenderInfo{ Email: email, Count: count, Domain: extractDomain(email), }) } sort.Slice(senders, func(i, j int) bool { return senders[i].Count > senders[j].Count }) if len(senders) > 20 { senders = senders[:20] } return senders } func sortDomains(domainCounts map[string]int) []DomainInfo { var domains []DomainInfo for domain, count := range domainCounts { domainType := categorizeDomain(domain) domains = append(domains, DomainInfo{ Domain: domain, Count: count, Type: domainType, }) } sort.Slice(domains, func(i, j int) bool { return domains[i].Count > domains[j].Count }) if len(domains) > 15 { domains = domains[:15] } return domains } func categorizeDomain(domain string) string { domain = strings.ToLower(domain) if strings.Contains(domain, "gmail") || strings.Contains(domain, "yahoo") || strings.Contains(domain, "hotmail") || strings.Contains(domain, "outlook") { return "personal" } if strings.Contains(domain, "facebook") || strings.Contains(domain, "twitter") || strings.Contains(domain, "linkedin") || strings.Contains(domain, "instagram") { return "social" } if strings.Contains(domain, "amazon") || strings.Contains(domain, "ebay") || strings.Contains(domain, "shop") || strings.Contains(domain, "store") { return "commerce" } if strings.Contains(domain, "noreply") || strings.Contains(domain, "no-reply") || strings.Contains(domain, "mail") { return "automated" } return "business" } func sortPatterns(patternCounts map[string][]string) []PatternInfo { var patterns []PatternInfo for pattern, examples := range patternCounts { // Limit examples to 3 limitedExamples := examples if len(limitedExamples) > 3 { limitedExamples = limitedExamples[:3] } patterns = append(patterns, PatternInfo{ Pattern: pattern, Count: len(examples), Examples: limitedExamples, }) } sort.Slice(patterns, func(i, j int) bool { return patterns[i].Count > patterns[j].Count }) return patterns } func printSummary(stats EmailStats) { fmt.Printf("\n=== EMAIL ANALYSIS SUMMARY ===\n") fmt.Printf("Total emails analyzed: %d\n\n", stats.TotalEmails) fmt.Printf("TOP SENDERS:\n") for i, sender := range stats.TopSenders { if i >= 10 { break } fmt.Printf(" %d. %s (%d emails)\n", i+1, sender.Email, sender.Count) } fmt.Printf("\nTOP DOMAINS:\n") for i, domain := range stats.TopDomains { if i >= 10 { break } fmt.Printf(" %d. %s (%d emails, %s)\n", i+1, domain.Domain, domain.Count, domain.Type) } fmt.Printf("\nEMAIL CATEGORIES:\n") for category, count := range stats.Categories { percentage := float64(count) / float64(stats.TotalEmails) * 100 fmt.Printf(" %s: %d emails (%.1f%%)\n", category, count, percentage) } fmt.Printf("\nSUBJECT PATTERNS:\n") for _, pattern := range stats.SubjectPatterns { fmt.Printf(" %s: %d emails\n", pattern.Pattern, pattern.Count) } fmt.Printf("\nTIME ANALYSIS:\n") fmt.Printf(" Date range: %s to %s\n", stats.TimeAnalysis.OldestEmail, stats.TimeAnalysis.NewestEmail) fmt.Printf(" Years with emails: %d\n", len(stats.TimeAnalysis.EmailsByYear)) } func outputJSON(stats EmailStats) { encoder := json.NewEncoder(os.Stdout) encoder.SetIndent("", " ") encoder.Encode(stats) }