443 lines
12 KiB
Go
443 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/csv"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
type EmailStats struct {
|
|
TotalEmails int `json:"total_emails"`
|
|
TopSenders []SenderInfo `json:"top_senders"`
|
|
TopDomains []DomainInfo `json:"top_domains"`
|
|
Categories map[string]int `json:"categories"`
|
|
SubjectPatterns []PatternInfo `json:"subject_patterns"`
|
|
TimeAnalysis TimeStats `json:"time_analysis"`
|
|
}
|
|
|
|
// SenderInfo describes one sender in the report: the raw sender string as it
// appeared in the CSV (Email), how many emails it sent (Count) and the domain
// extracted from it by extractDomain (Domain, empty when none was found).
type SenderInfo struct {
	Email  string `json:"email"`
	Count  int    `json:"count"`
	Domain string `json:"domain"`
}
|
|
|
|
// DomainInfo describes one sender domain in the report: the lower-cased
// domain name (Domain), the number of emails received from it (Count) and the
// coarse classification assigned by categorizeDomain (Type).
type DomainInfo struct {
	Domain string `json:"domain"`
	Count  int    `json:"count"`
	Type   string `json:"type"`
}
|
|
|
|
// PatternInfo describes one subject pattern in the report: the pattern label
// produced by detectSubjectPatterns (Pattern), the number of subjects that
// matched it (Count) and a few of the matching subjects (Examples; sortPatterns
// keeps at most three).
type PatternInfo struct {
	Pattern  string   `json:"pattern"`
	Count    int      `json:"count"`
	Examples []string `json:"examples"`
}
|
|
|
|
// TimeStats is the date-based part of the report. EmailsByYear is keyed by
// "2006"-style year strings and EmailsByMonth by "2006-01"-style month
// strings. OldestEmail and NewestEmail carry the original Date strings of the
// earliest and latest emails whose date could be parsed; both are empty when
// no date could be parsed.
type TimeStats struct {
	EmailsByYear  map[string]int `json:"emails_by_year"`
	EmailsByMonth map[string]int `json:"emails_by_month"`
	OldestEmail   string         `json:"oldest_email"`
	NewestEmail   string         `json:"newest_email"`
}
|
|
|
|
func main() {
|
|
if len(os.Args) < 2 {
|
|
fmt.Println("Usage: go run analyze.go <csv_file> [output_format]")
|
|
fmt.Println(" csv_file: path to CSV file from main.go")
|
|
fmt.Println(" output_format: json (default) or summary")
|
|
os.Exit(1)
|
|
}
|
|
|
|
csvFile := os.Args[1]
|
|
outputFormat := "json"
|
|
if len(os.Args) > 2 {
|
|
outputFormat = strings.ToLower(os.Args[2])
|
|
}
|
|
|
|
emails, err := loadEmailsFromCSV(csvFile)
|
|
if err != nil {
|
|
log.Fatalf("Error loading CSV: %v", err)
|
|
}
|
|
|
|
fmt.Printf("Analyzing %d emails...\n", len(emails))
|
|
|
|
stats := analyzeEmails(emails)
|
|
|
|
switch outputFormat {
|
|
case "summary":
|
|
printSummary(stats)
|
|
case "json":
|
|
outputJSON(stats)
|
|
default:
|
|
fmt.Printf("Unknown output format: %s\n", outputFormat)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func loadEmailsFromCSV(filename string) ([]EmailInfo, error) {
|
|
file, err := os.Open(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer file.Close()
|
|
|
|
reader := csv.NewReader(file)
|
|
records, err := reader.ReadAll()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var emails []EmailInfo
|
|
for i, record := range records {
|
|
if i == 0 && record[0] == "Sender" {
|
|
continue
|
|
}
|
|
|
|
if len(record) >= 4 {
|
|
emails = append(emails, EmailInfo{
|
|
Sender: record[0],
|
|
Subject: record[1],
|
|
Date: record[2],
|
|
ID: record[3],
|
|
})
|
|
}
|
|
}
|
|
|
|
return emails, nil
|
|
}
|
|
|
|
func analyzeEmails(emails []EmailInfo) EmailStats {
|
|
stats := EmailStats{
|
|
TotalEmails: len(emails),
|
|
Categories: make(map[string]int),
|
|
TimeAnalysis: TimeStats{
|
|
EmailsByYear: make(map[string]int),
|
|
EmailsByMonth: make(map[string]int),
|
|
},
|
|
}
|
|
|
|
senderCounts := make(map[string]int)
|
|
domainCounts := make(map[string]int)
|
|
patternCounts := make(map[string][]string)
|
|
|
|
var oldestTime, newestTime time.Time
|
|
|
|
for _, email := range emails {
|
|
// Sender analysis
|
|
senderCounts[email.Sender]++
|
|
|
|
// Domain analysis
|
|
domain := extractDomain(email.Sender)
|
|
if domain != "" {
|
|
domainCounts[domain]++
|
|
}
|
|
|
|
// Subject pattern analysis
|
|
patterns := detectSubjectPatterns(email.Subject)
|
|
for _, pattern := range patterns {
|
|
patternCounts[pattern] = append(patternCounts[pattern], email.Subject)
|
|
}
|
|
|
|
// Category analysis
|
|
category := categorizeEmail(email.Sender, email.Subject)
|
|
stats.Categories[category]++
|
|
|
|
// Time analysis
|
|
if emailTime, err := parseEmailDate(email.Date); err == nil {
|
|
year := emailTime.Format("2006")
|
|
month := emailTime.Format("2006-01")
|
|
|
|
stats.TimeAnalysis.EmailsByYear[year]++
|
|
stats.TimeAnalysis.EmailsByMonth[month]++
|
|
|
|
if oldestTime.IsZero() || emailTime.Before(oldestTime) {
|
|
oldestTime = emailTime
|
|
stats.TimeAnalysis.OldestEmail = email.Date
|
|
}
|
|
if newestTime.IsZero() || emailTime.After(newestTime) {
|
|
newestTime = emailTime
|
|
stats.TimeAnalysis.NewestEmail = email.Date
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert maps to sorted slices
|
|
stats.TopSenders = sortSenders(senderCounts)
|
|
stats.TopDomains = sortDomains(domainCounts)
|
|
stats.SubjectPatterns = sortPatterns(patternCounts)
|
|
|
|
return stats
|
|
}
|
|
|
|
// angleAddrRe matches an address wrapped in angle brackets, as in
// "Name <user@example.com>", capturing the address itself.
var angleAddrRe = regexp.MustCompile(`<([^@]+@[^>]+)>`)

// extractDomain returns the lower-cased, whitespace-trimmed domain part of a
// sender string, or "" when no single well-formed address can be found.
//
// Both bare addresses ("user@example.com") and display-name forms
// ("Name <user@example.com>") are accepted. The angle-bracket form is checked
// first: such a string contains exactly one "@", so splitting the raw input
// would otherwise yield a domain with a trailing ">".
func extractDomain(email string) string {
	addr := email
	if m := angleAddrRe.FindStringSubmatch(email); len(m) > 1 {
		addr = m[1]
	}

	parts := strings.Split(addr, "@")
	if len(parts) != 2 {
		return ""
	}
	return strings.ToLower(strings.TrimSpace(parts[1]))
}
|
|
|
|
// subjectPatternRules lists the subject patterns in the order they are
// reported. A rule matches when the lower-cased subject starts with one of
// its prefixes or contains one of its keywords.
var subjectPatternRules = []struct {
	name     string
	prefixes []string
	keywords []string
}{
	{name: "newsletter", keywords: []string{"newsletter", "weekly", "monthly", "digest"}},
	{name: "reply", prefixes: []string{"re:"}},
	{name: "forward", prefixes: []string{"fwd:", "fw:"}},
	{name: "notification", keywords: []string{"notification", "alert", "reminder"}},
	{name: "promotional", keywords: []string{"sale", "deal", "offer", "discount", "%", "free"}},
	{name: "update", keywords: []string{"update", "new version", "release"}},
	{name: "transactional", keywords: []string{"receipt", "confirmation", "invoice", "payment"}},
}

// detectSubjectPatterns returns the labels of every pattern the subject
// matches, in the fixed order newsletter, reply, forward, notification,
// promotional, update, transactional. It returns nil when nothing matches.
//
// Matching is case-insensitive. Keywords are plain substring matches, so
// "sale" also matches "wholesale". Surrounding whitespace is trimmed first so
// that a leading space does not defeat the "re:"/"fwd:" prefix checks.
func detectSubjectPatterns(subject string) []string {
	subject = strings.ToLower(strings.TrimSpace(subject))

	var patterns []string
	for _, rule := range subjectPatternRules {
		matched := false
		for _, prefix := range rule.prefixes {
			if strings.HasPrefix(subject, prefix) {
				matched = true
				break
			}
		}
		for i := 0; !matched && i < len(rule.keywords); i++ {
			matched = strings.Contains(subject, rule.keywords[i])
		}
		if matched {
			patterns = append(patterns, rule.name)
		}
	}
	return patterns
}
|
|
|
|
func categorizeEmail(sender, subject string) string {
|
|
domain := extractDomain(sender)
|
|
senderLower := strings.ToLower(sender)
|
|
subjectLower := strings.ToLower(subject)
|
|
|
|
// Social networks
|
|
socialDomains := []string{"facebook.com", "twitter.com", "linkedin.com", "instagram.com",
|
|
"tiktok.com", "youtube.com", "reddit.com"}
|
|
for _, social := range socialDomains {
|
|
if strings.Contains(domain, social) {
|
|
return "social"
|
|
}
|
|
}
|
|
|
|
// Financial
|
|
if strings.Contains(subjectLower, "payment") || strings.Contains(subjectLower, "invoice") ||
|
|
strings.Contains(subjectLower, "receipt") || strings.Contains(domain, "bank") ||
|
|
strings.Contains(domain, "paypal") || strings.Contains(domain, "stripe") {
|
|
return "finance"
|
|
}
|
|
|
|
// Travel
|
|
if strings.Contains(domain, "booking") || strings.Contains(domain, "airbnb") ||
|
|
strings.Contains(domain, "hotel") || strings.Contains(domain, "airline") ||
|
|
strings.Contains(subjectLower, "flight") || strings.Contains(subjectLower, "reservation") {
|
|
return "travel"
|
|
}
|
|
|
|
// Shopping
|
|
if strings.Contains(domain, "amazon") || strings.Contains(domain, "ebay") ||
|
|
strings.Contains(subjectLower, "order") || strings.Contains(subjectLower, "shipping") {
|
|
return "shopping"
|
|
}
|
|
|
|
// Newsletters/Marketing
|
|
if strings.Contains(senderLower, "noreply") || strings.Contains(senderLower, "no-reply") ||
|
|
strings.Contains(subjectLower, "newsletter") || strings.Contains(subjectLower, "unsubscribe") {
|
|
return "newsletters"
|
|
}
|
|
|
|
// Work-related
|
|
if strings.Contains(domain, "slack") || strings.Contains(domain, "github") ||
|
|
strings.Contains(domain, "jira") || strings.Contains(domain, "atlassian") {
|
|
return "work"
|
|
}
|
|
|
|
return "personal"
|
|
}
|
|
|
|
// emailDateLayouts are the time layouts parseEmailDate tries, in order.
var emailDateLayouts = []string{
	time.RFC1123Z,
	time.RFC1123,
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
	"2006-01-02T15:04:05Z07:00",
	"2006-01-02 15:04:05",
}

// parseEmailDate parses an email Date value using the first layout in
// emailDateLayouts that fits, and returns an error when none does.
//
// Before parsing, surrounding whitespace is trimmed and a trailing
// parenthesised comment such as "(UTC)" or "(PDT)" is dropped, since many
// mail servers append one after the numeric zone.
func parseEmailDate(dateStr string) (time.Time, error) {
	s := strings.TrimSpace(dateStr)
	if strings.HasSuffix(s, ")") {
		if i := strings.LastIndex(s, "("); i >= 0 {
			s = strings.TrimSpace(s[:i])
		}
	}

	for _, layout := range emailDateLayouts {
		if t, err := time.Parse(layout, s); err == nil {
			return t, nil
		}
	}

	return time.Time{}, fmt.Errorf("unable to parse date: %q", dateStr)
}
|
|
|
|
func sortSenders(senderCounts map[string]int) []SenderInfo {
|
|
var senders []SenderInfo
|
|
for email, count := range senderCounts {
|
|
senders = append(senders, SenderInfo{
|
|
Email: email,
|
|
Count: count,
|
|
Domain: extractDomain(email),
|
|
})
|
|
}
|
|
|
|
sort.Slice(senders, func(i, j int) bool {
|
|
return senders[i].Count > senders[j].Count
|
|
})
|
|
|
|
if len(senders) > 20 {
|
|
senders = senders[:20]
|
|
}
|
|
|
|
return senders
|
|
}
|
|
|
|
func sortDomains(domainCounts map[string]int) []DomainInfo {
|
|
var domains []DomainInfo
|
|
for domain, count := range domainCounts {
|
|
domainType := categorizeDomain(domain)
|
|
domains = append(domains, DomainInfo{
|
|
Domain: domain,
|
|
Count: count,
|
|
Type: domainType,
|
|
})
|
|
}
|
|
|
|
sort.Slice(domains, func(i, j int) bool {
|
|
return domains[i].Count > domains[j].Count
|
|
})
|
|
|
|
if len(domains) > 15 {
|
|
domains = domains[:15]
|
|
}
|
|
|
|
return domains
|
|
}
|
|
|
|
// categorizeDomain classifies a sender domain as "personal", "social",
// "commerce", "automated" or, when nothing else matches, "business".
//
// The checks are case-insensitive substring matches tried in that order, and
// the first match wins. Order matters: "gmail" and "hotmail" contain "mail"
// and would be reported as "automated" if the webmail check did not run
// first.
func categorizeDomain(domain string) string {
	domain = strings.ToLower(domain)

	// containsAny reports whether domain contains at least one of subs.
	containsAny := func(subs ...string) bool {
		for _, sub := range subs {
			if strings.Contains(domain, sub) {
				return true
			}
		}
		return false
	}

	switch {
	case containsAny("gmail", "yahoo", "hotmail", "outlook"):
		return "personal"
	case containsAny("facebook", "twitter", "linkedin", "instagram"):
		return "social"
	case containsAny("amazon", "ebay", "shop", "store"):
		return "commerce"
	case containsAny("noreply", "no-reply", "mail"):
		return "automated"
	}
	return "business"
}
|
|
|
|
func sortPatterns(patternCounts map[string][]string) []PatternInfo {
|
|
var patterns []PatternInfo
|
|
for pattern, examples := range patternCounts {
|
|
// Limit examples to 3
|
|
limitedExamples := examples
|
|
if len(limitedExamples) > 3 {
|
|
limitedExamples = limitedExamples[:3]
|
|
}
|
|
|
|
patterns = append(patterns, PatternInfo{
|
|
Pattern: pattern,
|
|
Count: len(examples),
|
|
Examples: limitedExamples,
|
|
})
|
|
}
|
|
|
|
sort.Slice(patterns, func(i, j int) bool {
|
|
return patterns[i].Count > patterns[j].Count
|
|
})
|
|
|
|
return patterns
|
|
}
|
|
|
|
func printSummary(stats EmailStats) {
|
|
fmt.Printf("\n=== EMAIL ANALYSIS SUMMARY ===\n")
|
|
fmt.Printf("Total emails analyzed: %d\n\n", stats.TotalEmails)
|
|
|
|
fmt.Printf("TOP SENDERS:\n")
|
|
for i, sender := range stats.TopSenders {
|
|
if i >= 10 {
|
|
break
|
|
}
|
|
fmt.Printf(" %d. %s (%d emails)\n", i+1, sender.Email, sender.Count)
|
|
}
|
|
|
|
fmt.Printf("\nTOP DOMAINS:\n")
|
|
for i, domain := range stats.TopDomains {
|
|
if i >= 10 {
|
|
break
|
|
}
|
|
fmt.Printf(" %d. %s (%d emails, %s)\n", i+1, domain.Domain, domain.Count, domain.Type)
|
|
}
|
|
|
|
fmt.Printf("\nEMAIL CATEGORIES:\n")
|
|
for category, count := range stats.Categories {
|
|
percentage := float64(count) / float64(stats.TotalEmails) * 100
|
|
fmt.Printf(" %s: %d emails (%.1f%%)\n", category, count, percentage)
|
|
}
|
|
|
|
fmt.Printf("\nSUBJECT PATTERNS:\n")
|
|
for _, pattern := range stats.SubjectPatterns {
|
|
fmt.Printf(" %s: %d emails\n", pattern.Pattern, pattern.Count)
|
|
}
|
|
|
|
fmt.Printf("\nTIME ANALYSIS:\n")
|
|
fmt.Printf(" Date range: %s to %s\n", stats.TimeAnalysis.OldestEmail, stats.TimeAnalysis.NewestEmail)
|
|
fmt.Printf(" Years with emails: %d\n", len(stats.TimeAnalysis.EmailsByYear))
|
|
}
|
|
|
|
func outputJSON(stats EmailStats) {
|
|
encoder := json.NewEncoder(os.Stdout)
|
|
encoder.SetIndent("", " ")
|
|
encoder.Encode(stats)
|
|
} |