initiate
This commit is contained in:
443
analyze.go
Normal file
443
analyze.go
Normal file
@@ -0,0 +1,443 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type EmailStats struct {
|
||||
TotalEmails int `json:"total_emails"`
|
||||
TopSenders []SenderInfo `json:"top_senders"`
|
||||
TopDomains []DomainInfo `json:"top_domains"`
|
||||
Categories map[string]int `json:"categories"`
|
||||
SubjectPatterns []PatternInfo `json:"subject_patterns"`
|
||||
TimeAnalysis TimeStats `json:"time_analysis"`
|
||||
}
|
||||
|
||||
// SenderInfo describes one sender: the raw sender string as it appeared in
// the CSV, how many emails it sent, and the domain extracted from it.
type SenderInfo struct {
	Email  string `json:"email"`
	Count  int    `json:"count"`
	Domain string `json:"domain"`
}
|
||||
|
||||
// DomainInfo describes one sender domain: its name, how many emails came
// from it, and the coarse type label assigned by categorizeDomain.
type DomainInfo struct {
	Domain string `json:"domain"`
	Count  int    `json:"count"`
	Type   string `json:"type"`
}
|
||||
|
||||
// PatternInfo describes one subject-line pattern: its label, the number of
// subjects that matched it, and a few sample subjects.
type PatternInfo struct {
	Pattern  string   `json:"pattern"`
	Count    int      `json:"count"`
	Examples []string `json:"examples"`
}
|
||||
|
||||
// TimeStats holds the date-based part of the report. EmailsByYear is keyed
// by "2006"-style year strings and EmailsByMonth by "2006-01"-style month
// strings. OldestEmail and NewestEmail hold the original date strings of the
// earliest and latest emails whose date could be parsed.
type TimeStats struct {
	EmailsByYear  map[string]int `json:"emails_by_year"`
	EmailsByMonth map[string]int `json:"emails_by_month"`
	OldestEmail   string         `json:"oldest_email"`
	NewestEmail   string         `json:"newest_email"`
}
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Println("Usage: go run analyze.go <csv_file> [output_format]")
|
||||
fmt.Println(" csv_file: path to CSV file from main.go")
|
||||
fmt.Println(" output_format: json (default) or summary")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
csvFile := os.Args[1]
|
||||
outputFormat := "json"
|
||||
if len(os.Args) > 2 {
|
||||
outputFormat = strings.ToLower(os.Args[2])
|
||||
}
|
||||
|
||||
emails, err := loadEmailsFromCSV(csvFile)
|
||||
if err != nil {
|
||||
log.Fatalf("Error loading CSV: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Analyzing %d emails...\n", len(emails))
|
||||
|
||||
stats := analyzeEmails(emails)
|
||||
|
||||
switch outputFormat {
|
||||
case "summary":
|
||||
printSummary(stats)
|
||||
case "json":
|
||||
outputJSON(stats)
|
||||
default:
|
||||
fmt.Printf("Unknown output format: %s\n", outputFormat)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func loadEmailsFromCSV(filename string) ([]EmailInfo, error) {
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
reader := csv.NewReader(file)
|
||||
records, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var emails []EmailInfo
|
||||
for i, record := range records {
|
||||
if i == 0 && record[0] == "Sender" {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(record) >= 4 {
|
||||
emails = append(emails, EmailInfo{
|
||||
Sender: record[0],
|
||||
Subject: record[1],
|
||||
Date: record[2],
|
||||
ID: record[3],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return emails, nil
|
||||
}
|
||||
|
||||
func analyzeEmails(emails []EmailInfo) EmailStats {
|
||||
stats := EmailStats{
|
||||
TotalEmails: len(emails),
|
||||
Categories: make(map[string]int),
|
||||
TimeAnalysis: TimeStats{
|
||||
EmailsByYear: make(map[string]int),
|
||||
EmailsByMonth: make(map[string]int),
|
||||
},
|
||||
}
|
||||
|
||||
senderCounts := make(map[string]int)
|
||||
domainCounts := make(map[string]int)
|
||||
patternCounts := make(map[string][]string)
|
||||
|
||||
var oldestTime, newestTime time.Time
|
||||
|
||||
for _, email := range emails {
|
||||
// Sender analysis
|
||||
senderCounts[email.Sender]++
|
||||
|
||||
// Domain analysis
|
||||
domain := extractDomain(email.Sender)
|
||||
if domain != "" {
|
||||
domainCounts[domain]++
|
||||
}
|
||||
|
||||
// Subject pattern analysis
|
||||
patterns := detectSubjectPatterns(email.Subject)
|
||||
for _, pattern := range patterns {
|
||||
patternCounts[pattern] = append(patternCounts[pattern], email.Subject)
|
||||
}
|
||||
|
||||
// Category analysis
|
||||
category := categorizeEmail(email.Sender, email.Subject)
|
||||
stats.Categories[category]++
|
||||
|
||||
// Time analysis
|
||||
if emailTime, err := parseEmailDate(email.Date); err == nil {
|
||||
year := emailTime.Format("2006")
|
||||
month := emailTime.Format("2006-01")
|
||||
|
||||
stats.TimeAnalysis.EmailsByYear[year]++
|
||||
stats.TimeAnalysis.EmailsByMonth[month]++
|
||||
|
||||
if oldestTime.IsZero() || emailTime.Before(oldestTime) {
|
||||
oldestTime = emailTime
|
||||
stats.TimeAnalysis.OldestEmail = email.Date
|
||||
}
|
||||
if newestTime.IsZero() || emailTime.After(newestTime) {
|
||||
newestTime = emailTime
|
||||
stats.TimeAnalysis.NewestEmail = email.Date
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert maps to sorted slices
|
||||
stats.TopSenders = sortSenders(senderCounts)
|
||||
stats.TopDomains = sortDomains(domainCounts)
|
||||
stats.SubjectPatterns = sortPatterns(patternCounts)
|
||||
|
||||
return stats
|
||||
}
|
||||
|
||||
// bracketedAddr matches the address inside angle brackets in a sender such
// as "Name <user@example.com>". It is compiled once at package scope rather
// than on every extractDomain call.
var bracketedAddr = regexp.MustCompile(`<([^@]+@[^>]+)>`)

// extractDomain returns the lower-cased, whitespace-trimmed domain of a
// sender string, or "" when no single address can be identified.
//
// Both bare addresses ("user@example.com") and display-name forms
// ("Name <user@example.com>") are accepted. The bracketed form is checked
// first: splitting "Name <user@example.com>" on "@" yields exactly two
// parts, so a split-first approach would return "example.com>" with the
// closing bracket still attached.
func extractDomain(email string) string {
	addr := email
	if m := bracketedAddr.FindStringSubmatch(email); len(m) > 1 {
		addr = m[1]
	}

	// Anything without exactly one "@" is not treated as an address.
	if strings.Count(addr, "@") != 1 {
		return ""
	}

	at := strings.IndexByte(addr, '@')
	return strings.ToLower(strings.TrimSpace(addr[at+1:]))
}
|
||||
|
||||
// detectSubjectPatterns returns the pattern labels that apply to subject, in
// a fixed order: newsletter, reply, forward, notification, promotional,
// update, transactional. Matching is case-insensitive. "reply" and "forward"
// are prefix matches; all other labels are substring matches. The result is
// nil when nothing matches.
func detectSubjectPatterns(subject string) []string {
	lowered := strings.ToLower(subject)

	// matchesAny reports whether test(lowered, k) holds for any keyword k.
	matchesAny := func(test func(s, k string) bool, keywords ...string) bool {
		for _, k := range keywords {
			if test(lowered, k) {
				return true
			}
		}
		return false
	}

	rules := []struct {
		label string
		hit   bool
	}{
		{"newsletter", matchesAny(strings.Contains, "newsletter", "weekly", "monthly", "digest")},
		{"reply", matchesAny(strings.HasPrefix, "re:")},
		{"forward", matchesAny(strings.HasPrefix, "fwd:", "fw:")},
		{"notification", matchesAny(strings.Contains, "notification", "alert", "reminder")},
		{"promotional", matchesAny(strings.Contains, "sale", "deal", "offer", "discount", "%", "free")},
		{"update", matchesAny(strings.Contains, "update", "new version", "release")},
		{"transactional", matchesAny(strings.Contains, "receipt", "confirmation", "invoice", "payment")},
	}

	var found []string
	for _, rule := range rules {
		if rule.hit {
			found = append(found, rule.label)
		}
	}
	return found
}
|
||||
|
||||
func categorizeEmail(sender, subject string) string {
|
||||
domain := extractDomain(sender)
|
||||
senderLower := strings.ToLower(sender)
|
||||
subjectLower := strings.ToLower(subject)
|
||||
|
||||
// Social networks
|
||||
socialDomains := []string{"facebook.com", "twitter.com", "linkedin.com", "instagram.com",
|
||||
"tiktok.com", "youtube.com", "reddit.com"}
|
||||
for _, social := range socialDomains {
|
||||
if strings.Contains(domain, social) {
|
||||
return "social"
|
||||
}
|
||||
}
|
||||
|
||||
// Financial
|
||||
if strings.Contains(subjectLower, "payment") || strings.Contains(subjectLower, "invoice") ||
|
||||
strings.Contains(subjectLower, "receipt") || strings.Contains(domain, "bank") ||
|
||||
strings.Contains(domain, "paypal") || strings.Contains(domain, "stripe") {
|
||||
return "finance"
|
||||
}
|
||||
|
||||
// Travel
|
||||
if strings.Contains(domain, "booking") || strings.Contains(domain, "airbnb") ||
|
||||
strings.Contains(domain, "hotel") || strings.Contains(domain, "airline") ||
|
||||
strings.Contains(subjectLower, "flight") || strings.Contains(subjectLower, "reservation") {
|
||||
return "travel"
|
||||
}
|
||||
|
||||
// Shopping
|
||||
if strings.Contains(domain, "amazon") || strings.Contains(domain, "ebay") ||
|
||||
strings.Contains(subjectLower, "order") || strings.Contains(subjectLower, "shipping") {
|
||||
return "shopping"
|
||||
}
|
||||
|
||||
// Newsletters/Marketing
|
||||
if strings.Contains(senderLower, "noreply") || strings.Contains(senderLower, "no-reply") ||
|
||||
strings.Contains(subjectLower, "newsletter") || strings.Contains(subjectLower, "unsubscribe") {
|
||||
return "newsletters"
|
||||
}
|
||||
|
||||
// Work-related
|
||||
if strings.Contains(domain, "slack") || strings.Contains(domain, "github") ||
|
||||
strings.Contains(domain, "jira") || strings.Contains(domain, "atlassian") {
|
||||
return "work"
|
||||
}
|
||||
|
||||
return "personal"
|
||||
}
|
||||
|
||||
// parseEmailDate parses an email date string by trying a list of common
// layouts in order and returning the first successful result.
//
// Before parsing, surrounding whitespace is trimmed and a trailing
// parenthesized comment is removed, so that header dates such as
// "Tue, 15 Jul 2025 10:00:00 +0000 (UTC)" are accepted. An error is returned
// when no layout matches.
func parseEmailDate(dateStr string) (time.Time, error) {
	cleaned := strings.TrimSpace(dateStr)

	// Strip a trailing "(...)" comment; none of the layouts below contain
	// parentheses, so such input could never match otherwise.
	if strings.HasSuffix(cleaned, ")") {
		if open := strings.LastIndex(cleaned, "("); open > 0 {
			cleaned = strings.TrimSpace(cleaned[:open])
		}
	}

	layouts := []string{
		time.RFC1123Z,
		time.RFC1123,
		"Mon, 2 Jan 2006 15:04:05 -0700", // single-digit day, which RFC1123Z rejects
		"2 Jan 2006 15:04:05 -0700",
		"2006-01-02T15:04:05Z07:00",
		"2006-01-02 15:04:05",
	}

	for _, layout := range layouts {
		if t, err := time.Parse(layout, cleaned); err == nil {
			return t, nil
		}
	}

	return time.Time{}, fmt.Errorf("unable to parse date: %s", dateStr)
}
|
||||
|
||||
func sortSenders(senderCounts map[string]int) []SenderInfo {
|
||||
var senders []SenderInfo
|
||||
for email, count := range senderCounts {
|
||||
senders = append(senders, SenderInfo{
|
||||
Email: email,
|
||||
Count: count,
|
||||
Domain: extractDomain(email),
|
||||
})
|
||||
}
|
||||
|
||||
sort.Slice(senders, func(i, j int) bool {
|
||||
return senders[i].Count > senders[j].Count
|
||||
})
|
||||
|
||||
if len(senders) > 20 {
|
||||
senders = senders[:20]
|
||||
}
|
||||
|
||||
return senders
|
||||
}
|
||||
|
||||
func sortDomains(domainCounts map[string]int) []DomainInfo {
|
||||
var domains []DomainInfo
|
||||
for domain, count := range domainCounts {
|
||||
domainType := categorizeDomain(domain)
|
||||
domains = append(domains, DomainInfo{
|
||||
Domain: domain,
|
||||
Count: count,
|
||||
Type: domainType,
|
||||
})
|
||||
}
|
||||
|
||||
sort.Slice(domains, func(i, j int) bool {
|
||||
return domains[i].Count > domains[j].Count
|
||||
})
|
||||
|
||||
if len(domains) > 15 {
|
||||
domains = domains[:15]
|
||||
}
|
||||
|
||||
return domains
|
||||
}
|
||||
|
||||
// categorizeDomain labels a domain as "personal", "social", "commerce" or
// "automated" using case-insensitive substring checks, tried in that order;
// the first group with a matching marker wins. Domains matching no group are
// labelled "business".
func categorizeDomain(domain string) string {
	name := strings.ToLower(domain)

	// Order matters: "gmail" and "hotmail" must be claimed by "personal"
	// before the generic "mail" marker in "automated" can match them.
	groups := []struct {
		kind    string
		markers []string
	}{
		{"personal", []string{"gmail", "yahoo", "hotmail", "outlook"}},
		{"social", []string{"facebook", "twitter", "linkedin", "instagram"}},
		{"commerce", []string{"amazon", "ebay", "shop", "store"}},
		{"automated", []string{"noreply", "no-reply", "mail"}},
	}

	for _, group := range groups {
		for _, marker := range group.markers {
			if strings.Contains(name, marker) {
				return group.kind
			}
		}
	}

	return "business"
}
|
||||
|
||||
func sortPatterns(patternCounts map[string][]string) []PatternInfo {
|
||||
var patterns []PatternInfo
|
||||
for pattern, examples := range patternCounts {
|
||||
// Limit examples to 3
|
||||
limitedExamples := examples
|
||||
if len(limitedExamples) > 3 {
|
||||
limitedExamples = limitedExamples[:3]
|
||||
}
|
||||
|
||||
patterns = append(patterns, PatternInfo{
|
||||
Pattern: pattern,
|
||||
Count: len(examples),
|
||||
Examples: limitedExamples,
|
||||
})
|
||||
}
|
||||
|
||||
sort.Slice(patterns, func(i, j int) bool {
|
||||
return patterns[i].Count > patterns[j].Count
|
||||
})
|
||||
|
||||
return patterns
|
||||
}
|
||||
|
||||
func printSummary(stats EmailStats) {
|
||||
fmt.Printf("\n=== EMAIL ANALYSIS SUMMARY ===\n")
|
||||
fmt.Printf("Total emails analyzed: %d\n\n", stats.TotalEmails)
|
||||
|
||||
fmt.Printf("TOP SENDERS:\n")
|
||||
for i, sender := range stats.TopSenders {
|
||||
if i >= 10 {
|
||||
break
|
||||
}
|
||||
fmt.Printf(" %d. %s (%d emails)\n", i+1, sender.Email, sender.Count)
|
||||
}
|
||||
|
||||
fmt.Printf("\nTOP DOMAINS:\n")
|
||||
for i, domain := range stats.TopDomains {
|
||||
if i >= 10 {
|
||||
break
|
||||
}
|
||||
fmt.Printf(" %d. %s (%d emails, %s)\n", i+1, domain.Domain, domain.Count, domain.Type)
|
||||
}
|
||||
|
||||
fmt.Printf("\nEMAIL CATEGORIES:\n")
|
||||
for category, count := range stats.Categories {
|
||||
percentage := float64(count) / float64(stats.TotalEmails) * 100
|
||||
fmt.Printf(" %s: %d emails (%.1f%%)\n", category, count, percentage)
|
||||
}
|
||||
|
||||
fmt.Printf("\nSUBJECT PATTERNS:\n")
|
||||
for _, pattern := range stats.SubjectPatterns {
|
||||
fmt.Printf(" %s: %d emails\n", pattern.Pattern, pattern.Count)
|
||||
}
|
||||
|
||||
fmt.Printf("\nTIME ANALYSIS:\n")
|
||||
fmt.Printf(" Date range: %s to %s\n", stats.TimeAnalysis.OldestEmail, stats.TimeAnalysis.NewestEmail)
|
||||
fmt.Printf(" Years with emails: %d\n", len(stats.TimeAnalysis.EmailsByYear))
|
||||
}
|
||||
|
||||
func outputJSON(stats EmailStats) {
|
||||
encoder := json.NewEncoder(os.Stdout)
|
||||
encoder.SetIndent("", " ")
|
||||
encoder.Encode(stats)
|
||||
}
|
||||
Reference in New Issue
Block a user