This commit is contained in:
orejav
2025-07-28 03:03:24 +03:00
commit b65c0956d2
10 changed files with 2092 additions and 0 deletions

443
analyze.go Normal file
View File

@@ -0,0 +1,443 @@
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"log"
"os"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// EmailStats is the complete analysis report built by analyzeEmails. It is
// what gets rendered as JSON or as a plain-text summary.
type EmailStats struct {
	// TotalEmails is the number of emails that were analyzed.
	TotalEmails int `json:"total_emails"`
	// TopSenders holds the most frequent senders, highest count first.
	TopSenders []SenderInfo `json:"top_senders"`
	// TopDomains holds the most frequent sender domains, highest count first.
	TopDomains []DomainInfo `json:"top_domains"`
	// Categories maps a category name (as returned by categorizeEmail) to the
	// number of emails assigned to it.
	Categories map[string]int `json:"categories"`
	// SubjectPatterns holds the subject-line patterns that were detected,
	// highest count first.
	SubjectPatterns []PatternInfo `json:"subject_patterns"`
	// TimeAnalysis aggregates the emails whose date could be parsed.
	TimeAnalysis TimeStats `json:"time_analysis"`
}
// SenderInfo describes one sender and how many emails it sent.
type SenderInfo struct {
	// Email is the sender string exactly as it was counted.
	Email string `json:"email"`
	// Count is the number of emails from this sender.
	Count int `json:"count"`
	// Domain is the domain extracted from Email, or "" if none was found.
	Domain string `json:"domain"`
}
// DomainInfo describes one sender domain and how many emails came from it.
type DomainInfo struct {
	// Domain is the lower-cased domain part of the sender address.
	Domain string `json:"domain"`
	// Count is the number of emails from this domain.
	Count int `json:"count"`
	// Type is the coarse classification produced by categorizeDomain.
	Type string `json:"type"`
}
// PatternInfo describes one subject-line pattern found during analysis.
type PatternInfo struct {
	// Pattern is the pattern label, e.g. "newsletter" or "reply".
	Pattern string `json:"pattern"`
	// Count is the number of subjects that matched the pattern.
	Count int `json:"count"`
	// Examples holds a few sample subjects that matched the pattern.
	Examples []string `json:"examples"`
}
// TimeStats aggregates emails over time. Only emails whose date string could
// be parsed contribute to it.
type TimeStats struct {
	// EmailsByYear maps a year formatted as "2006" to an email count.
	EmailsByYear map[string]int `json:"emails_by_year"`
	// EmailsByMonth maps a month formatted as "2006-01" to an email count.
	EmailsByMonth map[string]int `json:"emails_by_month"`
	// OldestEmail is the original date string of the earliest email.
	OldestEmail string `json:"oldest_email"`
	// NewestEmail is the original date string of the latest email.
	NewestEmail string `json:"newest_email"`
}
// main is the command-line entry point. It loads the CSV file named by the
// first argument, analyzes its emails and writes the report to stdout in the
// format named by the optional second argument ("json", the default, or
// "summary").
//
// Usage, progress and error messages are written to stderr so that stdout
// carries nothing but the report; this keeps the JSON output parseable when
// it is piped or redirected.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Usage: go run analyze.go <csv_file> [output_format]")
		fmt.Fprintln(os.Stderr, " csv_file: path to CSV file from main.go")
		fmt.Fprintln(os.Stderr, " output_format: json (default) or summary")
		os.Exit(1)
	}

	csvFile := os.Args[1]
	outputFormat := "json"
	if len(os.Args) > 2 {
		outputFormat = strings.ToLower(os.Args[2])
	}

	emails, err := loadEmailsFromCSV(csvFile)
	if err != nil {
		log.Fatalf("Error loading CSV: %v", err)
	}

	// Progress is diagnostic output, not part of the report.
	fmt.Fprintf(os.Stderr, "Analyzing %d emails...\n", len(emails))

	stats := analyzeEmails(emails)
	switch outputFormat {
	case "summary":
		printSummary(stats)
	case "json":
		outputJSON(stats)
	default:
		fmt.Fprintf(os.Stderr, "Unknown output format: %s\n", outputFormat)
		os.Exit(1)
	}
}
// loadEmailsFromCSV reads the whole CSV file at filename and converts its
// rows into EmailInfo values.
//
// The columns are taken as sender, subject, date and ID, in that order. A
// first row whose first field is "Sender" is treated as a header and skipped,
// and rows with fewer than four fields are ignored. Any error from opening or
// parsing the file is returned unchanged.
func loadEmailsFromCSV(filename string) ([]EmailInfo, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	rows, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return nil, err
	}

	var emails []EmailInfo
	for idx, row := range rows {
		isHeader := idx == 0 && row[0] == "Sender"
		if isHeader || len(row) < 4 {
			continue
		}
		emails = append(emails, EmailInfo{
			Sender:  row[0],
			Subject: row[1],
			Date:    row[2],
			ID:      row[3],
		})
	}
	return emails, nil
}
// analyzeEmails makes a single pass over emails and builds the full report:
// per-sender and per-domain counts, detected subject patterns, category
// counts and time statistics.
//
// Emails whose date cannot be parsed still count towards every other
// statistic; they are only left out of the time analysis.
func analyzeEmails(emails []EmailInfo) EmailStats {
	var (
		bySender   = make(map[string]int)
		byDomain   = make(map[string]int)
		byPattern  = make(map[string][]string)
		categories = make(map[string]int)
		timeline   = TimeStats{
			EmailsByYear:  make(map[string]int),
			EmailsByMonth: make(map[string]int),
		}
		earliest, latest time.Time
	)

	for _, msg := range emails {
		bySender[msg.Sender]++

		if d := extractDomain(msg.Sender); d != "" {
			byDomain[d]++
		}

		// Remember every subject that matched a pattern; the sorter derives
		// both the count and the examples from this list.
		for _, label := range detectSubjectPatterns(msg.Subject) {
			byPattern[label] = append(byPattern[label], msg.Subject)
		}

		categories[categorizeEmail(msg.Sender, msg.Subject)]++

		when, err := parseEmailDate(msg.Date)
		if err != nil {
			continue
		}
		timeline.EmailsByYear[when.Format("2006")]++
		timeline.EmailsByMonth[when.Format("2006-01")]++
		// The report keeps the original date strings, not the parsed times.
		if earliest.IsZero() || when.Before(earliest) {
			earliest = when
			timeline.OldestEmail = msg.Date
		}
		if latest.IsZero() || when.After(latest) {
			latest = when
			timeline.NewestEmail = msg.Date
		}
	}

	return EmailStats{
		TotalEmails:     len(emails),
		TopSenders:      sortSenders(bySender),
		TopDomains:      sortDomains(byDomain),
		Categories:      categories,
		SubjectPatterns: sortPatterns(byPattern),
		TimeAnalysis:    timeline,
	}
}
// angleAddrPattern matches an address wrapped in angle brackets, as in
// "Name <user@example.com>", and captures the address itself. It is compiled
// once here rather than on every extractDomain call.
var angleAddrPattern = regexp.MustCompile(`<([^@]+@[^>]+)>`)

// extractDomain returns the lower-cased domain part of a sender string, or ""
// when no unambiguous domain can be found.
//
// Both bare addresses ("user@example.com") and display-name forms
// ("Name <user@example.com>") are accepted. The angle-bracket form is checked
// first: such a string usually contains exactly one "@", so splitting the raw
// input would otherwise yield "example.com>" with the closing bracket (and
// anything after it) attached.
func extractDomain(email string) string {
	addr := email
	if m := angleAddrPattern.FindStringSubmatch(email); len(m) > 1 {
		addr = m[1]
	}

	// Exactly one "@" is required; anything else is treated as malformed.
	parts := strings.Split(addr, "@")
	if len(parts) != 2 {
		return ""
	}
	return strings.ToLower(strings.TrimSpace(parts[1]))
}
// detectSubjectPatterns returns the labels of every pattern the subject line
// matches, compared case-insensitively. A subject may match several patterns;
// labels are returned in a fixed order (newsletter, reply, forward,
// notification, promotional, update, transactional). The result is nil when
// nothing matches.
func detectSubjectPatterns(subject string) []string {
	lowered := strings.ToLower(subject)

	// containsAny reports whether the subject contains any of the keywords.
	containsAny := func(keywords ...string) bool {
		for _, kw := range keywords {
			if strings.Contains(lowered, kw) {
				return true
			}
		}
		return false
	}
	// startsWithAny reports whether the subject begins with any of the prefixes.
	startsWithAny := func(prefixes ...string) bool {
		for _, p := range prefixes {
			if strings.HasPrefix(lowered, p) {
				return true
			}
		}
		return false
	}

	rules := []struct {
		label   string
		matched bool
	}{
		{"newsletter", containsAny("newsletter", "weekly", "monthly", "digest")},
		{"reply", startsWithAny("re:")},
		{"forward", startsWithAny("fwd:", "fw:")},
		{"notification", containsAny("notification", "alert", "reminder")},
		{"promotional", containsAny("sale", "deal", "offer", "discount", "%", "free")},
		{"update", containsAny("update", "new version", "release")},
		{"transactional", containsAny("receipt", "confirmation", "invoice", "payment")},
	}

	var found []string
	for _, rule := range rules {
		if rule.matched {
			found = append(found, rule.label)
		}
	}
	return found
}
// categorizeEmail assigns an email to exactly one category based on keyword
// matches against the sender's domain, the full sender string and the
// subject, all compared in lower case.
//
// Rules are tried in a fixed order and the first match wins: social, finance,
// travel, shopping, newsletters, work. An email that matches none of them is
// categorized as "personal".
func categorizeEmail(sender, subject string) string {
	domain := extractDomain(sender)
	from := strings.ToLower(sender)
	subj := strings.ToLower(subject)

	// anyIn reports whether text contains at least one of the keywords.
	anyIn := func(text string, keywords ...string) bool {
		for _, kw := range keywords {
			if strings.Contains(text, kw) {
				return true
			}
		}
		return false
	}

	switch {
	case anyIn(domain, "facebook.com", "twitter.com", "linkedin.com", "instagram.com",
		"tiktok.com", "youtube.com", "reddit.com"):
		return "social"
	case anyIn(subj, "payment", "invoice", "receipt") ||
		anyIn(domain, "bank", "paypal", "stripe"):
		return "finance"
	case anyIn(domain, "booking", "airbnb", "hotel", "airline") ||
		anyIn(subj, "flight", "reservation"):
		return "travel"
	case anyIn(domain, "amazon", "ebay") ||
		anyIn(subj, "order", "shipping"):
		return "shopping"
	case anyIn(from, "noreply", "no-reply") ||
		anyIn(subj, "newsletter", "unsubscribe"):
		return "newsletters"
	case anyIn(domain, "slack", "github", "jira", "atlassian"):
		return "work"
	default:
		return "personal"
	}
}
// parseEmailDate parses an email date string by trying a list of known
// layouts in order and returning the first successful result.
//
// Surrounding whitespace is ignored, and a trailing parenthesized comment such
// as the "(UTC)" in "Tue, 1 Jul 2025 10:52:37 +0000 (UTC)" is stripped before
// parsing, because mail software commonly appends the zone name that way. An
// error naming the original input is returned when no layout matches.
func parseEmailDate(dateStr string) (time.Time, error) {
	layouts := []string{
		time.RFC1123Z,
		time.RFC1123,
		"Mon, 2 Jan 2006 15:04:05 -0700",
		// RFC1123 requires a two-digit day; accept a single digit as well.
		"Mon, 2 Jan 2006 15:04:05 MST",
		"2 Jan 2006 15:04:05 -0700",
		"2006-01-02T15:04:05Z07:00",
		"2006-01-02 15:04:05",
	}

	candidate := strings.TrimSpace(dateStr)
	// None of the layouts can match a value ending in ")", so dropping the
	// trailing comment never changes the result for input that parsed before.
	if strings.HasSuffix(candidate, ")") {
		if open := strings.LastIndex(candidate, "("); open > 0 {
			candidate = strings.TrimSpace(candidate[:open])
		}
	}

	for _, layout := range layouts {
		if t, err := time.Parse(layout, candidate); err == nil {
			return t, nil
		}
	}
	return time.Time{}, fmt.Errorf("unable to parse date: %s", dateStr)
}
// sortSenders converts the per-sender counts into a slice ordered by count,
// highest first, and keeps at most the top 20 entries.
//
// Senders with equal counts are ordered by their sender string. Map iteration
// order is random and sort.Slice is not stable, so without this tie-break both
// the order and the choice of entries at the cut-off would differ from run to
// run.
func sortSenders(senderCounts map[string]int) []SenderInfo {
	var senders []SenderInfo
	for email, count := range senderCounts {
		senders = append(senders, SenderInfo{
			Email:  email,
			Count:  count,
			Domain: extractDomain(email),
		})
	}

	sort.Slice(senders, func(i, j int) bool {
		if senders[i].Count != senders[j].Count {
			return senders[i].Count > senders[j].Count
		}
		return senders[i].Email < senders[j].Email
	})

	if len(senders) > 20 {
		senders = senders[:20]
	}
	return senders
}
// sortDomains converts the per-domain counts into a slice ordered by count,
// highest first, classifies each domain with categorizeDomain and keeps at
// most the top 15 entries.
//
// Domains with equal counts are ordered by name. Map iteration order is random
// and sort.Slice is not stable, so without this tie-break both the order and
// the choice of entries at the cut-off would differ from run to run.
func sortDomains(domainCounts map[string]int) []DomainInfo {
	var domains []DomainInfo
	for domain, count := range domainCounts {
		domains = append(domains, DomainInfo{
			Domain: domain,
			Count:  count,
			Type:   categorizeDomain(domain),
		})
	}

	sort.Slice(domains, func(i, j int) bool {
		if domains[i].Count != domains[j].Count {
			return domains[i].Count > domains[j].Count
		}
		return domains[i].Domain < domains[j].Domain
	})

	if len(domains) > 15 {
		domains = domains[:15]
	}
	return domains
}
// categorizeDomain classifies a domain by case-insensitive keyword matching.
// Rules are tried in a fixed order and the first match wins: "personal",
// "social", "commerce", "automated". A domain that matches none of them is
// classified as "business".
func categorizeDomain(domain string) string {
	name := strings.ToLower(domain)

	// has reports whether the domain contains at least one of the keywords.
	has := func(keywords ...string) bool {
		for _, kw := range keywords {
			if strings.Contains(name, kw) {
				return true
			}
		}
		return false
	}

	switch {
	case has("gmail", "yahoo", "hotmail", "outlook"):
		return "personal"
	case has("facebook", "twitter", "linkedin", "instagram"):
		return "social"
	case has("amazon", "ebay", "shop", "store"):
		return "commerce"
	case has("noreply", "no-reply", "mail"):
		return "automated"
	default:
		return "business"
	}
}
// sortPatterns converts the pattern-to-subjects map into a slice ordered by
// the number of matching subjects, highest first. Each entry keeps at most
// three example subjects, while Count reflects all matches.
//
// Patterns with equal counts are ordered by name so the result does not depend
// on map iteration order.
func sortPatterns(patternCounts map[string][]string) []PatternInfo {
	var patterns []PatternInfo
	for pattern, examples := range patternCounts {
		limitedExamples := examples
		if len(limitedExamples) > 3 {
			// The capacity is capped as well, so appending to the returned
			// examples can never overwrite the caller's slice.
			limitedExamples = limitedExamples[:3:3]
		}
		patterns = append(patterns, PatternInfo{
			Pattern:  pattern,
			Count:    len(examples),
			Examples: limitedExamples,
		})
	}

	sort.Slice(patterns, func(i, j int) bool {
		if patterns[i].Count != patterns[j].Count {
			return patterns[i].Count > patterns[j].Count
		}
		return patterns[i].Pattern < patterns[j].Pattern
	})
	return patterns
}
// printSummary writes a human-readable report to stdout: the top ten senders
// and domains, every category with its share of all emails, every detected
// subject pattern, and the covered date range.
//
// Categories are printed by count, highest first, with ties broken by name.
// They live in a map, so iterating it directly would print them in a different
// order on every run.
func printSummary(stats EmailStats) {
	fmt.Printf("\n=== EMAIL ANALYSIS SUMMARY ===\n")
	fmt.Printf("Total emails analyzed: %d\n\n", stats.TotalEmails)

	fmt.Printf("TOP SENDERS:\n")
	for i, sender := range stats.TopSenders {
		if i >= 10 {
			break
		}
		fmt.Printf(" %d. %s (%d emails)\n", i+1, sender.Email, sender.Count)
	}

	fmt.Printf("\nTOP DOMAINS:\n")
	for i, domain := range stats.TopDomains {
		if i >= 10 {
			break
		}
		fmt.Printf(" %d. %s (%d emails, %s)\n", i+1, domain.Domain, domain.Count, domain.Type)
	}

	fmt.Printf("\nEMAIL CATEGORIES:\n")
	categories := make([]string, 0, len(stats.Categories))
	for category := range stats.Categories {
		categories = append(categories, category)
	}
	sort.Slice(categories, func(i, j int) bool {
		ci, cj := stats.Categories[categories[i]], stats.Categories[categories[j]]
		if ci != cj {
			return ci > cj
		}
		return categories[i] < categories[j]
	})
	for _, category := range categories {
		count := stats.Categories[category]
		// Avoid printing NaN or Inf if the total is zero.
		percentage := 0.0
		if stats.TotalEmails > 0 {
			percentage = float64(count) / float64(stats.TotalEmails) * 100
		}
		fmt.Printf(" %s: %d emails (%.1f%%)\n", category, count, percentage)
	}

	fmt.Printf("\nSUBJECT PATTERNS:\n")
	for _, pattern := range stats.SubjectPatterns {
		fmt.Printf(" %s: %d emails\n", pattern.Pattern, pattern.Count)
	}

	fmt.Printf("\nTIME ANALYSIS:\n")
	fmt.Printf(" Date range: %s to %s\n", stats.TimeAnalysis.OldestEmail, stats.TimeAnalysis.NewestEmail)
	fmt.Printf(" Years with emails: %d\n", len(stats.TimeAnalysis.EmailsByYear))
}
// outputJSON writes stats to stdout as indented JSON. If encoding or writing
// fails, the error is logged and the program exits with a non-zero status
// instead of silently producing truncated or missing output.
func outputJSON(stats EmailStats) {
	encoder := json.NewEncoder(os.Stdout)
	encoder.SetIndent("", " ")
	if err := encoder.Encode(stats); err != nil {
		log.Fatalf("Error writing JSON: %v", err)
	}
}