// Package main 提供 OSS 头像孤儿文件清理工具。 // // 用法: // // go run ./gateway/cmd/cleanup-orphan-avatars \ // --older-than-days 7 # 只看 7 天前的对象 // --limit 100 # 一次最多处理 100 个 // --delete # 真正删除;缺省为 dry-run,只打印不打 // // DB / OSS 配置走环境变量(与 userService 一致): // - DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME // - OSS_REGION, OSS_BUCKET_NAME, OSS_STS_ROLE_ARN, // OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, OSS_AVATAR_DIR // // 默认扫描前缀 = $OSS_AVATAR_DIR("avatar/"),用 --prefix 覆盖。 package main import ( "flag" "fmt" "log" "os" "strconv" "time" "github.com/topfans/backend/gateway/config" "github.com/topfans/backend/gateway/pkg/ossutil" "github.com/topfans/backend/pkg/database" "github.com/topfans/backend/pkg/models" "gorm.io/gorm" ) var ( prefix = flag.String("prefix", "", "OSS 前缀,默认 = cfg.OSS.AvatarDir") olderThanDays = flag.Int("older-than-days", 0, "仅标记 mtime 早于 N 天的对象(0 = 不过滤)") limit = flag.Int("limit", 0, "最多处理多少个孤儿(0 = 不限)") deleteMode = flag.Bool("delete", false, "真正删除 OSS 对象;缺省为 dry-run") batchSize = flag.Int("batch-size", 100, "每处理 N 个对象打印一次进度") dbHost = flag.String("db-host", getEnv("DB_HOST", "localhost"), "DB host") dbPort = flag.Int("db-port", getEnvInt("DB_PORT", 5432), "DB port") dbUser = flag.String("db-user", getEnv("DB_USER", "postgres"), "DB user") dbPassword = flag.String("db-password", getEnv("DB_PASSWORD", ""), "DB password") dbName = flag.String("db-name", getEnv("DB_NAME", "top-fans"), "DB name") ) func getEnv(key, fallback string) string { if v := os.Getenv(key); v != "" { return v } return fallback } func getEnvInt(key string, fallback int) int { if v := os.Getenv(key); v != "" { if n, err := strconv.Atoi(v); err == nil { return n } } return fallback } func main() { flag.Parse() mode := "DRY-RUN" if *deleteMode { mode = "DELETE" } log.SetFlags(log.LstdFlags | log.Lmicroseconds) log.Printf("=== cleanup-orphan-avatars [%s] ===", mode) cfg := config.Load() scanPrefix := *prefix if scanPrefix == "" { scanPrefix = cfg.OSS.AvatarDir } if scanPrefix == "" { log.Fatalf("--prefix 为空且 cfg.OSS.AvatarDir 未设置,请显式传 --prefix") } log.Printf("scan prefix: %s", scanPrefix) if *olderThanDays > 0 { log.Printf("age filter: only mtime < %d days ago", *olderThanDays) } if *limit > 0 { log.Printf("limit: max %d orphans to process", *limit) } if err := database.Init(database.Config{ Host: *dbHost, Port: *dbPort, User: *dbUser, Password: *dbPassword, DBName: *dbName, SSLMode: "disable", TimeZone: "Asia/Shanghai", }); err != nil { log.Fatalf("connect DB failed: %v", err) } defer func() { _ = database.Close() }() db := database.GetDB() referenced, skipped := collectReferencedKeys(db, &cfg.OSS) log.Printf("referenced keys: %d (含 fan_profiles),解析失败/外链跳过: %d", referenced.cardinality(), skipped) objs, err := ossutil.List(&cfg.OSS, scanPrefix, 0) if err != nil { log.Fatalf("OSS List failed: %v", err) } log.Printf("OSS objects under %q: %d", scanPrefix, len(objs)) cutoff := time.Time{} if *olderThanDays > 0 { cutoff = time.Now().Add(-time.Duration(*olderThanDays) * 24 * time.Hour) } var ( orphans []ossutil.ObjectInfo skippedNew int skippedRef int ) for _, obj := range objs { if referenced.contains(obj.Key) { skippedRef++ continue } if !cutoff.IsZero() && obj.LastModified.After(cutoff) { skippedNew++ continue } orphans = append(orphans, obj) if *limit > 0 && len(orphans) >= *limit { break } } log.Printf("candidates: %d (referenced skip: %d, age skip: %d)", len(orphans), skippedRef, skippedNew) if len(orphans) == 0 { log.Printf("nothing to do. bye.") return } var totalSize int64 for _, o := range orphans { totalSize += o.Size } log.Printf("would free: %d files, %s", len(orphans), humanBytes(totalSize)) processed := 0 for _, o := range orphans { processed++ if *deleteMode { if err := ossutil.Delete(&cfg.OSS, o.Key); err != nil { log.Printf("[FAIL] delete %s: %v", o.Key, err) continue } log.Printf("[DEL ] %s size=%s mtime=%s", o.Key, humanBytes(o.Size), o.LastModified.Format(time.RFC3339)) } else { log.Printf("[SKIP] %s size=%s mtime=%s", o.Key, humanBytes(o.Size), o.LastModified.Format(time.RFC3339)) } if processed%*batchSize == 0 { log.Printf("--- progress: %d / %d ---", processed, len(orphans)) } } log.Printf("done. processed=%d", processed) } // referencedKeySet 记录"正在被数据库引用的 OSS key"。 // 用 map[string]struct{} 而不是 sync.Map —— 工具是单 goroutine。 type referencedKeySet struct { m map[string]struct{} } func (s *referencedKeySet) add(k string) { if s.m == nil { s.m = make(map[string]struct{}) } s.m[k] = struct{}{} } func (s *referencedKeySet) contains(k string) bool { _, ok := s.m[k] return ok } func (s *referencedKeySet) cardinality() int { return len(s.m) } // collectReferencedKeys 从 users / fan_profiles 收集所有头像 URL, // 通过 cfg 校验 host 后转成 OSS key 入集合。 // 返回:(set, 解析失败/外链 数量)。 func collectReferencedKeys(db *gorm.DB, cfg *config.OSSConfig) (referencedKeySet, int) { set := referencedKeySet{} skipped := 0 var userURLs []string if err := db.Model(&models.User{}). Where("avatar_url IS NOT NULL AND avatar_url <> '' AND deleted_at IS NULL"). Pluck("avatar_url", &userURLs).Error; err != nil { log.Fatalf("query users.avatar_url failed: %v", err) } for _, u := range userURLs { k, err := ossutil.ExtractKeyFromPublicURL(cfg, u) if err != nil { skipped++ continue } set.add(k) } var profileURLs []string if err := db.Model(&models.FanProfile{}). Where("avatar_url IS NOT NULL AND avatar_url <> ''"). Pluck("avatar_url", &profileURLs).Error; err != nil { log.Fatalf("query fan_profiles.avatar_url failed: %v", err) } for _, u := range profileURLs { k, err := ossutil.ExtractKeyFromPublicURL(cfg, u) if err != nil { skipped++ continue } set.add(k) } return set, skipped } func humanBytes(n int64) string { const k = 1024 if n < k { return fmt.Sprintf("%dB", n) } if n < k*k { return fmt.Sprintf("%.1fKB", float64(n)/k) } if n < k*k*k { return fmt.Sprintf("%.1fMB", float64(n)/(k*k)) } return fmt.Sprintf("%.1fGB", float64(n)/(k*k*k)) }