topfans/backend/gateway/cmd/cleanup-orphan-avatars/main.go
2026-06-02 21:10:35 +08:00

242 lines
6.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package main 提供 OSS 头像孤儿文件清理工具。
//
// 用法:
//
// go run ./gateway/cmd/cleanup-orphan-avatars \
// --older-than-days 7 # 只看 7 天前的对象
// --limit 100 # 一次最多处理 100 个
// --delete # 真正删除;缺省为 dry-run只打印不打
//
// DB / OSS 配置走环境变量(与 userService 一致):
// - DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME
// - OSS_REGION, OSS_BUCKET_NAME, OSS_STS_ROLE_ARN,
// OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, OSS_AVATAR_DIR
//
// 默认扫描前缀 = $OSS_AVATAR_DIR"avatar/"),用 --prefix 覆盖。
package main
import (
"flag"
"fmt"
"log"
"os"
"strconv"
"time"
"github.com/topfans/backend/gateway/config"
"github.com/topfans/backend/gateway/pkg/ossutil"
"github.com/topfans/backend/pkg/database"
"github.com/topfans/backend/pkg/models"
"gorm.io/gorm"
)
var (
prefix = flag.String("prefix", "", "OSS 前缀,默认 = cfg.OSS.AvatarDir")
olderThanDays = flag.Int("older-than-days", 0, "仅标记 mtime 早于 N 天的对象0 = 不过滤)")
limit = flag.Int("limit", 0, "最多处理多少个孤儿0 = 不限)")
deleteMode = flag.Bool("delete", false, "真正删除 OSS 对象;缺省为 dry-run")
batchSize = flag.Int("batch-size", 100, "每处理 N 个对象打印一次进度")
dbHost = flag.String("db-host", getEnv("DB_HOST", "localhost"), "DB host")
dbPort = flag.Int("db-port", getEnvInt("DB_PORT", 5432), "DB port")
dbUser = flag.String("db-user", getEnv("DB_USER", "postgres"), "DB user")
dbPassword = flag.String("db-password", getEnv("DB_PASSWORD", ""), "DB password")
dbName = flag.String("db-name", getEnv("DB_NAME", "top-fans"), "DB name")
)
func getEnv(key, fallback string) string {
if v := os.Getenv(key); v != "" {
return v
}
return fallback
}
func getEnvInt(key string, fallback int) int {
if v := os.Getenv(key); v != "" {
if n, err := strconv.Atoi(v); err == nil {
return n
}
}
return fallback
}
func main() {
flag.Parse()
mode := "DRY-RUN"
if *deleteMode {
mode = "DELETE"
}
log.SetFlags(log.LstdFlags | log.Lmicroseconds)
log.Printf("=== cleanup-orphan-avatars [%s] ===", mode)
cfg := config.Load()
scanPrefix := *prefix
if scanPrefix == "" {
scanPrefix = cfg.OSS.AvatarDir
}
if scanPrefix == "" {
log.Fatalf("--prefix 为空且 cfg.OSS.AvatarDir 未设置,请显式传 --prefix")
}
log.Printf("scan prefix: %s", scanPrefix)
if *olderThanDays > 0 {
log.Printf("age filter: only mtime < %d days ago", *olderThanDays)
}
if *limit > 0 {
log.Printf("limit: max %d orphans to process", *limit)
}
if err := database.Init(database.Config{
Host: *dbHost,
Port: *dbPort,
User: *dbUser,
Password: *dbPassword,
DBName: *dbName,
SSLMode: "disable",
TimeZone: "Asia/Shanghai",
}); err != nil {
log.Fatalf("connect DB failed: %v", err)
}
defer func() { _ = database.Close() }()
db := database.GetDB()
referenced, skipped := collectReferencedKeys(db, &cfg.OSS)
log.Printf("referenced keys: %d (含 fan_profiles),解析失败/外链跳过: %d", referenced.cardinality(), skipped)
objs, err := ossutil.List(&cfg.OSS, scanPrefix, 0)
if err != nil {
log.Fatalf("OSS List failed: %v", err)
}
log.Printf("OSS objects under %q: %d", scanPrefix, len(objs))
cutoff := time.Time{}
if *olderThanDays > 0 {
cutoff = time.Now().Add(-time.Duration(*olderThanDays) * 24 * time.Hour)
}
var (
orphans []ossutil.ObjectInfo
skippedNew int
skippedRef int
)
for _, obj := range objs {
if referenced.contains(obj.Key) {
skippedRef++
continue
}
if !cutoff.IsZero() && obj.LastModified.After(cutoff) {
skippedNew++
continue
}
orphans = append(orphans, obj)
if *limit > 0 && len(orphans) >= *limit {
break
}
}
log.Printf("candidates: %d (referenced skip: %d, age skip: %d)", len(orphans), skippedRef, skippedNew)
if len(orphans) == 0 {
log.Printf("nothing to do. bye.")
return
}
var totalSize int64
for _, o := range orphans {
totalSize += o.Size
}
log.Printf("would free: %d files, %s", len(orphans), humanBytes(totalSize))
processed := 0
for _, o := range orphans {
processed++
if *deleteMode {
if err := ossutil.Delete(&cfg.OSS, o.Key); err != nil {
log.Printf("[FAIL] delete %s: %v", o.Key, err)
continue
}
log.Printf("[DEL ] %s size=%s mtime=%s", o.Key, humanBytes(o.Size), o.LastModified.Format(time.RFC3339))
} else {
log.Printf("[SKIP] %s size=%s mtime=%s", o.Key, humanBytes(o.Size), o.LastModified.Format(time.RFC3339))
}
if processed%*batchSize == 0 {
log.Printf("--- progress: %d / %d ---", processed, len(orphans))
}
}
log.Printf("done. processed=%d", processed)
}
// referencedKeySet 记录"正在被数据库引用的 OSS key"。
// 用 map[string]struct{} 而不是 sync.Map —— 工具是单 goroutine。
type referencedKeySet struct {
m map[string]struct{}
}
func (s *referencedKeySet) add(k string) {
if s.m == nil {
s.m = make(map[string]struct{})
}
s.m[k] = struct{}{}
}
func (s *referencedKeySet) contains(k string) bool {
_, ok := s.m[k]
return ok
}
func (s *referencedKeySet) cardinality() int {
return len(s.m)
}
// collectReferencedKeys 从 users / fan_profiles 收集所有头像 URL
// 通过 cfg 校验 host 后转成 OSS key 入集合。
// 返回:(set, 解析失败/外链 数量)。
func collectReferencedKeys(db *gorm.DB, cfg *config.OSSConfig) (referencedKeySet, int) {
set := referencedKeySet{}
skipped := 0
var userURLs []string
if err := db.Model(&models.User{}).
Where("avatar_url IS NOT NULL AND avatar_url <> '' AND deleted_at IS NULL").
Pluck("avatar_url", &userURLs).Error; err != nil {
log.Fatalf("query users.avatar_url failed: %v", err)
}
for _, u := range userURLs {
k, err := ossutil.ExtractKeyFromPublicURL(cfg, u)
if err != nil {
skipped++
continue
}
set.add(k)
}
var profileURLs []string
if err := db.Model(&models.FanProfile{}).
Where("avatar_url IS NOT NULL AND avatar_url <> ''").
Pluck("avatar_url", &profileURLs).Error; err != nil {
log.Fatalf("query fan_profiles.avatar_url failed: %v", err)
}
for _, u := range profileURLs {
k, err := ossutil.ExtractKeyFromPublicURL(cfg, u)
if err != nil {
skipped++
continue
}
set.add(k)
}
return set, skipped
}
func humanBytes(n int64) string {
const k = 1024
if n < k {
return fmt.Sprintf("%dB", n)
}
if n < k*k {
return fmt.Sprintf("%.1fKB", float64(n)/k)
}
if n < k*k*k {
return fmt.Sprintf("%.1fMB", float64(n)/(k*k))
}
return fmt.Sprintf("%.1fGB", float64(n)/(k*k*k))
}