242 lines
6.6 KiB
Go
242 lines
6.6 KiB
Go
// Package main 提供 OSS 头像孤儿文件清理工具。
|
||
//
|
||
// 用法:
|
||
//
|
||
// go run ./gateway/cmd/cleanup-orphan-avatars \
|
||
// --older-than-days 7 # 只看 7 天前的对象
|
||
// --limit 100 # 一次最多处理 100 个
|
||
// --delete # 真正删除;缺省为 dry-run,只打印不删
|
||
//
|
||
// DB / OSS 配置走环境变量(与 userService 一致):
|
||
// - DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME
|
||
// - OSS_REGION, OSS_BUCKET_NAME, OSS_STS_ROLE_ARN,
|
||
// OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, OSS_AVATAR_DIR
|
||
//
|
||
// 默认扫描前缀 = $OSS_AVATAR_DIR("avatar/"),用 --prefix 覆盖。
|
||
package main
|
||
|
||
import (
|
||
"flag"
|
||
"fmt"
|
||
"log"
|
||
"os"
|
||
"strconv"
|
||
"time"
|
||
|
||
"github.com/topfans/backend/gateway/config"
|
||
"github.com/topfans/backend/gateway/pkg/ossutil"
|
||
"github.com/topfans/backend/pkg/database"
|
||
"github.com/topfans/backend/pkg/models"
|
||
"gorm.io/gorm"
|
||
)
|
||
|
||
var (
|
||
prefix = flag.String("prefix", "", "OSS 前缀,默认 = cfg.OSS.AvatarDir")
|
||
olderThanDays = flag.Int("older-than-days", 0, "仅标记 mtime 早于 N 天的对象(0 = 不过滤)")
|
||
limit = flag.Int("limit", 0, "最多处理多少个孤儿(0 = 不限)")
|
||
deleteMode = flag.Bool("delete", false, "真正删除 OSS 对象;缺省为 dry-run")
|
||
batchSize = flag.Int("batch-size", 100, "每处理 N 个对象打印一次进度")
|
||
dbHost = flag.String("db-host", getEnv("DB_HOST", "localhost"), "DB host")
|
||
dbPort = flag.Int("db-port", getEnvInt("DB_PORT", 5432), "DB port")
|
||
dbUser = flag.String("db-user", getEnv("DB_USER", "postgres"), "DB user")
|
||
dbPassword = flag.String("db-password", getEnv("DB_PASSWORD", ""), "DB password")
|
||
dbName = flag.String("db-name", getEnv("DB_NAME", "top-fans"), "DB name")
|
||
)
|
||
|
||
// getEnv returns the value of the environment variable key, or fallback
// when the variable is unset or set to the empty string.
func getEnv(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
||
|
||
// getEnvInt returns the environment variable key parsed as a base-10 int.
//
// fallback is returned when the variable is unset, empty, or not a valid
// integer. In the invalid case a warning is logged, so a typo such as
// DB_PORT=54e2 does not silently turn into the default value.
func getEnvInt(key string, fallback int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return fallback
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		// Keep the best-effort behaviour (use the default) but make the
		// misconfiguration visible to the operator.
		log.Printf("invalid integer for env %s, using default %d: %v", key, fallback, err)
		return fallback
	}
	return n
}
|
||
|
||
func main() {
|
||
flag.Parse()
|
||
|
||
mode := "DRY-RUN"
|
||
if *deleteMode {
|
||
mode = "DELETE"
|
||
}
|
||
log.SetFlags(log.LstdFlags | log.Lmicroseconds)
|
||
log.Printf("=== cleanup-orphan-avatars [%s] ===", mode)
|
||
|
||
cfg := config.Load()
|
||
|
||
scanPrefix := *prefix
|
||
if scanPrefix == "" {
|
||
scanPrefix = cfg.OSS.AvatarDir
|
||
}
|
||
if scanPrefix == "" {
|
||
log.Fatalf("--prefix 为空且 cfg.OSS.AvatarDir 未设置,请显式传 --prefix")
|
||
}
|
||
log.Printf("scan prefix: %s", scanPrefix)
|
||
if *olderThanDays > 0 {
|
||
log.Printf("age filter: only mtime < %d days ago", *olderThanDays)
|
||
}
|
||
if *limit > 0 {
|
||
log.Printf("limit: max %d orphans to process", *limit)
|
||
}
|
||
|
||
if err := database.Init(database.Config{
|
||
Host: *dbHost,
|
||
Port: *dbPort,
|
||
User: *dbUser,
|
||
Password: *dbPassword,
|
||
DBName: *dbName,
|
||
SSLMode: "disable",
|
||
TimeZone: "Asia/Shanghai",
|
||
}); err != nil {
|
||
log.Fatalf("connect DB failed: %v", err)
|
||
}
|
||
defer func() { _ = database.Close() }()
|
||
|
||
db := database.GetDB()
|
||
referenced, skipped := collectReferencedKeys(db, &cfg.OSS)
|
||
log.Printf("referenced keys: %d (含 fan_profiles),解析失败/外链跳过: %d", referenced.cardinality(), skipped)
|
||
|
||
objs, err := ossutil.List(&cfg.OSS, scanPrefix, 0)
|
||
if err != nil {
|
||
log.Fatalf("OSS List failed: %v", err)
|
||
}
|
||
log.Printf("OSS objects under %q: %d", scanPrefix, len(objs))
|
||
|
||
cutoff := time.Time{}
|
||
if *olderThanDays > 0 {
|
||
cutoff = time.Now().Add(-time.Duration(*olderThanDays) * 24 * time.Hour)
|
||
}
|
||
|
||
var (
|
||
orphans []ossutil.ObjectInfo
|
||
skippedNew int
|
||
skippedRef int
|
||
)
|
||
for _, obj := range objs {
|
||
if referenced.contains(obj.Key) {
|
||
skippedRef++
|
||
continue
|
||
}
|
||
if !cutoff.IsZero() && obj.LastModified.After(cutoff) {
|
||
skippedNew++
|
||
continue
|
||
}
|
||
orphans = append(orphans, obj)
|
||
if *limit > 0 && len(orphans) >= *limit {
|
||
break
|
||
}
|
||
}
|
||
|
||
log.Printf("candidates: %d (referenced skip: %d, age skip: %d)", len(orphans), skippedRef, skippedNew)
|
||
if len(orphans) == 0 {
|
||
log.Printf("nothing to do. bye.")
|
||
return
|
||
}
|
||
|
||
var totalSize int64
|
||
for _, o := range orphans {
|
||
totalSize += o.Size
|
||
}
|
||
log.Printf("would free: %d files, %s", len(orphans), humanBytes(totalSize))
|
||
|
||
processed := 0
|
||
for _, o := range orphans {
|
||
processed++
|
||
if *deleteMode {
|
||
if err := ossutil.Delete(&cfg.OSS, o.Key); err != nil {
|
||
log.Printf("[FAIL] delete %s: %v", o.Key, err)
|
||
continue
|
||
}
|
||
log.Printf("[DEL ] %s size=%s mtime=%s", o.Key, humanBytes(o.Size), o.LastModified.Format(time.RFC3339))
|
||
} else {
|
||
log.Printf("[SKIP] %s size=%s mtime=%s", o.Key, humanBytes(o.Size), o.LastModified.Format(time.RFC3339))
|
||
}
|
||
if processed%*batchSize == 0 {
|
||
log.Printf("--- progress: %d / %d ---", processed, len(orphans))
|
||
}
|
||
}
|
||
log.Printf("done. processed=%d", processed)
|
||
}
|
||
|
||
// referencedKeySet is the set of OSS keys currently referenced by the
// database. It is backed by a plain map[string]struct{} rather than a
// sync.Map because the tool runs on a single goroutine.
//
// The zero value is an empty set that is ready to use.
type referencedKeySet struct {
	m map[string]struct{}
}

// add inserts k into the set, allocating the backing map on first use.
func (s *referencedKeySet) add(k string) {
	if s.m == nil {
		s.m = map[string]struct{}{k: {}}
		return
	}
	s.m[k] = struct{}{}
}

// contains reports whether k is in the set. Lookups on a never-written
// set read a nil map and therefore report false.
func (s *referencedKeySet) contains(k string) bool {
	if _, found := s.m[k]; found {
		return true
	}
	return false
}

// cardinality returns the number of distinct keys in the set.
func (s *referencedKeySet) cardinality() int {
	size := len(s.m)
	return size
}
|
||
|
||
// collectReferencedKeys 从 users / fan_profiles 收集所有头像 URL,
|
||
// 通过 cfg 校验 host 后转成 OSS key 入集合。
|
||
// 返回:(set, 解析失败/外链 数量)。
|
||
func collectReferencedKeys(db *gorm.DB, cfg *config.OSSConfig) (referencedKeySet, int) {
|
||
set := referencedKeySet{}
|
||
skipped := 0
|
||
|
||
var userURLs []string
|
||
if err := db.Model(&models.User{}).
|
||
Where("avatar_url IS NOT NULL AND avatar_url <> '' AND deleted_at IS NULL").
|
||
Pluck("avatar_url", &userURLs).Error; err != nil {
|
||
log.Fatalf("query users.avatar_url failed: %v", err)
|
||
}
|
||
for _, u := range userURLs {
|
||
k, err := ossutil.ExtractKeyFromPublicURL(cfg, u)
|
||
if err != nil {
|
||
skipped++
|
||
continue
|
||
}
|
||
set.add(k)
|
||
}
|
||
|
||
var profileURLs []string
|
||
if err := db.Model(&models.FanProfile{}).
|
||
Where("avatar_url IS NOT NULL AND avatar_url <> ''").
|
||
Pluck("avatar_url", &profileURLs).Error; err != nil {
|
||
log.Fatalf("query fan_profiles.avatar_url failed: %v", err)
|
||
}
|
||
for _, u := range profileURLs {
|
||
k, err := ossutil.ExtractKeyFromPublicURL(cfg, u)
|
||
if err != nil {
|
||
skipped++
|
||
continue
|
||
}
|
||
set.add(k)
|
||
}
|
||
return set, skipped
|
||
}
|
||
|
||
// humanBytes renders a byte count using 1024-based units.
//
// Values below 1024 are printed as a whole number with a "B" suffix.
// Larger values are printed with one decimal place as KB, MB or GB;
// anything of 1024 MB or more stays in GB.
func humanBytes(n int64) string {
	const (
		kib = 1 << 10
		mib = 1 << 20
		gib = 1 << 30
	)
	switch {
	case n < kib:
		return fmt.Sprintf("%dB", n)
	case n < mib:
		return fmt.Sprintf("%.1fKB", float64(n)/kib)
	case n < gib:
		return fmt.Sprintf("%.1fMB", float64(n)/mib)
	default:
		return fmt.Sprintf("%.1fGB", float64(n)/gib)
	}
}
|