feat:修改压测
This commit is contained in:
parent
7074546959
commit
792c891158
@ -1,7 +1,7 @@
|
|||||||
# TopFans Backend Makefile
|
# TopFans Backend Makefile
|
||||||
# 用于简化开发流程
|
# 用于简化开发流程
|
||||||
|
|
||||||
.PHONY: help install-swagger gen-swagger update-swagger start-swagger start-all stop-all clean build run all loadgen-build loadgen-test loadgen-vet loadgen-ci
|
.PHONY: help install-swagger gen-swagger update-swagger start-swagger start-all stop-all clean build run all loadgen-build loadgen-test loadgen-vet loadgen-ci loadgen-seed-local loadgen-seed-prod-tunnel loadgen-cleanup-local loadgen-cleanup-prod-tunnel
|
||||||
|
|
||||||
# 默认目标
|
# 默认目标
|
||||||
help:
|
help:
|
||||||
@ -24,9 +24,15 @@ help:
|
|||||||
@echo " make all - 安装依赖 + 生成文档 + 构建"
|
@echo " make all - 安装依赖 + 生成文档 + 构建"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "压测工具:"
|
@echo "压测工具:"
|
||||||
@echo " make loadgen-build - 编译 seed + loadgen 到 bin/"
|
@echo " make loadgen-build - 编译 seed + loadgen 到 bin/"
|
||||||
@echo " make loadgen-test - 运行 loadgen 单元测试"
|
@echo " make loadgen-test - 运行 loadgen 单元测试"
|
||||||
@echo " make loadgen-vet - go vet 静态检查"
|
@echo " make loadgen-vet - go vet 静态检查"
|
||||||
|
@echo ""
|
||||||
|
@echo "压测 seed 便捷入口 (免去手敲 flag):"
|
||||||
|
@echo " make loadgen-seed-local - seed 写入本地 docker dev (top-fans:15432)"
|
||||||
|
@echo " make loadgen-seed-prod-tunnel - seed 通过 SSH 端口转发写生产 (127.0.0.1:25432)"
|
||||||
|
@echo " make loadgen-cleanup-local - 清理本地 docker 压测数据"
|
||||||
|
@echo " make loadgen-cleanup-prod-tunnel - 清理生产 docker 压测数据 (走 SSH 转发)"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "清理:"
|
@echo "清理:"
|
||||||
@echo " make clean - 清理生成的文件"
|
@echo " make clean - 清理生成的文件"
|
||||||
@ -43,9 +49,15 @@ help:
|
|||||||
@echo " make all - 安装依赖 + 生成文档 + 构建"
|
@echo " make all - 安装依赖 + 生成文档 + 构建"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "压测工具:"
|
@echo "压测工具:"
|
||||||
@echo " make loadgen-build - 编译 seed + loadgen 到 bin/"
|
@echo " make loadgen-build - 编译 seed + loadgen 到 bin/"
|
||||||
@echo " make loadgen-test - 运行 loadgen 单元测试"
|
@echo " make loadgen-test - 运行 loadgen 单元测试"
|
||||||
@echo " make loadgen-vet - go vet 静态检查"
|
@echo " make loadgen-vet - go vet 静态检查"
|
||||||
|
@echo ""
|
||||||
|
@echo "压测 seed 便捷入口 (免去手敲 flag):"
|
||||||
|
@echo " make loadgen-seed-local - seed 写入本地 docker dev (top-fans:15432)"
|
||||||
|
@echo " make loadgen-seed-prod-tunnel - seed 通过 SSH 端口转发写生产 (127.0.0.1:25432)"
|
||||||
|
@echo " make loadgen-cleanup-local - 清理本地 docker 压测数据"
|
||||||
|
@echo " make loadgen-cleanup-prod-tunnel - 清理生产 docker 压测数据 (走 SSH 转发)"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "清理:"
|
@echo "清理:"
|
||||||
@echo " make clean - 清理生成的文件"
|
@echo " make clean - 清理生成的文件"
|
||||||
@ -128,6 +140,54 @@ loadgen-vet:
|
|||||||
# loadgen 完整 CI 入口: vet + test + build
|
# loadgen 完整 CI 入口: vet + test + build
|
||||||
loadgen-ci: loadgen-vet loadgen-test loadgen-build
|
loadgen-ci: loadgen-vet loadgen-test loadgen-build
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 压测 seed 便捷入口 (按部署环境封装 DB 参数)
|
||||||
|
# ============================================================
|
||||||
|
#
|
||||||
|
# 三种部署对应的 DB 参数 (详见 backend/scripts/loadgen/seed/README.md):
|
||||||
|
# 1. 生产机直跑 : localhost:5432/topfans (seed 默认值, ssh 进去直接跑)
|
||||||
|
# 2. 本地联调 : localhost:15432/top-fans (宿主机 postgresql-database-1)
|
||||||
|
# 3. 本地连生产 : 127.0.0.1:25432/topfans (ssh -L 端口转发)
|
||||||
|
#
|
||||||
|
# 密码统一从对应 .env 文件读 (.env.prod / .env.local),避免在命令行明文泄露。
|
||||||
|
|
||||||
|
# --- 本地 docker dev (top-fans:15432, 密码 123456) ---
|
||||||
|
loadgen-seed-local: loadgen-build
|
||||||
|
@echo ">>> seed 写入本地 docker dev (top-fans:15432)"
|
||||||
|
@DB_PASSWORD=$$(grep '^DB_PASSWORD=' ../docker/.env.local | cut -d= -f2) \
|
||||||
|
JWT_SECRET=$$(grep '^JWT_SECRET=' ../docker/.env.local | cut -d= -f2) \
|
||||||
|
./bin/seed \
|
||||||
|
--db-host=localhost --db-port=15432 --db-name=top-fans --db-user=postgres
|
||||||
|
|
||||||
|
loadgen-cleanup-local: loadgen-build
|
||||||
|
@echo ">>> 清理本地 docker 压测数据 (top-fans:15432)"
|
||||||
|
@DB_PASSWORD=$$(grep '^DB_PASSWORD=' ../docker/.env.local | cut -d= -f2) \
|
||||||
|
./bin/seed --cleanup \
|
||||||
|
--db-host=localhost --db-port=15432 --db-name=top-fans --db-user=postgres
|
||||||
|
|
||||||
|
# --- 本地连生产 (ssh -L 25432 → 生产 docker 5432) ---
|
||||||
|
# 调用前请确保已建立转发: ssh -L 25432:127.0.0.1:5432 -N -f root@101.132.250.62
|
||||||
|
loadgen-seed-prod-tunnel: loadgen-build
|
||||||
|
@echo ">>> seed 通过 SSH 隧道写生产 (127.0.0.1:25432 → docker 5432)"
|
||||||
|
@if ! lsof -iTCP:25432 -sTCP:LISTEN >/dev/null 2>&1; then \
|
||||||
|
echo "❌ 25432 端口未监听,请先: ssh -L 25432:127.0.0.1:5432 -N -f root@101.132.250.62"; \
|
||||||
|
exit 1; \
|
||||||
|
fi
|
||||||
|
@DB_PASSWORD=$$(grep '^DB_PASSWORD=' ../docker/.env.prod | cut -d= -f2) \
|
||||||
|
JWT_SECRET=$$(grep '^JWT_SECRET=' ../docker/.env.prod | cut -d= -f2) \
|
||||||
|
./bin/seed \
|
||||||
|
--db-host=127.0.0.1 --db-port=25432 --db-name=topfans --db-user=postgres
|
||||||
|
|
||||||
|
loadgen-cleanup-prod-tunnel: loadgen-build
|
||||||
|
@echo ">>> 清理生产 docker 压测数据 (走 SSH 隧道)"
|
||||||
|
@if ! lsof -iTCP:25432 -sTCP:LISTEN >/dev/null 2>&1; then \
|
||||||
|
echo "❌ 25432 端口未监听,请先: ssh -L 25432:127.0.0.1:5432 -N -f root@101.132.250.62"; \
|
||||||
|
exit 1; \
|
||||||
|
fi
|
||||||
|
@DB_PASSWORD=$$(grep '^DB_PASSWORD=' ../docker/.env.prod | cut -d= -f2) \
|
||||||
|
./bin/seed --cleanup \
|
||||||
|
--db-host=127.0.0.1 --db-port=25432 --db-name=topfans --db-user=postgres
|
||||||
|
|
||||||
# 全部:安装依赖 + 生成文档 + 构建
|
# 全部:安装依赖 + 生成文档 + 构建
|
||||||
all: install-swagger gen-swagger build
|
all: install-swagger gen-swagger build
|
||||||
@echo ""
|
@echo ""
|
||||||
|
|||||||
1
backend/loadtest_bcrypt.txt
Normal file
1
backend/loadtest_bcrypt.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
$2b$10$h0UgR1Ls1DgBQnYTs0msTuBahtonqzPVu06txjyt./iNxUJsreFdO
|
||||||
@ -1,45 +1,45 @@
|
|||||||
{
|
{
|
||||||
"scenario": "S1",
|
"scenario": "S1",
|
||||||
"total_requests": 18,
|
"total_requests": 1072,
|
||||||
"errors": 0,
|
"errors": 0,
|
||||||
"five_xx": 0,
|
"five_xx": 0,
|
||||||
"p50_us": 86143,
|
"p50_us": 173823,
|
||||||
"p95_us": 95743,
|
"p95_us": 182015,
|
||||||
"p99_us": 95743,
|
"p99_us": 210175,
|
||||||
"max_us": 95743,
|
"max_us": 223999,
|
||||||
"stages": [
|
"stages": [
|
||||||
{
|
{
|
||||||
"stage_idx": 1,
|
"stage_idx": 1,
|
||||||
"target_rps": 1,
|
"target_rps": 5,
|
||||||
"total_requests": 3,
|
"total_requests": 300,
|
||||||
"errors": 0,
|
"errors": 0,
|
||||||
"five_xx": 0,
|
"five_xx": 0,
|
||||||
"p50_us": 93951,
|
"p50_us": 112063,
|
||||||
"p95_us": 98495,
|
"p95_us": 122815,
|
||||||
"p99_us": 98495,
|
"p99_us": 138751,
|
||||||
"max_us": 98495
|
"max_us": 214527
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"stage_idx": 2,
|
"stage_idx": 2,
|
||||||
"target_rps": 2,
|
"target_rps": 10,
|
||||||
"total_requests": 6,
|
"total_requests": 387,
|
||||||
"errors": 0,
|
"errors": 0,
|
||||||
"five_xx": 0,
|
"five_xx": 0,
|
||||||
"p50_us": 87295,
|
"p50_us": 174335,
|
||||||
"p95_us": 89215,
|
"p95_us": 182015,
|
||||||
"p99_us": 89215,
|
"p99_us": 203519,
|
||||||
"max_us": 89215
|
"max_us": 259199
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"stage_idx": 3,
|
"stage_idx": 3,
|
||||||
"target_rps": 3,
|
"target_rps": 20,
|
||||||
"total_requests": 9,
|
"total_requests": 385,
|
||||||
"errors": 0,
|
"errors": 0,
|
||||||
"five_xx": 0,
|
"five_xx": 0,
|
||||||
"p50_us": 86143,
|
"p50_us": 173823,
|
||||||
"p95_us": 95743,
|
"p95_us": 182015,
|
||||||
"p99_us": 95743,
|
"p99_us": 210175,
|
||||||
"max_us": 95743
|
"max_us": 223999
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,14 +1,15 @@
|
|||||||
{
|
{
|
||||||
"start_time": "2026-06-15T21:05:10.831978+08:00",
|
"start_time": "2026-06-16T22:10:55.266986+08:00",
|
||||||
"end_time": "2026-06-15T21:05:38.174693+08:00",
|
"end_time": "2026-06-16T22:13:55.674244+08:00",
|
||||||
"target": "http://localhost:8080",
|
"target": "http://101.132.250.62:8080",
|
||||||
"scenarios": [
|
"scenarios": [
|
||||||
"S1",
|
"S1",
|
||||||
"S2",
|
"S2",
|
||||||
"S4"
|
"S4"
|
||||||
],
|
],
|
||||||
"step_schedule": "1,2,3",
|
"step_schedule": "5,10,20",
|
||||||
"jwt_secret_hint": "topfans-",
|
"jwt_secret_hint": "topfans-",
|
||||||
"monitor_mode": "off",
|
"prod_ssh": "root@101.132.250.62",
|
||||||
|
"monitor_mode": "full",
|
||||||
"stage_mode": "step"
|
"stage_mode": "step"
|
||||||
}
|
}
|
||||||
@ -97,8 +97,15 @@ cd /opt/topfans/loadtest
|
|||||||
bash scripts/prod_seed.sh
|
bash scripts/prod_seed.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**脚本会先做这些预检查** (任一失败立即退出,不会写数据):
|
||||||
|
1. `/opt/topfans/docker/.env.prod` 存在 + `DB_PASSWORD` / `JWT_SECRET` 非空
|
||||||
|
2. `seed` 二进制 + `loadtest_bcrypt.txt` 都存在
|
||||||
|
3. 如果 `psql` 可用,会先 `SELECT 1` 验证能连到 `localhost:5432/topfans`
|
||||||
|
|
||||||
|
**然后**打印连接信息 + 删除/重建摘要,要求输入 `y` 确认才执行。
|
||||||
|
|
||||||
**这一步骤会做什么**:
|
**这一步骤会做什么**:
|
||||||
- 读 `/opt/topfans/docker/.env.prod` 拿 DB_PASSWORD + JWT_SECRET
|
- 显式传所有 DB 参数 (`--db-host=localhost --db-port=5432 --db-name=topfans --db-user=postgres`),不依赖 seed 默认值
|
||||||
- 插入 star_id=999900 测试明星 (1 行)
|
- 插入 star_id=999900 测试明星 (1 行)
|
||||||
- 插入 1000 个测试用户 (mobile 19900000001 - 19900001000)
|
- 插入 1000 个测试用户 (mobile 19900000001 - 19900001000)
|
||||||
- 插入 1000 个 fan_profile + crystal
|
- 插入 1000 个 fan_profile + crystal
|
||||||
|
|||||||
@ -60,6 +60,26 @@ func (cb *CircuitBreaker) Check(client ClientMetrics, server ServerMetrics, now
|
|||||||
cb.mu.Lock()
|
cb.mu.Lock()
|
||||||
defer cb.mu.Unlock()
|
defer cb.mu.Unlock()
|
||||||
|
|
||||||
|
if cb.checkClientLocked(client, now) || cb.checkServerLocked(server, now) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckClient 仅检查 client-side 指标 (R1/R2/R3: ErrorRate/P99/FiveXXRate),
|
||||||
|
// 不触发任何 server-side 检查 (R4/R5/R6: PGConn/Disk/OOM)。
|
||||||
|
//
|
||||||
|
// 用途:scenarios/common.go 在每个 HTTP 请求后调用,避免 server 指标还没从
|
||||||
|
// metrics-feed.jsonl 喂进来时(零值 ServerMetrics{} 会误触发 DiskGB < 5 规则)
|
||||||
|
// 导致 circuit breaker 误 trip。Server-side 检查统一交给 main.go 的
|
||||||
|
// consumeServerMetrics goroutine,每 5 秒基于真实 server metrics 做一次。
|
||||||
|
func (cb *CircuitBreaker) CheckClient(client ClientMetrics, now time.Time) bool {
|
||||||
|
cb.mu.Lock()
|
||||||
|
defer cb.mu.Unlock()
|
||||||
|
return cb.checkClientLocked(client, now)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cb *CircuitBreaker) checkClientLocked(client ClientMetrics, now time.Time) bool {
|
||||||
if client.ErrorRate > cb.ErrRate {
|
if client.ErrorRate > cb.ErrRate {
|
||||||
if cb.errRateStart.IsZero() {
|
if cb.errRateStart.IsZero() {
|
||||||
cb.errRateStart = now
|
cb.errRateStart = now
|
||||||
@ -96,6 +116,10 @@ func (cb *CircuitBreaker) Check(client ClientMetrics, server ServerMetrics, now
|
|||||||
cb.fiveXXStart = time.Time{}
|
cb.fiveXXStart = time.Time{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cb *CircuitBreaker) checkServerLocked(server ServerMetrics, now time.Time) bool {
|
||||||
if server.PGActive > cb.PGConnMax {
|
if server.PGActive > cb.PGConnMax {
|
||||||
if cb.pgConnStart.IsZero() {
|
if cb.pgConnStart.IsZero() {
|
||||||
cb.pgConnStart = now
|
cb.pgConnStart = now
|
||||||
|
|||||||
@ -61,3 +61,48 @@ func TestCircuitBreaker_Recovers(t *testing.T) {
|
|||||||
t.Error("should remain OK")
|
t.Error("should remain OK")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestCheckClient_IgnoresServerMetrics 验证 CheckClient 只看 client 指标,
|
||||||
|
// 不会因为 ServerMetrics{} 零值 (DiskGB=0) 触发 R5 disk < 5GB 误 trip。
|
||||||
|
// 回归测试:之前 scenarios/common.go 每次 HTTP 请求都传 ServerMetrics{},
|
||||||
|
// 导致 breaker 每 30 秒误 trip 一次,屏蔽掉 R1/R2/R3 的真实告警。
|
||||||
|
func TestCheckClient_IgnoresServerMetrics(t *testing.T) {
|
||||||
|
cb := NewCircuitBreaker()
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
// 模拟连续 2 分钟的请求(远超 SustainTime=30s)
|
||||||
|
for i := range 24 {
|
||||||
|
ts := now.Add(time.Duration(i*5) * time.Second)
|
||||||
|
if cb.CheckClient(ClientMetrics{ErrorRate: 0.01}, ts) {
|
||||||
|
t.Fatalf("CheckClient should never trip on healthy client metrics, "+
|
||||||
|
"iter=%d ts=%v", i, ts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if cb.State() != CircuitOK {
|
||||||
|
t.Error("breaker should remain OK; CheckClient must not trigger R4/R5/R6")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCheckClient_StillTripsOnErrorRate 验证 CheckClient 仍能正确触发
|
||||||
|
// R1 (ErrorRate > 0.05 持续 30s) — 不能因为拆分就把 client 检查也漏掉。
|
||||||
|
func TestCheckClient_StillTripsOnErrorRate(t *testing.T) {
|
||||||
|
cb := NewCircuitBreaker()
|
||||||
|
now := time.Now()
|
||||||
|
if cb.CheckClient(ClientMetrics{ErrorRate: 0.06}, now) {
|
||||||
|
t.Error("R1 should not trip on first check")
|
||||||
|
}
|
||||||
|
if !cb.CheckClient(ClientMetrics{ErrorRate: 0.06}, now.Add(31*time.Second)) {
|
||||||
|
t.Error("R1 should trip after 30s sustain")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCheckClient_StillTripsOnP99 验证 R2 (P99Ms > 3000 持续 30s) 仍生效。
|
||||||
|
func TestCheckClient_StillTripsOnP99(t *testing.T) {
|
||||||
|
cb := NewCircuitBreaker()
|
||||||
|
now := time.Now()
|
||||||
|
cb.CheckClient(ClientMetrics{P99Ms: 4000}, now)
|
||||||
|
if !cb.CheckClient(ClientMetrics{P99Ms: 4000}, now.Add(31*time.Second)) {
|
||||||
|
t.Error("R2 P99>3000 sustained should trip")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -218,13 +218,17 @@ func runLoadgen(target, scenarioIDs, stage, stepSchedule string, rps, vus int, d
|
|||||||
func consumeServerMetrics(ctx context.Context, feed <-chan lib.MetricsLine, breaker *lib.CircuitBreaker, rec *lib.LatencyRecorder, errCount, totalCount, fiveXXCount *atomic.Int64) {
|
func consumeServerMetrics(ctx context.Context, feed <-chan lib.MetricsLine, breaker *lib.CircuitBreaker, rec *lib.LatencyRecorder, errCount, totalCount, fiveXXCount *atomic.Int64) {
|
||||||
ticker := time.NewTicker(5 * time.Second)
|
ticker := time.NewTicker(5 * time.Second)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
var latestServer lib.ServerMetrics
|
// 用指针,nil 表示"还没从 metrics feed 拿到任何一行"。
|
||||||
|
// 修前 bug: 用值类型 var latestServer lib.ServerMetrics,
|
||||||
|
// 零值 DiskGB=0 永远 < 5,30 秒后 R5 disk 规则误 trip。
|
||||||
|
var latestServer *lib.ServerMetrics
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
case ml := <-feed:
|
case ml := <-feed:
|
||||||
latestServer = ml.ToServerMetrics()
|
sm := ml.ToServerMetrics()
|
||||||
|
latestServer = &sm
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
tot := totalCount.Load()
|
tot := totalCount.Load()
|
||||||
if tot == 0 {
|
if tot == 0 {
|
||||||
@ -238,7 +242,14 @@ func consumeServerMetrics(ctx context.Context, feed <-chan lib.MetricsLine, brea
|
|||||||
if snap.TotalCount() > 0 {
|
if snap.TotalCount() > 0 {
|
||||||
clientMetrics.P99Ms = snap.ValueAtPercentile(99) / 1000
|
clientMetrics.P99Ms = snap.ValueAtPercentile(99) / 1000
|
||||||
}
|
}
|
||||||
if breaker.Check(clientMetrics, latestServer, time.Now()) {
|
var tripped bool
|
||||||
|
if latestServer == nil {
|
||||||
|
// metrics feed 还没数据,只做 client 检查 (R1/R2/R3)
|
||||||
|
tripped = breaker.CheckClient(clientMetrics, time.Now())
|
||||||
|
} else {
|
||||||
|
tripped = breaker.Check(clientMetrics, *latestServer, time.Now())
|
||||||
|
}
|
||||||
|
if tripped {
|
||||||
log.Printf("🚨 circuit breaker tripped!")
|
log.Printf("🚨 circuit breaker tripped!")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -42,16 +42,26 @@ func runPreflight(target, prodSSH string) error {
|
|||||||
Detail: strings.TrimSpace(string(out)),
|
Detail: strings.TrimSpace(string(out)),
|
||||||
})
|
})
|
||||||
|
|
||||||
// ③ pg_dump backup file exists
|
// ③ pg_dump backup file exists (远程路径,需用 SSH 拿大小,不能用 os.Stat)
|
||||||
|
// 修复前 bug: 用 os.Stat 去查远程路径,本地必然不存在导致 info=nil,
|
||||||
|
// Sprintf 里的 info.Size() 无 nil 保护 → panic。
|
||||||
|
// 修复: 用 `ssh prodSSH stat -c%s <file>` 拿远程大小,避免走本地 fs。
|
||||||
cmd = exec.Command("ssh", prodSSH, "ls -t /opt/topfans/backups/pre-loadtest-*.sql 2>/dev/null | head -1")
|
cmd = exec.Command("ssh", prodSSH, "ls -t /opt/topfans/backups/pre-loadtest-*.sql 2>/dev/null | head -1")
|
||||||
out, _ = cmd.Output()
|
out, _ = cmd.Output()
|
||||||
backupFile := strings.TrimSpace(string(out))
|
backupFile := strings.TrimSpace(string(out))
|
||||||
info, statErr := os.Stat(backupFile)
|
var backupSize int64 = -1
|
||||||
sizeOK := statErr == nil && info.Size() > 50*1024*1024
|
if backupFile != "" {
|
||||||
|
cmd = exec.Command("ssh", prodSSH, fmt.Sprintf("stat -c%%s %s", backupFile))
|
||||||
|
sizeOut, sizeErr := cmd.Output()
|
||||||
|
if sizeErr == nil {
|
||||||
|
fmt.Sscanf(strings.TrimSpace(string(sizeOut)), "%d", &backupSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sizeOK := backupSize > 50*1024*1024
|
||||||
checks = append(checks, CheckResult{
|
checks = append(checks, CheckResult{
|
||||||
Name: "③ pg_dump backup exists (>50MB)",
|
Name: "③ pg_dump backup exists (>50MB)",
|
||||||
Passed: sizeOK,
|
Passed: sizeOK,
|
||||||
Detail: fmt.Sprintf("file=%s size=%d", backupFile, ifZero64(info.Size())),
|
Detail: fmt.Sprintf("file=%s size=%d", backupFile, backupSize),
|
||||||
})
|
})
|
||||||
|
|
||||||
// ⑤ prod 磁盘空闲 > 10GB
|
// ⑤ prod 磁盘空闲 > 10GB
|
||||||
@ -119,13 +129,6 @@ func ifZero(v int) int {
|
|||||||
return v
|
return v
|
||||||
}
|
}
|
||||||
|
|
||||||
func ifZero64(v int64) int64 {
|
|
||||||
if v == 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return v
|
|
||||||
}
|
|
||||||
|
|
||||||
func ifEmpty(empty bool, s string) string {
|
func ifEmpty(empty bool, s string) string {
|
||||||
if empty {
|
if empty {
|
||||||
return s
|
return s
|
||||||
|
|||||||
@ -21,7 +21,7 @@ func doRequest(client *http.Client, req *http.Request, rec *lib.LatencyRecorder,
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
errCount.Add(1)
|
errCount.Add(1)
|
||||||
rec.RecordResult(true, false)
|
rec.RecordResult(true, false)
|
||||||
checkBreaker(client, rec, errCount, totalCount, fiveXXCount, breaker)
|
checkBreaker(rec, errCount, totalCount, fiveXXCount, breaker)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
@ -34,10 +34,10 @@ func doRequest(client *http.Client, req *http.Request, rec *lib.LatencyRecorder,
|
|||||||
errCount.Add(1)
|
errCount.Add(1)
|
||||||
}
|
}
|
||||||
rec.RecordResult(isErr, is5xx)
|
rec.RecordResult(isErr, is5xx)
|
||||||
checkBreaker(client, rec, errCount, totalCount, fiveXXCount, breaker)
|
checkBreaker(rec, errCount, totalCount, fiveXXCount, breaker)
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkBreaker(client *http.Client, rec *lib.LatencyRecorder, errCount, totalCount, fiveXXCount *atomic.Int64, breaker *lib.CircuitBreaker) {
|
func checkBreaker(rec *lib.LatencyRecorder, errCount, totalCount, fiveXXCount *atomic.Int64, breaker *lib.CircuitBreaker) {
|
||||||
if breaker == nil {
|
if breaker == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -53,5 +53,9 @@ func checkBreaker(client *http.Client, rec *lib.LatencyRecorder, errCount, total
|
|||||||
if snap.TotalCount() > 0 {
|
if snap.TotalCount() > 0 {
|
||||||
clientMetrics.P99Ms = snap.ValueAtPercentile(99) / 1000
|
clientMetrics.P99Ms = snap.ValueAtPercentile(99) / 1000
|
||||||
}
|
}
|
||||||
breaker.Check(clientMetrics, lib.ServerMetrics{}, time.Now())
|
// 只检查 client 指标 (R1/R2/R3);server 指标 (R4/R5/R6) 由
|
||||||
|
// main.go consumeServerMetrics goroutine 每 5 秒基于真实 metrics
|
||||||
|
// feed 检查,避免在 metrics feed 还没数据时(零值 ServerMetrics{}
|
||||||
|
// 会让 DiskGB=0 < 5 触发误 trip)
|
||||||
|
breaker.CheckClient(clientMetrics, time.Now())
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,30 +3,99 @@
|
|||||||
# prod seed 一键运行脚本
|
# prod seed 一键运行脚本
|
||||||
# 用途:从 /opt/topfans/docker/.env.prod 读 DB/JWT 凭据,跑 seed 工具
|
# 用途:从 /opt/topfans/docker/.env.prod 读 DB/JWT 凭据,跑 seed 工具
|
||||||
# 使用:ssh root@101.132.250.62 "bash /opt/topfans/loadtest/scripts/prod_seed.sh"
|
# 使用:ssh root@101.132.250.62 "bash /opt/topfans/loadtest/scripts/prod_seed.sh"
|
||||||
|
#
|
||||||
|
# 设计原则(与 backend/scripts/loadgen/seed/README.md 同步):
|
||||||
|
# - 显式传所有 DB 参数,不依赖 seed 二进制的默认值
|
||||||
|
# (避免 seed 默认值被改时突然连错地方)
|
||||||
|
# - 前置检查 .env.prod / loadtest_bcrypt.txt / DB 连通性
|
||||||
|
# - 所有写操作指向生产 docker 暴露的 5432 端口 (宿主机 localhost:5432
|
||||||
|
# = docker 端口映射 `5432:5432`,不是容器内)
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ENV_FILE="/opt/topfans/docker/.env.prod"
|
ENV_FILE="/opt/topfans/docker/.env.prod"
|
||||||
LOADTEST_DIR="/opt/topfans/loadtest"
|
LOADTEST_DIR="/opt/topfans/loadtest"
|
||||||
|
SEED_BIN="$LOADTEST_DIR/seed"
|
||||||
|
BCRYPT_FILE="$LOADTEST_DIR/loadtest_bcrypt.txt"
|
||||||
|
|
||||||
|
# 这些值必须跟 docker/.env.prod + docker-compose.prod.yml 保持一致
|
||||||
|
EXPECTED_DB_HOST="localhost"
|
||||||
|
EXPECTED_DB_PORT="5432"
|
||||||
|
EXPECTED_DB_NAME="topfans"
|
||||||
|
EXPECTED_DB_USER="postgres"
|
||||||
|
|
||||||
|
# ===== 1. 预检查:env 文件 =====
|
||||||
if [[ ! -f "$ENV_FILE" ]]; then
|
if [[ ! -f "$ENV_FILE" ]]; then
|
||||||
echo "❌ $ENV_FILE 不存在"
|
echo "❌ $ENV_FILE 不存在"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export DB_PASSWORD=$(grep '^DB_PASSWORD=' "$ENV_FILE" | cut -d= -f2)
|
DB_PASSWORD=$(grep '^DB_PASSWORD=' "$ENV_FILE" | cut -d= -f2)
|
||||||
export JWT_SECRET=$(grep '^JWT_SECRET=' "$ENV_FILE" | cut -d= -f2)
|
JWT_SECRET=$(grep '^JWT_SECRET=' "$ENV_FILE" | cut -d= -f2)
|
||||||
|
|
||||||
|
if [[ -z "$DB_PASSWORD" ]]; then
|
||||||
|
echo "❌ $ENV_FILE 里 DB_PASSWORD 为空"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [[ -z "$JWT_SECRET" ]]; then
|
||||||
|
echo "❌ $ENV_FILE 里 JWT_SECRET 为空"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ===== 2. 预检查:seed 二进制 + bcrypt 文件 =====
|
||||||
|
if [[ ! -x "$SEED_BIN" ]]; then
|
||||||
|
echo "❌ $SEED_BIN 不存在或不可执行,先按 RUNBOOK.md §1 上传:"
|
||||||
|
echo " scp bin/seed root@101.132.250.62:$LOADTEST_DIR/"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [[ ! -f "$BCRYPT_FILE" ]]; then
|
||||||
|
echo "❌ $BCRYPT_FILE 不存在,seed 会读不到密码哈希"
|
||||||
|
echo " 按 RUNBOOK.md §1.2 重新生成并上传"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ===== 3. 预检查:DB 连通性 (避免 seed 跑到一半才发现连不上) =====
|
||||||
|
if command -v psql >/dev/null 2>&1; then
|
||||||
|
if ! PGPASSWORD="$DB_PASSWORD" psql \
|
||||||
|
-h "$EXPECTED_DB_HOST" -p "$EXPECTED_DB_PORT" \
|
||||||
|
-U "$EXPECTED_DB_USER" -d "$EXPECTED_DB_NAME" \
|
||||||
|
-c 'SELECT 1' >/dev/null 2>&1; then
|
||||||
|
echo "❌ 连不上 $EXPECTED_DB_HOST:$EXPECTED_DB_PORT/$EXPECTED_DB_NAME"
|
||||||
|
echo " 检查 docker ps | grep topfans-postgres 确认容器在跑"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ===== 4. 打印信息并要求确认 =====
|
||||||
cd "$LOADTEST_DIR"
|
cd "$LOADTEST_DIR"
|
||||||
|
|
||||||
echo "=========================================="
|
echo "=========================================="
|
||||||
echo "prod seed - 准备 loadtest 数据"
|
echo "prod seed - 准备 loadtest 数据"
|
||||||
echo "DB host: localhost (容器内)"
|
echo "DB host : $EXPECTED_DB_HOST:$EXPECTED_DB_PORT"
|
||||||
echo "DB name: topfans"
|
echo " (生产 docker 端口映射 5432:5432,从宿主机 localhost 访问)"
|
||||||
echo "JWT secret: ${JWT_SECRET:0:10}..."
|
echo "DB name : $EXPECTED_DB_NAME"
|
||||||
|
echo "DB user : $EXPECTED_DB_USER"
|
||||||
|
echo "JWT secret : ${JWT_SECRET:0:10}..."
|
||||||
|
echo "bcrypt 文件: $BCRYPT_FILE ($(wc -c <"$BCRYPT_FILE") bytes)"
|
||||||
echo "=========================================="
|
echo "=========================================="
|
||||||
|
echo ""
|
||||||
|
echo "⚠️ 即将:删除 star_id=999900 的旧测试数据 → 重新灌 23k 行"
|
||||||
|
echo " 真实业务数据 (star_id 87/88/91/93/94/95) 不受影响"
|
||||||
|
echo ""
|
||||||
|
read -r -p "确认继续? [y/N] " confirm
|
||||||
|
if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
|
||||||
|
echo "已取消"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
./seed --db-name=topfans --jwt-secret="$JWT_SECRET"
|
# ===== 5. 跑 seed (显式传所有 DB 参数,排除默认值被改的风险) =====
|
||||||
|
./seed \
|
||||||
|
--db-host="$EXPECTED_DB_HOST" \
|
||||||
|
--db-port="$EXPECTED_DB_PORT" \
|
||||||
|
--db-name="$EXPECTED_DB_NAME" \
|
||||||
|
--db-user="$EXPECTED_DB_USER" \
|
||||||
|
--db-password="$DB_PASSWORD" \
|
||||||
|
--jwt-secret="$JWT_SECRET"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "✅ seed 完成。生成的文件:"
|
echo "✅ seed 完成。生成的文件:"
|
||||||
|
|||||||
@ -21,7 +21,7 @@ make loadgen-build
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 在 prod 上跑 (凌晨 T0 = 02:00)
|
## 在 prod 上跑 (凌晨 T0 = 02:00,推荐)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ssh root@101.132.250.62
|
ssh root@101.132.250.62
|
||||||
@ -31,12 +31,58 @@ bash scripts/prod_seed.sh
|
|||||||
|
|
||||||
这个脚本会自动:
|
这个脚本会自动:
|
||||||
1. 读 `/opt/topfans/docker/.env.prod` 拿 DB_PASSWORD + JWT_SECRET
|
1. 读 `/opt/topfans/docker/.env.prod` 拿 DB_PASSWORD + JWT_SECRET
|
||||||
2. 跑 seed (插入 23k 行测试数据)
|
2. 跑 seed (插入 23k 行测试数据,直接写入 docker 里的 `topfans-postgres`)
|
||||||
3. 自动重置 PG 序列 (CLAUDE.md 规范)
|
3. 自动重置 PG 序列 (CLAUDE.md 规范)
|
||||||
4. 写 `users.csv` (含 1000 个 JWT)
|
4. 写 `users.csv` (含 1000 个 JWT)
|
||||||
|
|
||||||
**预计耗时**:30-60 秒
|
**预计耗时**:30-60 秒
|
||||||
|
|
||||||
|
> ⚠️ **为什么必须在生产机上跑**:`seed` 默认值是 `localhost:5432/topfans`,
|
||||||
|
> 这正好等于生产机上 docker 暴露的 `5432:5432` + `POSTGRES_DB=topfans`。
|
||||||
|
> 从本地 Mac 跑默认值会**连到你本机的 Postgres**(可能根本不是 topfans 库),
|
||||||
|
> 后续 `users.csv` 拿去打生产网关会全部 401。详见下一节。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 从本地 Mac 连到生产 DB (不 ssh 进生产机)
|
||||||
|
|
||||||
|
只适合本地紧急补 seed、或者压测脚本调试。**首选仍是上一节 ssh 进生产机**。
|
||||||
|
|
||||||
|
### 方式 A:SSH 端口转发 (推荐,改动最小)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 建立转发:本地 25432 → 生产机 docker 5432
|
||||||
|
ssh -L 25432:127.0.0.1:5432 -N -f root@101.132.250.62
|
||||||
|
|
||||||
|
# 2. 跑 seed (从 docker/.env.prod 拿密码)
|
||||||
|
cd /Users/liulujian/Documents/code/TopFansByGithub
|
||||||
|
DB_PASSWORD=$(grep '^DB_PASSWORD=' docker/.env.prod | cut -d= -f2) \
|
||||||
|
JWT_SECRET=$(grep '^JWT_SECRET=' docker/.env.prod | cut -d= -f2) \
|
||||||
|
go run ./backend/scripts/loadgen/seed \
|
||||||
|
--db-host=127.0.0.1 \
|
||||||
|
--db-port=25432 \
|
||||||
|
--db-name=topfans \
|
||||||
|
--db-user=postgres
|
||||||
|
|
||||||
|
# 3. 用完记得关转发
|
||||||
|
pkill -f 'ssh -L 25432'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 方式 B:从本地直连 docker 容器 (生产机已开 5432 端口时)
|
||||||
|
|
||||||
|
如果生产机 `topfans-postgres` 已经通过 `ports: - "5432:5432"` 暴露到外网,
|
||||||
|
也可以直接走公网,但**强烈不建议** (生产 PG 暴露公网本身就有风险):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DB_PASSWORD=$(grep '^DB_PASSWORD=' docker/.env.prod | cut -d= -f2) \
|
||||||
|
JWT_SECRET=$(grep '^JWT_SECRET=' docker/.env.prod | cut -d= -f2) \
|
||||||
|
go run ./backend/scripts/loadgen/seed \
|
||||||
|
--db-host=101.132.250.62 \
|
||||||
|
--db-port=5432 \
|
||||||
|
--db-name=topfans \
|
||||||
|
--db-user=postgres
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 在本地 docker 跑 (开发联调)
|
## 在本地 docker 跑 (开发联调)
|
||||||
@ -77,11 +123,24 @@ Usage of ./bin/seed:
|
|||||||
-jwt-secret string # JWT 密钥 (默认 $JWT_SECRET)
|
-jwt-secret string # JWT 密钥 (默认 $JWT_SECRET)
|
||||||
-db-host string # PG host (默认 localhost)
|
-db-host string # PG host (默认 localhost)
|
||||||
-db-port int # PG port (默认 5432)
|
-db-port int # PG port (默认 5432)
|
||||||
-db-name string # PG 数据库 (prod=topfans, 本地=top-fans)
|
-db-name string # PG 数据库 (默认 topfans,即 prod 默认;
|
||||||
|
# 本地 docker dev 用 top-fans,需显式 --db-name=top-fans)
|
||||||
-db-user string # PG user (默认 postgres)
|
-db-user string # PG user (默认 postgres)
|
||||||
-db-password string # PG 密码 (默认 $DB_PASSWORD)
|
-db-password string # PG 密码 (默认 $DB_PASSWORD)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### 三种部署对应的 DB 参数速查表
|
||||||
|
|
||||||
|
| 部署 | host | port | dbname | user | password |
|
||||||
|
|------|------|------|--------|------|----------|
|
||||||
|
| **生产机直跑** (ssh 进去) | `localhost` | `5432` | `topfans` | `postgres` | `$DB_PASSWORD` (从 `docker/.env.prod` 读) |
|
||||||
|
| **本地连生产** (SSH 端口转发) | `127.0.0.1` | `25432` | `topfans` | `postgres` | `$DB_PASSWORD` (从 `docker/.env.prod` 读) |
|
||||||
|
| **本地 docker 联调** (宿主机 postgresql-database-1) | `localhost` | `15432` | `top-fans` | `postgres` | `123456` (从 `docker/.env.local` 读) |
|
||||||
|
|
||||||
|
> **默认值设计意图**:`localhost:5432/topfans` 是给"在生产机上直接跑"设计的,
|
||||||
|
> 因为生产 docker 把 5432 暴露到宿主机 5432。从本地 Mac 跑时**必须显式覆盖
|
||||||
|
> `--db-host` / `--db-port` / `--db-name`**,否则会连到本机 Postgres。
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 三种"清理"模式对比
|
## 三种"清理"模式对比
|
||||||
|
|||||||
@ -79,9 +79,12 @@ func main() {
|
|||||||
func parseFlags() *Config {
|
func parseFlags() *Config {
|
||||||
cfg := &Config{}
|
cfg := &Config{}
|
||||||
flag.StringVar(&cfg.JWTSecret, "jwt-secret", os.Getenv("JWT_SECRET"), "JWT secret (或 $JWT_SECRET)")
|
flag.StringVar(&cfg.JWTSecret, "jwt-secret", os.Getenv("JWT_SECRET"), "JWT secret (或 $JWT_SECRET)")
|
||||||
flag.StringVar(&cfg.DBHost, "db-host", "localhost", "PG host")
|
flag.StringVar(&cfg.DBHost, "db-host", "localhost",
|
||||||
flag.IntVar(&cfg.DBPort, "db-port", 5432, "PG port")
|
"PG host (默认 localhost — 生产机直跑用; 本地联调要覆盖,详见 README)")
|
||||||
flag.StringVar(&cfg.DBName, "db-name", "topfans", "PG database name (本地为 'top-fans' 带横线)")
|
flag.IntVar(&cfg.DBPort, "db-port", 5432,
|
||||||
|
"PG port (默认 5432 — 生产机直跑用; 本地 docker dev 用 15432)")
|
||||||
|
flag.StringVar(&cfg.DBName, "db-name", "topfans",
|
||||||
|
"PG database name (默认 topfans=生产; 本地 docker dev 需显式传 'top-fans')")
|
||||||
flag.StringVar(&cfg.DBUser, "db-user", "postgres", "PG user")
|
flag.StringVar(&cfg.DBUser, "db-user", "postgres", "PG user")
|
||||||
flag.StringVar(&cfg.DBPass, "db-password", os.Getenv("DB_PASSWORD"), "PG password (或 $DB_PASSWORD)")
|
flag.StringVar(&cfg.DBPass, "db-password", os.Getenv("DB_PASSWORD"), "PG password (或 $DB_PASSWORD)")
|
||||||
flag.BoolVar(&cfg.Reset, "reset", false, "delete existing test data before seed")
|
flag.BoolVar(&cfg.Reset, "reset", false, "delete existing test data before seed")
|
||||||
|
|||||||
2000
backend/users.csv
2000
backend/users.csv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user