#!/usr/bin/env python3
"""
Test-data generation script for the revenue-completeness detection algorithm.

Generates test scenario data covering different risk levels.
"""

import os
import sys
import json
import random
from datetime import datetime, timedelta
from typing import List, Dict, Any

# Extend the module search path with the project's backend directory.
# NOTE(review): this is a machine-specific absolute path and nothing visible in
# this script imports from it — confirm whether it is still needed.
sys.path.append('/Users/liulujian/Documents/code/deeprisk-claude-1/backend')

from loguru import logger

# In addition to loguru's default sink, write INFO-and-above records to a
# log file in the current working directory, rotated at 100 MB.
logger.add("revenue_test_data.log", rotation="100 MB", level="INFO")

class RevenueTestDataGenerator:
    """Generate fixture data for the revenue-completeness detection algorithm.

    Eight fixed streamers (``TEST_001`` .. ``TEST_008``) are produced. Each one
    gets three revenue-sharing contracts, 30 daily recharge records for
    January 2024 and a single tax declaration for period ``2024-01``. The gap
    between the recharge total and the declared amount is what distinguishes
    the scenarios.

    Recharge amounts, payment methods and contract ratios are drawn from the
    global ``random`` module; seed it beforehand if reproducible files are
    required.
    """

    # Directory used when the caller does not pass ``output_dir``.
    DEFAULT_OUTPUT_DIR = "/Users/liulujian/Documents/code/deeprisk-claude-1/backend/test_data/revenue_test"

    # One row per streamer: (streamer_id, streamer_name, tax_no, platform, tier).
    _STREAMER_ROWS = (
        ("TEST_001", "测试主播-严重漏报", "91110000123456789A", "抖音", "S"),
        ("TEST_002", "测试主播-高风险", "91110000987654321B", "快手", "A"),
        ("TEST_003", "测试主播-中风险", "91110000555666777C", "淘宝直播", "B"),
        ("TEST_004", "测试主播-低风险", "91110000333444555D", "小红书", "A"),
        ("TEST_005", "测试主播-正常", "91110000111222333E", "抖音", "S"),
        ("TEST_006", "测试主播-完全未申报", "91110000777888999F", "快手", "S"),
        ("TEST_007", "测试主播-多平台收入", "91110000222333444G", "抖音", "S"),
        ("TEST_008", "测试主播-分批申报", "91110000555666777H", "快手", "A"),
    )

    # One row per scenario:
    # (id suffix, daily recharge min, daily recharge max, declared amount, tax amount).
    # Tax amounts are stored as literals (declared amount at the 0.13 rate) so
    # the emitted JSON never depends on floating-point multiplication.
    _AMOUNT_ROWS = (
        ("001", 10000, 20000, 100000.0, 13000.0),  # severe under-reporting
        ("002", 8000, 12000, 180000.0, 23400.0),   # high risk
        ("003", 2500, 4000, 85000.0, 11050.0),     # medium risk
        ("004", 1200, 2000, 47000.0, 6110.0),      # low risk
        ("005", 2500, 4000, 98000.0, 12740.0),     # normal (within tolerance)
        ("006", 20000, 30000, 0.0, 0.0),           # nothing declared
        ("007", 4000, 6000, 50000.0, 6500.0),      # multi-platform income
        ("008", 3000, 5000, 60000.0, 7800.0),      # declared in instalments
    )

    # One row per scenario description, in the key order given by
    # ``_SCENARIO_FIELDS``. The amounts quoted in the descriptions are nominal:
    # the generated recharge totals vary randomly within the daily ranges above.
    _SCENARIO_FIELDS = (
        "name",
        "streamer_id",
        "expected_risk_level",
        "expected_risk_score",
        "description",
        "test_focus",
    )
    _SCENARIO_ROWS = (
        ("严重漏报场景", "TEST_001", "CRITICAL", "90-100",
         "充值50万,仅申报10万,差异率80%,差异金额40万", "测试严重风险检测能力"),
        ("高风险场景", "TEST_002", "HIGH", "75-90",
         "充值30万,申报18万,差异率40%,差异金额12万", "测试高风险检测能力"),
        ("中风险场景", "TEST_003", "MEDIUM", "50-75",
         "充值10万,申报8.5万,差异率15%,差异金额1.5万", "测试中风险检测能力"),
        ("低风险场景", "TEST_004", "LOW", "25-50",
         "充值5万,申报4.7万,差异率6%,差异金额3000元", "测试低风险检测能力"),
        ("正常场景", "TEST_005", "LOW", "0-25",
         "充值10万,申报9.8万,差异率2%,差异金额2000元", "测试正常数据处理能力"),
        ("完全未申报场景", "TEST_006", "CRITICAL", "95-100",
         "充值80万,申报0,差异率100%,差异金额80万", "测试极端情况检测能力"),
        ("多平台收入场景", "TEST_007", "HIGH", "80-90",
         "充值15万,申报5万,差异率67%,差异金额10万", "测试多平台收入检测能力"),
        ("分批申报场景", "TEST_008", "MEDIUM", "60-75",
         "充值12万,申报6万,差异率50%,差异金额6万", "测试分批申报检测能力"),
    )

    _PAYMENT_METHODS = ("支付宝", "微信支付", "银行卡")

    def __init__(self, output_dir: str = DEFAULT_OUTPUT_DIR):
        """Remember the output directory and create it if it does not exist.

        Args:
            output_dir: Directory the JSON files are written to. Defaults to
                ``DEFAULT_OUTPUT_DIR``, the location the script originally
                hard-coded.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    @staticmethod
    def _find_streamer(streamers: List[Dict], streamer_id: str) -> Dict:
        """Return the streamer dict whose ``streamer_id`` matches.

        Raises:
            ValueError: If no such streamer is present. A bare ``next()``
                would surface a context-free ``StopIteration`` instead.
        """
        for streamer in streamers:
            if streamer['streamer_id'] == streamer_id:
                return streamer
        raise ValueError(f"streamer {streamer_id!r} not found in streamers list")

    def generate_streamer_info(self) -> List[Dict]:
        """Build the eight fixed streamer records.

        Returns:
            A list of dicts with keys ``streamer_id``, ``streamer_name``,
            ``tax_no``, ``platform``, ``tier``, ``status`` and ``created_at``.
        """
        logger.info("生成主播信息数据...")
        streamers = [
            {
                "streamer_id": streamer_id,
                "streamer_name": streamer_name,
                "tax_no": tax_no,
                "platform": platform,
                "tier": tier,
                "status": "active",
                "created_at": "2024-01-01 00:00:00",
            }
            for streamer_id, streamer_name, tax_no, platform, tier in self._STREAMER_ROWS
        ]
        logger.info(f"成功生成 {len(streamers)} 个主播信息")
        return streamers

    def generate_revenue_sharing_contracts(self, streamers: List[Dict]) -> List[Dict]:
        """Build three revenue-sharing contracts for every streamer.

        Contract ``i`` (0-based) starts ``i * 30`` days after 2024-01-01; all
        contracts end 365 days after 2024-01-01.

        Args:
            streamers: Streamer dicts as returned by ``generate_streamer_info``.

        Returns:
            A list of contract dicts, three per streamer, in streamer order.
        """
        logger.info("生成分成协议数据...")
        contracts = []
        base_date = datetime(2024, 1, 1)
        end_date = (base_date + timedelta(days=365)).strftime("%Y-%m-%d")

        for streamer in streamers:
            for i in range(3):
                start = base_date + timedelta(days=i * 30)
                contracts.append({
                    "contract_id": f"CONTRACT_{streamer['streamer_id']}_{i+1:03d}",
                    "streamer_id": streamer['streamer_id'],
                    "platform": streamer['platform'],
                    "start_date": start.strftime("%Y-%m-%d"),
                    "end_date": end_date,
                    # NOTE(review): the two ratios are drawn independently, so
                    # they need not sum to 1 — confirm whether the consumer of
                    # this data expects complementary shares.
                    "share_ratio": round(random.uniform(0.3, 0.7), 2),
                    "platform_share_ratio": round(random.uniform(0.3, 0.7), 2),
                    "status": "active",
                    "created_at": start.strftime("%Y-%m-%d %H:%M:%S"),
                })

        logger.info(f"成功生成 {len(contracts)} 个分成协议")
        return contracts

    def generate_recharge_data(self, streamers: List[Dict]) -> List[Dict]:
        """Build 30 daily recharge records per scenario for January 2024.

        The daily amount is a random integer within the scenario's range from
        ``_AMOUNT_ROWS`` and the payment method is a random pick from
        ``_PAYMENT_METHODS``.

        Args:
            streamers: Streamer dicts; must contain ``TEST_001`` .. ``TEST_008``.

        Returns:
            A list of recharge dicts, grouped by scenario and ordered by day.

        Raises:
            ValueError: If a scenario's streamer is missing from ``streamers``.
        """
        logger.info("生成充值数据(多风险场景)...")
        recharges = []
        base_date = datetime(2024, 1, 1)

        for suffix, low, high, _declared, _tax in self._AMOUNT_ROWS:
            streamer = self._find_streamer(streamers, f"TEST_{suffix}")
            for day in range(1, 31):
                # Draw the amount before the payment method so the sequence of
                # ``random`` calls is stable for a given seed.
                amount = random.randint(low, high)
                recharges.append({
                    "recharge_id": f"RECH_{suffix}_{day:03d}",
                    "streamer_id": streamer['streamer_id'],
                    "recharge_date": (base_date + timedelta(days=day - 1)).strftime("%Y-%m-%d"),
                    "recharge_amount": float(amount),
                    "payment_method": random.choice(self._PAYMENT_METHODS),
                    "payment_status": "completed",
                    "platform": streamer['platform'],
                })

        logger.info(f"成功生成 {len(recharges)} 条充值记录")
        return recharges

    def generate_tax_declarations(self, streamers: List[Dict]) -> List[Dict]:
        """Build one tax declaration per scenario for period ``2024-01``.

        Declared and tax amounts are the fixed values from ``_AMOUNT_ROWS``;
        every declaration is dated 2024-01-31 with a 0.13 tax rate.

        Args:
            streamers: Streamer dicts; must contain ``TEST_001`` .. ``TEST_008``.

        Returns:
            A list of eight declaration dicts in scenario order.

        Raises:
            ValueError: If a scenario's streamer is missing from ``streamers``.
        """
        logger.info("生成税务申报数据...")
        declarations = []
        declaration_date = datetime(2024, 1, 31).strftime("%Y-%m-%d")  # month-end filing

        for suffix, _low, _high, declared_amount, tax_amount in self._AMOUNT_ROWS:
            streamer = self._find_streamer(streamers, f"TEST_{suffix}")
            declarations.append({
                "declaration_id": f"TAX_{suffix}_2024-01",
                "tax_no": streamer['tax_no'],
                "streamer_id": streamer['streamer_id'],
                "declaration_date": declaration_date,
                "declared_amount": declared_amount,
                "tax_rate": 0.13,
                "tax_amount": tax_amount,
                "declaration_period": "2024-01",
                "status": "submitted",
            })

        logger.info(f"成功生成 {len(declarations)} 条税务申报记录")
        return declarations

    def generate_test_scenarios(self) -> Dict[str, Dict]:
        """Describe the eight test scenarios and their expected outcomes.

        Returns:
            A dict keyed ``scenario_1`` .. ``scenario_8``; each value has the
            keys listed in ``_SCENARIO_FIELDS``.
        """
        return {
            f"scenario_{index}": dict(zip(self._SCENARIO_FIELDS, row))
            for index, row in enumerate(self._SCENARIO_ROWS, start=1)
        }

    def save_data(self, data: Any, filename: str):
        """Write ``data`` as pretty-printed UTF-8 JSON into the output directory.

        Args:
            data: Any JSON-serialisable object; values ``json`` cannot encode
                are stringified via ``default=str``.
            filename: File name relative to ``self.output_dir``.
        """
        filepath = os.path.join(self.output_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2, default=str)
        logger.info(f"数据已保存到: {filepath}")

    def generate_all(self):
        """Generate every data set, write the JSON files and print statistics.

        Returns:
            The summary dict that is also written to ``summary.json``.
        """
        logger.info("开始生成收入完整性检测专项测试数据...")

        # 1. Build the data sets.
        streamers = self.generate_streamer_info()
        contracts = self.generate_revenue_sharing_contracts(streamers)
        recharges = self.generate_recharge_data(streamers)
        declarations = self.generate_tax_declarations(streamers)
        scenarios = self.generate_test_scenarios()

        # 2. Persist them.
        self.save_data(streamers, "streamers.json")
        self.save_data(contracts, "contracts.json")
        self.save_data(recharges, "recharges.json")
        self.save_data(declarations, "tax_declarations.json")
        self.save_data(scenarios, "test_scenarios.json")

        # 3. Summary report (scenario descriptions are not counted as records).
        summary = {
            "generation_time": datetime.now().isoformat(),
            "description": "收入完整性检测算法专项测试数据集",
            "data_count": {
                "streamers": len(streamers),
                "contracts": len(contracts),
                "recharges": len(recharges),
                "tax_declarations": len(declarations),
                "test_scenarios": len(scenarios),
            },
            "total_records": len(streamers) + len(contracts) + len(recharges) + len(declarations),
            "risk_scenarios": list(scenarios.keys()),
        }
        self.save_data(summary, "summary.json")

        # 4. Log the headline numbers.
        logger.info("=" * 60)
        logger.info("收入完整性检测专项测试数据生成完成!")
        logger.info("=" * 60)
        logger.info(f"总计生成 {summary['total_records']} 条记录")
        logger.info(f"包含 {len(scenarios)} 个测试场景")
        logger.info("")
        logger.info("测试场景:")
        for scenario_id, scenario in scenarios.items():
            logger.info(f" {scenario_id}: {scenario['name']} - {scenario['expected_risk_level']}")

        # 5. Per-scenario statistics table on stdout.
        self.print_statistics(recharges, declarations, scenarios)

        return summary

    def print_statistics(self, recharges: List[Dict], declarations: List[Dict], scenarios: Dict):
        """Print recharge total, declared total and their gap for each scenario.

        Args:
            recharges: Recharge dicts with ``streamer_id`` and ``recharge_amount``.
            declarations: Declaration dicts with ``streamer_id`` and
                ``declared_amount``. All declarations of a streamer are summed,
                so a streamer that declares in several instalments is handled.
            scenarios: Scenario dicts as returned by ``generate_test_scenarios``.
        """
        print("\n" + "=" * 80)
        print("收入完整性检测算法测试数据统计表")
        print("=" * 80)

        for scenario_id, scenario in scenarios.items():
            streamer_id = scenario['streamer_id']

            recharge_total = sum(
                r['recharge_amount'] for r in recharges if r['streamer_id'] == streamer_id
            )
            declared_amount = sum(
                d['declared_amount'] for d in declarations if d['streamer_id'] == streamer_id
            )

            difference = recharge_total - declared_amount
            # Guard against division by zero when a streamer has no recharges.
            difference_rate = (difference / recharge_total * 100) if recharge_total > 0 else 0

            print(f"\n场景 {scenario_id}: {scenario['name']}")
            print(f" 预期风险等级: {scenario['expected_risk_level']}")
            print(f" 充值总额: ¥{recharge_total:,.2f}")
            print(f" 申报金额: ¥{declared_amount:,.2f}")
            print(f" 差异金额: ¥{difference:,.2f}")
            print(f" 差异率: {difference_rate:.2f}%")

def main():
    """Print a banner, generate all test data and report where it was written.

    Exits the process with status 1 if generation fails; the failure is logged
    with its traceback and a short message is printed.
    """
    banner_lines = (
        "本工具将为收入完整性检测算法生成8个不同风险等级的测试场景:",
        " 1. 严重漏报 (CRITICAL)",
        " 2. 高风险 (HIGH)",
        " 3. 中风险 (MEDIUM)",
        " 4. 低风险 (LOW)",
        " 5. 正常 (LOW)",
        " 6. 完全未申报 (CRITICAL)",
        " 7. 多平台收入 (HIGH)",
        " 8. 分批申报 (MEDIUM)",
    )

    print("=" * 80)
    print("收入完整性检测算法 - 专项测试数据生成器")
    print("=" * 80)
    print()
    for line in banner_lines:
        print(line)
    print("=" * 80)
    print()

    generator = RevenueTestDataGenerator()

    try:
        generator.generate_all()
    except Exception as e:
        # loguru has no ``exc_info`` keyword: extra kwargs are treated as
        # str.format arguments. ``logger.exception`` records the traceback, and
        # passing ``e`` as a format argument keeps braces in its text harmless.
        logger.exception("生成测试数据失败: {}", e)
        print(f"\n❌ 生成失败: {e}")
        sys.exit(1)
    else:
        print("\n✅ 测试数据生成成功!")
        # Report the directory the generator actually used.
        print(f"📂 数据位置: {generator.output_dir}/")
        print("\n📋 使用方法:")
        print(" 1. 查看测试场景: cat test_scenarios.json")
        print(" 2. 查看统计数据: cat summary.json")
        print(" 3. 运行算法测试: 使用 streamer_id 进行测试")
        print("\n🔬 API测试示例:")
        print(' curl -X POST http://localhost:8000/api/v1/detect \\')
        print(' -H "Content-Type: application/json" \\')
        print(' -d \'{"streamer_id": "TEST_001", "period": "2024-01"}\'')


if __name__ == "__main__":
    main()