deep-risk/backend/app/services/risk_detection/algorithms/revenue_integrity.py
2025-12-14 20:08:27 +08:00

1039 lines
47 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
收入完整性检测算法
检测平台充值金额与申报收入是否匹配,识别隐瞒收入风险
根据【01 风险审查规则.md】的定义
- 核查平台用户充值记录与税务申报数据的一致性
- 检测未提现收入是否完整申报
- 识别私户收款等隐匿收入行为
- 计算收入差异率并评级
风险等级判定标准:
- CRITICAL严重差异率 > 50% 或 差异金额 > 10万元
- HIGH高风险差异率 > 30% 或 差异金额 > 5万元
- MEDIUM中风险差异率 > 10% 或 差异金额 > 1万元
- LOW低风险差异率 > 5% 或 差异金额 > 5000元
- NONE无风险差异在可接受范围内<= 5% 且 <= 5000元
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
from decimal import Decimal
from sqlalchemy import select, func, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
from loguru import logger
from .base import (
RiskDetectionAlgorithm,
DetectionContext,
DetectionResult,
RiskEvidence,
)
from app.models.risk_detection import RiskLevel
from app.models.streamer import PlatformRecharge, StreamerInfo
from app.models.tax_declaration import TaxDeclaration
from app.models.contract import RevenueSharingContract
class RevenueIntegrityAlgorithm(RiskDetectionAlgorithm):
    """Revenue integrity detection algorithm."""

    def __init__(self):
        """Run the base initialiser, then install the default configuration."""
        super().__init__()
        # Default configuration. Rate thresholds are percentages; amount
        # thresholds are expressed in yuan.
        self.config = dict(
            critical_rate_threshold=50.0,        # CRITICAL: difference-rate threshold (%)
            critical_amount_threshold=100000.0,  # CRITICAL: difference-amount threshold (yuan)
            high_rate_threshold=30.0,            # HIGH: difference-rate threshold (%)
            high_amount_threshold=50000.0,       # HIGH: difference-amount threshold (yuan)
            medium_rate_threshold=10.0,          # MEDIUM: difference-rate threshold (%)
            medium_amount_threshold=10000.0,     # MEDIUM: difference-amount threshold (yuan)
            low_rate_threshold=5.0,              # LOW: difference-rate threshold (%)
            low_amount_threshold=5000.0,         # LOW: difference-amount threshold (yuan)
            top_records_limit=10,                # number of TOP recharge records returned
        )
def get_algorithm_code(self) -> str:
    """Return the code string of this algorithm."""
    algorithm_code = "REVENUE_INTEGRITY_CHECK"
    return algorithm_code
def get_algorithm_name(self) -> str:
    """Return the display name of this algorithm."""
    algorithm_name = "收入完整性检测"
    return algorithm_name
def get_description(self) -> str:
    """Return a one-sentence description of what the algorithm checks."""
    fragments = (
        "通过对比平台充值记录与税务申报数据,",
        "检测是否存在隐瞒收入、虚报或少报收入的风险",
    )
    return "".join(fragments)
async def validate_data(self, context: DetectionContext):
    """Validate the parameters required by the detection.

    A streamer must be identifiable, either through ``streamer_id`` or
    through ``entity_id`` when ``entity_type`` is ``"streamer"``, and a
    ``period`` accepted by ``_parse_period`` must be present.

    Raises:
        ValueError: if the streamer cannot be identified, the period is
            missing, or the period cannot be parsed.
    """
    logger.debug("=" * 80)
    logger.debug("[DEBUG] 开始验证数据完整性")
    logger.debug("=" * 80)

    # Prefer streamer_id; fall back to entity_id for streamer entities.
    streamer_id = context.get_parameter("streamer_id")
    logger.debug(f"[DEBUG] 从参数获取到的 streamer_id: {streamer_id}")
    if not streamer_id:
        entity_id = context.get_parameter("entity_id")
        entity_type = context.get_parameter("entity_type")
        logger.debug(f"[DEBUG] 未找到 streamer_id尝试从 entity_id 获取: {entity_id}, entity_type: {entity_type}")
        if not (entity_id and entity_type == "streamer"):
            error_msg = "缺少必要参数streamer_id或entity_id实体类型需为streamer"
            logger.error(f"[DEBUG] 数据验证失败: {error_msg}")
            raise ValueError(error_msg)
        # From here on streamer_id is guaranteed to be truthy, so no second
        # "missing streamer_id" check is needed.
        streamer_id = entity_id
        logger.debug(f"[DEBUG] 使用 entity_id 作为 streamer_id: {streamer_id}")

    period = context.get_parameter("period")
    logger.debug(f"[DEBUG] 获取到的 period: {period}")
    if not period:
        error_msg = "缺少必要参数period"
        logger.error(f"[DEBUG] 数据验证失败: {error_msg}")
        raise ValueError(error_msg)

    # Validate the period format by actually parsing it.
    logger.debug(f"[DEBUG] 开始验证期间格式: {period}")
    try:
        start_date, end_date = self._parse_period(period)
    except ValueError as e:
        detail = str(e)
        # _parse_period already prefixes its message; do not prefix it twice.
        if detail.startswith("期间格式错误"):
            error_msg = detail
        else:
            error_msg = f"期间格式错误:{detail}"
        logger.error(f"[DEBUG] {error_msg}")
        raise ValueError(error_msg) from e
    logger.debug(f"[DEBUG] 期间格式验证成功")
    logger.debug(f"[DEBUG] 解析出的日期范围: {start_date} ~ {end_date}")

    logger.debug(f"[DEBUG] 数据验证完成,所有参数有效")
    logger.debug("=" * 80)
async def _do_detect(self, context: DetectionContext) -> DetectionResult:
    """Run the revenue integrity detection for one streamer and period.

    Steps: load streamer info, parse the period, load recharge data,
    load tax declarations, load the revenue-sharing ratio, analyse the gap,
    grade the risk, generate description/suggestion, build the result and
    attach the evidence chain.

    Returns:
        The populated DetectionResult. Failures (missing DB session, unknown
        streamer, any exception raised by a step) are not raised; they are
        converted into a result by ``_create_error_result``.
    """
    logger.debug("=" * 80)
    logger.debug("[DEBUG] 开始执行收入完整性检测主流程")
    logger.debug("=" * 80)

    # Prefer streamer_id; fall back to entity_id for streamer entities.
    streamer_id = context.get_parameter("streamer_id")
    logger.debug(f"[DEBUG] 获取到 streamer_id: {streamer_id}")
    if not streamer_id:
        entity_id = context.get_parameter("entity_id")
        entity_type = context.get_parameter("entity_type")
        logger.debug(f"[DEBUG] 未找到 streamer_id从 entity_id 获取: {entity_id}, entity_type: {entity_type}")
        if entity_id and entity_type == "streamer":
            streamer_id = entity_id
            logger.debug(f"[DEBUG] 使用 entity_id 作为 streamer_id: {streamer_id}")

    period = context.get_parameter("period")  # e.g. YYYY-MM
    logger.debug(f"[DEBUG] 获取到 period: {period}")
    # Only logged here; the value does not influence the computation below.
    comparison_type = context.get_parameter("comparison_type", "monthly")  # monthly/quarterly/yearly
    logger.debug(f"[DEBUG] 获取到 comparison_type: {comparison_type}")

    db_session = context.db_session
    if not db_session:
        logger.error("[DEBUG] 缺少数据库会话,无法继续执行")
        return self._create_error_result(context, "缺少数据库会话")
    logger.debug(f"[DEBUG] 数据库会话已建立")

    try:
        logger.info(f"开始执行收入完整性检测主播ID={streamer_id}, 期间={period}")

        # 1. Streamer basic information
        logger.debug("[DEBUG] 步骤 1: 获取主播基本信息")
        streamer_info = await self._get_streamer_info(db_session, streamer_id)
        logger.debug(f"[DEBUG] 主播信息获取结果: {streamer_info is not None}")
        if streamer_info:
            logger.debug(f"[DEBUG] 主播名称: {streamer_info.get('streamer_name')}, 实体类型: {streamer_info.get('entity_type')}")
        if not streamer_info:
            logger.error(f"[DEBUG] 找不到主播信息: {streamer_id}")
            return self._create_error_result(context, f"找不到主播信息:{streamer_id}")

        # 2. Period -> date range
        logger.debug("[DEBUG] 步骤 2: 解析期间")
        start_date, end_date = self._parse_period(period)
        logger.debug(f"[DEBUG] 解析出的日期范围: {start_date} ~ {end_date}")

        # 3. Platform recharge data
        logger.debug("[DEBUG] 步骤 3: 获取平台充值数据")
        recharge_data = await self._get_recharge_data(
            db_session, streamer_id, start_date, end_date
        )
        logger.debug(f"[DEBUG] 充值数据获取完成")
        logger.debug(f"[DEBUG] 充值总额: {recharge_data.get('total')}, 充值记录数: {recharge_data.get('count')}")

        # 4. Tax declaration data
        logger.debug("[DEBUG] 步骤 4: 获取税务申报数据")
        declaration_data = await self._get_declaration_data(
            db_session, streamer_info, period, start_date, end_date
        )
        logger.debug(f"[DEBUG] 申报数据获取完成")
        logger.debug(f"[DEBUG] 申报总额: {declaration_data.get('total')}, 申报记录数: {declaration_data.get('count')}")

        # 5. Revenue-sharing contract ratio
        logger.debug("[DEBUG] 步骤 5: 获取分成协议")
        contract_ratio = await self._get_contract_ratio(
            db_session, streamer_id, start_date, end_date
        )
        logger.debug(f"[DEBUG] 分成比例获取完成: {contract_ratio}%")

        # 6. Gap analysis
        logger.debug("[DEBUG] 步骤 6: 计算收入差异")
        analysis = self._analyze_revenue_gap(
            recharge_data, declaration_data, contract_ratio
        )
        logger.debug(f"[DEBUG] 差异分析完成")
        logger.debug(f"[DEBUG] 差异金额: {analysis.get('difference')}, 差异率: {analysis.get('difference_rate')}%")

        # 7. Risk grading
        logger.debug("[DEBUG] 步骤 7: 风险评估")
        risk_level, risk_score = self._calculate_risk_level(
            analysis["difference"],
            analysis["difference_rate"],
            analysis["recharge_total"]
        )
        logger.debug(f"[DEBUG] 风险等级: {risk_level.value}, 风险评分: {risk_score}")

        # 8. Description and suggestion
        logger.debug("[DEBUG] 步骤 8: 生成风险描述和建议")
        description = self._generate_description(
            analysis, streamer_info, period
        )
        suggestion = self._generate_suggestion(
            analysis, risk_level
        )
        logger.debug(f"[DEBUG] 描述和建议生成完成")

        # 9. Detection result
        logger.debug("[DEBUG] 步骤 9: 创建检测结果")
        ratio_value = analysis["contract_ratio"]
        expected_value = analysis["expected_revenue"]
        result = DetectionResult(
            task_id=context.task_id,
            rule_id=context.rule_id,
            entity_id=streamer_id,
            entity_type="streamer",
            risk_level=risk_level,
            risk_score=risk_score,
            description=description,
            suggestion=suggestion,
            risk_data={
                "period": period,
                "streamer_name": streamer_info.get("streamer_name", ""),
                "entity_type": streamer_info.get("entity_type", ""),
                "recharge_total": float(analysis["recharge_total"]),
                "recharge_count": analysis["recharge_count"],
                "declared_revenue": float(analysis["declared_revenue"]),
                "declaration_count": analysis["declaration_count"],
                "difference": float(analysis["difference"]),
                "difference_rate": float(analysis["difference_rate"]),
                # Compare against None explicitly: a legitimate Decimal("0")
                # is falsy and must be reported as 0.0, not as None.
                "contract_ratio": float(ratio_value) if ratio_value is not None else None,
                "expected_revenue": float(expected_value) if expected_value is not None else None,
            },
        )
        logger.debug(f"[DEBUG] 检测结果创建完成")

        # 10. Evidence chain
        logger.debug("[DEBUG] 步骤 10: 构建证据链")
        await self._build_evidence_chain(
            result, recharge_data, declaration_data, analysis, period
        )
        logger.debug(f"[DEBUG] 证据链构建完成,证据数量: {len(result.evidence)}")

        logger.info(
            f"收入完整性检测完成主播ID={streamer_id}, "
            f"风险等级={risk_level.value}, 评分={risk_score}"
        )
        logger.debug(f"[DEBUG] 检测流程全部完成")
        logger.debug("=" * 80)
        return result
    except Exception as e:
        # Top-level boundary of the detection. loguru has no `exc_info`
        # keyword (it would only be treated as a format argument), so use
        # logger.exception to actually record the traceback.
        logger.exception(f"收入完整性检测执行失败: {str(e)}")
        logger.debug(f"[DEBUG] 异常信息: {str(e)}")
        logger.debug("=" * 80)
        return self._create_error_result(context, f"检测执行失败: {str(e)}")
async def _get_streamer_info(
    self,
    db_session: AsyncSession,
    streamer_id: str
) -> Optional[Dict[str, Any]]:
    """Load the streamer row and return its identifying fields as a dict.

    Returns:
        A dict with ``streamer_id``, ``streamer_name``, ``entity_type``,
        ``tax_registration_no``, ``unified_social_credit_code`` and
        ``id_card_no``, or ``None`` when no row matches ``streamer_id``.
    """

    def _mask(value):
        # Keep only the last four characters so that ID-card / tax numbers
        # are never written to the log in plaintext.
        if not value:
            return value
        text = str(value)
        if len(text) <= 4:
            return "*" * len(text)
        return "*" * (len(text) - 4) + text[-4:]

    logger.debug(f"[DEBUG] >>> _get_streamer_info: 开始获取主播信息, streamer_id={streamer_id}")
    stmt = select(StreamerInfo).where(StreamerInfo.streamer_id == streamer_id)
    logger.debug(f"[DEBUG] 执行SQL查询: {stmt}")
    result = await db_session.execute(stmt)
    streamer = result.scalar_one_or_none()

    if not streamer:
        logger.warning(f"[DEBUG] 未找到主播信息: {streamer_id}")
        logger.debug(f"[DEBUG] <<< _get_streamer_info: 返回 None")
        return None

    streamer_info = {
        "streamer_id": streamer.streamer_id,
        "streamer_name": streamer.streamer_name,
        "entity_type": streamer.entity_type,
        "tax_registration_no": streamer.tax_registration_no,
        "unified_social_credit_code": streamer.unified_social_credit_code,
        "id_card_no": streamer.id_card_no,
    }
    logger.debug(f"[DEBUG] 主播信息查询成功")
    logger.debug(f"[DEBUG] 主播名称: {streamer.streamer_name}, 实体类型: {streamer.entity_type}")
    logger.debug(
        f"[DEBUG] 税号信息: 税务登记号={_mask(streamer.tax_registration_no)}, "
        f"统一社会信用代码={_mask(streamer.unified_social_credit_code)}, "
        f"身份证号={_mask(streamer.id_card_no)}"
    )
    logger.debug(f"[DEBUG] <<< _get_streamer_info: 返回主播信息")
    return streamer_info
async def _get_recharge_data(
    self,
    db_session: AsyncSession,
    streamer_id: str,
    start_date: datetime,
    end_date: datetime
) -> Dict[str, Any]:
    """Aggregate the successful platform recharges inside the period.

    ``end_date`` is treated as an inclusive calendar day: the time filter
    is the half-open range ``[start_date, day after end_date)``, so
    recharges made at any time on the last day of the period are counted.

    Returns:
        {
            "total": Decimal,           # summed actual_amount_cny
            "count": int,               # number of matching records
            "top_records": List[Dict],  # largest recharges, at most
                                        # config["top_records_limit"]
        }
    """
    logger.debug(f"[DEBUG] >>> _get_recharge_data: 开始获取充值数据")
    logger.debug(f"[DEBUG] 参数: streamer_id={streamer_id}, start_date={start_date}, end_date={end_date}")

    # _parse_period yields end_date at 00:00:00 of the last day; comparing
    # timestamps with `<= end_date` would drop almost that entire day.
    # Normalise to the calendar day and use an exclusive upper bound.
    end_exclusive = datetime(end_date.year, end_date.month, end_date.day) + timedelta(days=1)

    # Shared by the summary and the TOP query so the two cannot drift apart.
    # NOTE(review): recharges are matched on PlatformRecharge.user_id ==
    # streamer_id — confirm that this column identifies the streamer.
    filters = and_(
        PlatformRecharge.user_id == streamer_id,
        PlatformRecharge.recharge_time >= start_date,
        PlatformRecharge.recharge_time < end_exclusive,
        PlatformRecharge.status == "success",
    )

    # Total amount and record count
    logger.debug(f"[DEBUG] 查询充值总额和记录数")
    stmt_summary = select(
        func.count(PlatformRecharge.id).label("count"),
        func.coalesce(func.sum(PlatformRecharge.actual_amount_cny), 0).label("total")
    ).where(filters)
    logger.debug(f"[DEBUG] 执行SQL: {stmt_summary}")
    result = await db_session.execute(stmt_summary)
    summary = result.one()
    logger.debug(f"[DEBUG] 查询结果: count={summary.count}, total={summary.total}")

    # Largest individual recharges
    top_limit = self.config.get("top_records_limit", 10)
    logger.debug(f"[DEBUG] 查询TOP {top_limit} 充值记录")
    stmt_top = select(PlatformRecharge).where(filters).order_by(
        PlatformRecharge.actual_amount_cny.desc()
    ).limit(top_limit)
    logger.debug(f"[DEBUG] 执行SQL: {stmt_top}")
    result_top = await db_session.execute(stmt_top)
    top_records = result_top.scalars().all()
    logger.debug(f"[DEBUG] TOP记录查询完成记录数: {len(top_records)}")

    recharge_data = {
        # Go through str() so a float/int sum does not pick up binary noise.
        "total": Decimal(str(summary.total)),
        "count": summary.count,
        "top_records": [
            {
                "recharge_id": r.recharge_id,
                "user_name": r.user_name,
                "amount": float(r.actual_amount_cny),
                "time": r.recharge_time.strftime("%Y-%m-%d %H:%M:%S"),
                "payment_method": r.payment_method,
            }
            for r in top_records
        ]
    }
    logger.debug(f"[DEBUG] 充值数据汇总:")
    logger.debug(f"[DEBUG] 总金额: {recharge_data['total']}")
    logger.debug(f"[DEBUG] 记录数: {recharge_data['count']}")
    logger.debug(f"[DEBUG] TOP记录数: {len(recharge_data['top_records'])}")
    if recharge_data['top_records']:
        logger.debug(f"[DEBUG] 最高充值金额: {recharge_data['top_records'][0]['amount']}")
    logger.debug(f"[DEBUG] <<< _get_recharge_data: 返回充值数据")
    return recharge_data
async def _get_declaration_data(
    self,
    db_session: AsyncSession,
    streamer_info: Dict[str, Any],
    period: str,
    start_date: datetime,
    end_date: datetime
) -> Dict[str, Any]:
    """Collect the tax declarations that belong to the streamer and period.

    Declarations are matched on any taxpayer identifier present in
    ``streamer_info`` and on either ``tax_period == period`` or a
    ``declaration_date`` inside ``[start_date, end_date]``.

    Returns:
        {
            "total": Decimal,       # summed sales_revenue
            "count": int,           # number of declaration records
            "records": List[Dict],  # per-declaration details
        }
        All-empty values are returned when the streamer has no taxpayer
        identifier at all.
    """

    def _mask(value):
        # Keep only the last four characters so that taxpayer identifiers
        # are never written to the log in plaintext.
        text = str(value)
        if len(text) <= 4:
            return "*" * len(text)
        return "*" * (len(text) - 4) + text[-4:]

    logger.debug(f"[DEBUG] >>> _get_declaration_data: 开始获取申报数据")
    logger.debug(f"[DEBUG] 主播ID: {streamer_info.get('streamer_id')}, 期间: {period}")
    logger.debug(f"[DEBUG] 日期范围: {start_date} ~ {end_date}")

    # An individual may declare under an ID-card number, a company under its
    # unified social credit code; collect every identifier that is present.
    id_fields = (
        ("tax_registration_no", "税务登记号"),
        ("unified_social_credit_code", "统一社会信用代码"),
        ("id_card_no", "身份证号"),
    )
    taxpayer_ids = []
    for field, label in id_fields:
        value = streamer_info.get(field)
        if not value:
            continue
        logger.debug(f"[DEBUG] {label}: {_mask(value)}")
        # The same number can be stored in more than one field; keep the IN
        # list free of duplicates.
        if value not in taxpayer_ids:
            taxpayer_ids.append(value)

    if not taxpayer_ids:
        logger.warning(f"[DEBUG] 主播 {streamer_info['streamer_id']} 没有税号信息,无法查询申报数据")
        logger.debug(f"[DEBUG] <<< _get_declaration_data: 返回空数据")
        return {"total": Decimal("0"), "count": 0, "records": []}
    logger.debug(f"[DEBUG] 查询税号列表: {[_mask(v) for v in taxpayer_ids]}")

    # NOTE(review): the OR below also picks up declarations *filed* inside
    # the period for a different tax_period — confirm this is intended.
    # NOTE(review): if declaration_date carries a time component, the
    # `<= end_date` bound (midnight of the last day) excludes most of that
    # day — confirm the column type.
    logger.debug(f"[DEBUG] 查询税务申报记录")
    stmt = select(TaxDeclaration).where(
        and_(
            TaxDeclaration.taxpayer_id.in_(taxpayer_ids),
            or_(
                TaxDeclaration.tax_period == period,  # exact period match
                and_(  # declaration date inside the period
                    TaxDeclaration.declaration_date >= start_date,
                    TaxDeclaration.declaration_date <= end_date,
                )
            )
        )
    )
    logger.debug(f"[DEBUG] 执行SQL: {stmt}")
    result = await db_session.execute(stmt)
    declarations = result.scalars().all()
    logger.debug(f"[DEBUG] 查询到申报记录数: {len(declarations)}")

    total_revenue = Decimal("0")
    records = []
    for decl in declarations:
        # A missing sales_revenue counts as zero.
        revenue = Decimal(str(decl.sales_revenue or 0))
        total_revenue += revenue
        records.append({
            "declaration_id": decl.vat_declaration_id,
            "taxpayer_name": decl.taxpayer_name,
            "tax_period": decl.tax_period,
            "sales_revenue": float(revenue),
            "declaration_date": decl.declaration_date.strftime("%Y-%m-%d") if decl.declaration_date else None,
        })
        logger.debug(f"[DEBUG] 申报记录: {decl.vat_declaration_id}, 销售收入: {revenue}, 申报期间: {decl.tax_period}")

    declaration_data = {
        "total": total_revenue,
        "count": len(records),
        "records": records,
    }
    logger.debug(f"[DEBUG] 申报数据汇总:")
    logger.debug(f"[DEBUG] 总收入: {declaration_data['total']}")
    logger.debug(f"[DEBUG] 记录数: {declaration_data['count']}")
    logger.debug(f"[DEBUG] <<< _get_declaration_data: 返回申报数据")
    return declaration_data
async def _get_contract_ratio(
    self,
    db_session: AsyncSession,
    streamer_id: str,
    start_date: datetime,
    end_date: datetime
) -> Optional[Decimal]:
    """Look up the streamer's revenue-sharing ratio for the period.

    Selects the most recently started contract with status ``"active"``
    whose validity overlaps ``[start_date, end_date]`` (an open-ended
    contract has no end date).

    Returns:
        ``streamer_ratio * 100`` as a Decimal, or ``None`` when there is no
        matching contract or its ``streamer_ratio`` is empty/zero.
    """
    logger.debug(f"[DEBUG] >>> _get_contract_ratio: 开始获取分成协议")
    logger.debug(f"[DEBUG] 参数: streamer_id={streamer_id}, start_date={start_date}, end_date={end_date}")

    not_yet_ended = or_(
        RevenueSharingContract.contract_end_date.is_(None),
        RevenueSharingContract.contract_end_date >= start_date,
    )
    overlapping_active = and_(
        RevenueSharingContract.streamer_id == streamer_id,
        RevenueSharingContract.contract_start_date <= end_date,
        not_yet_ended,
        RevenueSharingContract.contract_status == "active",
    )
    stmt = (
        select(RevenueSharingContract)
        .where(overlapping_active)
        .order_by(RevenueSharingContract.contract_start_date.desc())
        .limit(1)
    )
    logger.debug(f"[DEBUG] 执行SQL: {stmt}")
    query_result = await db_session.execute(stmt)
    contract = query_result.scalar_one_or_none()

    if not contract:
        logger.debug(f"[DEBUG] 未找到有效的分成协议")
        logger.debug(f"[DEBUG] <<< _get_contract_ratio: 返回 None无分成比例")
        return None

    logger.debug(f"[DEBUG] 找到分成协议")
    logger.debug(f"[DEBUG] 协议开始日期: {contract.contract_start_date}")
    logger.debug(f"[DEBUG] 协议结束日期: {contract.contract_end_date}")
    logger.debug(f"[DEBUG] 协议状态: {contract.contract_status}")
    logger.debug(f"[DEBUG] 主播分成比例: {contract.streamer_ratio}%")

    if not contract.streamer_ratio:
        logger.debug(f"[DEBUG] <<< _get_contract_ratio: 返回 None无分成比例")
        return None

    # NOTE(review): the *100 presumes streamer_ratio is stored as a fraction
    # (e.g. 0.5 for 50%) — confirm against the contract model.
    ratio = Decimal(str(contract.streamer_ratio)) * Decimal("100")
    logger.debug(f"[DEBUG] <<< _get_contract_ratio: 返回分成比例 {ratio}%")
    return ratio
def _analyze_revenue_gap(
self,
recharge_data: Dict[str, Any],
declaration_data: Dict[str, Any],
contract_ratio: Optional[Decimal]
) -> Dict[str, Any]:
"""
分析收入差异
返回:
{
"recharge_total": Decimal, # 平台充值总额
"recharge_count": int,
"declared_revenue": Decimal, # 申报收入
"declaration_count": int,
"contract_ratio": Optional[Decimal], # 分成比例
"expected_revenue": Optional[Decimal], # 预期收入(基于分成比例)
"difference": Decimal, # 差异金额
"difference_rate": Decimal, # 差异率 (%)
}
"""
logger.debug(f"[DEBUG] >>> _analyze_revenue_gap: 开始分析收入差异")
logger.debug(f"[DEBUG] 充值数据: 总金额={recharge_data['total']}, 记录数={recharge_data['count']}")
logger.debug(f"[DEBUG] 申报数据: 总收入={declaration_data['total']}, 记录数={declaration_data['count']}")
logger.debug(f"[DEBUG] 分成比例: {contract_ratio}%")
recharge_total = recharge_data["total"]
declared_revenue = declaration_data["total"]
# 计算预期收入(如果有分成比例)
expected_revenue = None
if contract_ratio is not None:
expected_revenue = recharge_total * contract_ratio / Decimal("100")
logger.debug(f"[DEBUG] 计算预期收入: {recharge_total} * {contract_ratio}% = {expected_revenue}")
else:
logger.debug(f"[DEBUG] 无分成比例,不计算预期收入")
# 计算差异(使用预期收入或充值总额)
if expected_revenue is not None:
difference = expected_revenue - declared_revenue
base_amount = expected_revenue
logger.debug(f"[DEBUG] 基于预期收入计算差异: {expected_revenue} - {declared_revenue} = {difference}")
else:
difference = recharge_total - declared_revenue
base_amount = recharge_total
logger.debug(f"[DEBUG] 基于充值总额计算差异: {recharge_total} - {declared_revenue} = {difference}")
# 计算差异率
if base_amount > 0:
difference_rate = (abs(difference) / base_amount) * Decimal("100")
logger.debug(f"[DEBUG] 计算差异率: |{difference}| / {base_amount} * 100 = {difference_rate}%")
else:
difference_rate = Decimal("0")
logger.debug(f"[DEBUG] 基准金额为0差异率为0%")
analysis = {
"recharge_total": recharge_total,
"recharge_count": recharge_data["count"],
"declared_revenue": declared_revenue,
"declaration_count": declaration_data["count"],
"contract_ratio": contract_ratio,
"expected_revenue": expected_revenue,
"difference": difference,
"difference_rate": difference_rate,
}
logger.debug(f"[DEBUG] 差异分析结果:")
logger.debug(f"[DEBUG] 充值总额: {analysis['recharge_total']}")
logger.debug(f"[DEBUG] 申报收入: {analysis['declared_revenue']}")
logger.debug(f"[DEBUG] 预期收入: {analysis['expected_revenue']}")
logger.debug(f"[DEBUG] 差异金额: {analysis['difference']}")
logger.debug(f"[DEBUG] 差异率: {analysis['difference_rate']}%")
logger.debug(f"[DEBUG] <<< _analyze_revenue_gap: 返回差异分析结果")
return analysis
def _calculate_risk_level(
    self,
    difference: Decimal,
    difference_rate: Decimal,
    recharge_total: Decimal
) -> Tuple[RiskLevel, float]:
    """Grade the revenue gap and compute a risk score.

    Rules:
    1. Both the difference rate and the difference amount are checked.
    2. Exceeding either threshold of a tier is enough to reach that tier.
    3. The score starts at the tier's floor and grows with the amount by
       which the rate exceeds the tier's rate threshold. When a tier is
       reached through the amount alone, the score is the tier's floor; it
       never drops below the floor.

    Score bands: CRITICAL 85-100, HIGH 70-85, MEDIUM 50-70, LOW 20-50,
    NONE 0-20.

    Returns:
        (RiskLevel, risk_score)
    """
    logger.debug(f"[DEBUG] >>> _calculate_risk_level: 开始计算风险等级")
    logger.debug(f"[DEBUG] 参数: 差异金额={difference}, 差异率={difference_rate}%, 充值总额={recharge_total}")

    if recharge_total == 0:
        logger.debug(f"[DEBUG] 充值总额为0返回无风险")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.NONE, 0.0")
        return RiskLevel.NONE, 0.0

    # Only under-reporting (difference > 0) is graded by the tiers below.
    # NOTE(review): an exact match (difference == 0) is also rated LOW/20.0
    # here, which is higher than a tiny positive gap — confirm intended.
    if difference <= 0:
        logger.debug(f"[DEBUG] 差异金额 <= 0申报收入 >= 预期/充值金额),风险较低")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.LOW, 20.0")
        return RiskLevel.LOW, 20.0

    logger.debug(f"[DEBUG] 检测到收入少报风险(差异金额 > 0开始风险等级判定")

    # Threshold configuration (via str() to get exact Decimal values)
    critical_rate = Decimal(str(self.config.get("critical_rate_threshold", 50.0)))
    critical_amount = Decimal(str(self.config.get("critical_amount_threshold", 100000.0)))
    high_rate = Decimal(str(self.config.get("high_rate_threshold", 30.0)))
    high_amount = Decimal(str(self.config.get("high_amount_threshold", 50000.0)))
    medium_rate = Decimal(str(self.config.get("medium_rate_threshold", 10.0)))
    medium_amount = Decimal(str(self.config.get("medium_amount_threshold", 10000.0)))
    low_rate = Decimal(str(self.config.get("low_rate_threshold", 5.0)))
    low_amount = Decimal(str(self.config.get("low_amount_threshold", 5000.0)))
    logger.debug(f"[DEBUG] 风险阈值配置:")
    logger.debug(f"[DEBUG] 严重: 差异率>{critical_rate}% 或 差异金额>{critical_amount}")
    logger.debug(f"[DEBUG] 高: 差异率>{high_rate}% 或 差异金额>{high_amount}")
    logger.debug(f"[DEBUG] 中: 差异率>{medium_rate}% 或 差异金额>{medium_amount}")
    logger.debug(f"[DEBUG] 低: 差异率>{low_rate}% 或 差异金额>{low_amount}")

    def _band_score(floor: float, ceiling: float, bonus: float) -> float:
        # The bonus is negative when the tier was reached via the amount
        # threshold while the rate is below the tier's rate threshold;
        # clamp it so the score stays inside [floor, ceiling].
        return min(ceiling, floor + max(0.0, bonus))

    if difference_rate > critical_rate or difference > critical_amount:
        # CRITICAL: score 85-100
        final_score = _band_score(
            85.0, 100.0, min(15.0, float(difference_rate - critical_rate) / 10.0)
        )
        logger.debug(f"[DEBUG] 判定为严重风险")
        logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {critical_rate}% 或 差异金额 {difference} > {critical_amount}")
        logger.debug(f"[DEBUG] 评分: {final_score}")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.CRITICAL, {final_score}")
        return RiskLevel.CRITICAL, final_score
    elif difference_rate > high_rate or difference > high_amount:
        # HIGH: score 70-85
        final_score = _band_score(
            70.0, 85.0, min(15.0, float(difference_rate - high_rate) / 5.0)
        )
        logger.debug(f"[DEBUG] 判定为高风险")
        logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {high_rate}% 或 差异金额 {difference} > {high_amount}")
        logger.debug(f"[DEBUG] 评分: {final_score}")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.HIGH, {final_score}")
        return RiskLevel.HIGH, final_score
    elif difference_rate > medium_rate or difference > medium_amount:
        # MEDIUM: score 50-70
        final_score = _band_score(
            50.0, 70.0, min(20.0, float(difference_rate - medium_rate) / 2.0)
        )
        logger.debug(f"[DEBUG] 判定为中风险")
        logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {medium_rate}% 或 差异金额 {difference} > {medium_amount}")
        logger.debug(f"[DEBUG] 评分: {final_score}")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.MEDIUM, {final_score}")
        return RiskLevel.MEDIUM, final_score
    elif difference_rate > low_rate or difference > low_amount:
        # LOW: score 20-50
        final_score = _band_score(
            20.0, 50.0, min(30.0, float(difference_rate - low_rate) * 6.0)
        )
        logger.debug(f"[DEBUG] 判定为低风险")
        logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {low_rate}% 或 差异金额 {difference} > {low_amount}")
        logger.debug(f"[DEBUG] 评分: {final_score}")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.LOW, {final_score}")
        return RiskLevel.LOW, final_score
    else:
        # NONE: score 0-20
        score = min(20.0, float(difference_rate) * 4.0)
        logger.debug(f"[DEBUG] 判定为无风险")
        logger.debug(f"[DEBUG] 差异率 {difference_rate}% <= {low_rate}% 且 差异金额 {difference} <= {low_amount}")
        logger.debug(f"[DEBUG] 评分: {score}")
        logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.NONE, {score}")
        return RiskLevel.NONE, score
def _generate_description(
self,
analysis: Dict[str, Any],
streamer_info: Dict[str, Any],
period: str
) -> str:
"""生成风险描述"""
logger.debug(f"[DEBUG] >>> _generate_description: 开始生成风险描述")
recharge_total = analysis["recharge_total"]
declared_revenue = analysis["declared_revenue"]
difference = analysis["difference"]
difference_rate = analysis["difference_rate"]
expected_revenue = analysis.get("expected_revenue")
contract_ratio = analysis.get("contract_ratio")
streamer_name = streamer_info.get("streamer_name", "")
entity_type_map = {
"individual": "个人",
"individual_business": "个体工商户",
"enterprise": "企业",
}
entity_type = entity_type_map.get(
streamer_info.get("entity_type", ""),
streamer_info.get("entity_type", "")
)
logger.debug(f"[DEBUG] 描述生成参数:")
logger.debug(f"[DEBUG] 主播名称: {streamer_name}")
logger.debug(f"[DEBUG] 实体类型: {entity_type}")
logger.debug(f"[DEBUG] 期间: {period}")
logger.debug(f"[DEBUG] 充值总额: {recharge_total}")
logger.debug(f"[DEBUG] 申报收入: {declared_revenue}")
logger.debug(f"[DEBUG] 预期收入: {expected_revenue}")
logger.debug(f"[DEBUG] 分成比例: {contract_ratio}%")
logger.debug(f"[DEBUG] 差异金额: {difference}")
logger.debug(f"[DEBUG] 差异率: {difference_rate}%")
if difference > 0:
# 收入少报
logger.debug(f"[DEBUG] 生成收入少报描述")
if expected_revenue is not None:
desc = (
f"检测到收入完整性风险:主播【{streamer_name}】({entity_type}"
f"{period} 期间,平台充值总额为 {recharge_total:,.2f}元,"
f"按分成比例 {contract_ratio}% 计算,预期申报收入为 {expected_revenue:,.2f}元,"
f"但实际申报收入仅为 {declared_revenue:,.2f}元,"
f"存在 {difference:,.2f}元差额(差异率 {difference_rate:.2f}%"
f"可能存在隐瞒收入或少报收入的情况。"
)
else:
desc = (
f"检测到收入完整性风险:主播【{streamer_name}】({entity_type}"
f"{period} 期间,平台充值总额为 {recharge_total:,.2f}元,"
f"但申报收入仅为 {declared_revenue:,.2f}元,"
f"存在 {difference:,.2f}元差额(差异率 {difference_rate:.2f}%"
f"可能存在隐瞒收入或少报收入的情况。"
)
elif difference < 0:
# 申报收入超过充值金额
logger.debug(f"[DEBUG] 生成申报收入超限描述")
desc = (
f"主播【{streamer_name}】({entity_type})在 {period} 期间,"
f"申报收入 {declared_revenue:,.2f}元 超过平台充值总额 {recharge_total:,.2f}元,"
f"超出 {abs(difference):,.2f}元,可能存在虚报收入或数据录入错误。"
)
else:
# 数据一致
logger.debug(f"[DEBUG] 生成数据一致描述")
desc = (
f"主播【{streamer_name}】({entity_type})在 {period} 期间,"
f"收入完整性检查通过,平台充值与申报收入基本一致。"
)
logger.debug(f"[DEBUG] 描述生成完成")
logger.debug(f"[DEBUG] <<< _generate_description: 返回风险描述")
return desc
def _generate_suggestion(
    self,
    analysis: Dict[str, Any],
    risk_level: RiskLevel
) -> str:
    """Pick the remediation advice for a risk level.

    For CRITICAL/HIGH/MEDIUM/LOW there are two texts each: one used when
    ``analysis["difference"] > 0`` (under-declared) and one otherwise. Any
    other level gets a single generic text.
    """
    logger.debug(f"[DEBUG] >>> _generate_suggestion: 开始生成整改建议")
    logger.debug(f"[DEBUG] 风险等级: {risk_level.value}")
    difference = analysis["difference"]
    logger.debug(f"[DEBUG] 差异金额: {difference}")

    # (level, debug line, text when under-declared, text otherwise)
    advice_table = (
        (
            RiskLevel.CRITICAL,
            "[DEBUG] 生成严重风险建议",
            "【严重风险】发现重大收入差异,建议立即采取以下措施:\n"
            "1. 立即核实平台充值记录与实际收入的对应关系;\n"
            "2. 检查是否存在未申报的收入,特别是私户收款;\n"
            "3. 核查分成协议的执行情况;\n"
            "4. 补充申报遗漏的收入,并计算应补缴税款;\n"
            "5. 提供相关证明材料说明差额原因;\n"
            "6. 建议聘请专业税务顾问进行全面审查。",
            "【严重风险】申报收入大幅超过充值金额,建议:\n"
            "1. 核实申报数据的准确性;\n"
            "2. 检查是否存在重复申报;\n"
            "3. 检查是否有其他收入来源;\n"
            "4. 提供收入来源的详细说明和证明材料。",
        ),
        (
            RiskLevel.HIGH,
            "[DEBUG] 生成高风险建议",
            "【高风险】发现较大收入差异,建议:\n"
            "1. 核实平台充值记录与实际收入的对应关系;\n"
            "2. 检查是否存在未申报的收入;\n"
            "3. 补充申报遗漏的收入;\n"
            "4. 提供相关证明材料说明差额原因;\n"
            "5. 加强收入管理和申报流程规范。",
            "【高风险】申报收入超过充值金额较多,建议:\n"
            "1. 核实申报数据的准确性;\n"
            "2. 检查是否存在数据录入错误;\n"
            "3. 提供收入来源的详细说明。",
        ),
        (
            RiskLevel.MEDIUM,
            "[DEBUG] 生成中风险建议",
            "【中风险】发现一定收入差异,建议:\n"
            "1. 核对平台充值与申报数据的一致性;\n"
            "2. 检查未提现收入的申报情况;\n"
            "3. 补充说明差额原因;\n"
            "4. 完善收入记录和申报流程。",
            "【中风险】申报收入略超充值金额,建议:\n"
            "1. 核对申报数据;\n"
            "2. 说明其他收入来源。",
        ),
        (
            RiskLevel.LOW,
            "[DEBUG] 生成低风险建议",
            "【低风险】发现少量收入差异,建议:\n"
            "1. 核对收入记录的完整性;\n"
            "2. 说明差额原因;\n"
            "3. 继续保持良好的申报习惯。",
            "【低风险】申报数据基本合理,建议:\n"
            "1. 核对收入记录;\n"
            "2. 继续保持良好的申报习惯。",
        ),
    )

    for level, debug_line, under_declared_text, other_text in advice_table:
        if risk_level == level:
            logger.debug(debug_line)
            suggestion = under_declared_text if difference > 0 else other_text
            break
    else:
        # No tier matched (NONE)
        logger.debug(f"[DEBUG] 生成无风险建议")
        suggestion = "继续维持良好的收入记录和申报习惯,定期进行自查。"

    logger.debug(f"[DEBUG] 建议生成完成")
    logger.debug(f"[DEBUG] <<< _generate_suggestion: 返回整改建议")
    return suggestion
async def _build_evidence_chain(
    self,
    result: DetectionResult,
    recharge_data: Dict[str, Any],
    declaration_data: Dict[str, Any],
    analysis: Dict[str, Any],
    period: str
):
    """Attach the supporting evidence to ``result``.

    Always adds the recharge summary and the declaration summary. The gap
    analysis is added only when the difference is non-zero; the TOP recharge
    records and the declaration records are added only when present.
    """
    logger.debug(f"[DEBUG] >>> _build_evidence_chain: 开始构建证据链")
    logger.debug(f"[DEBUG] 期间: {period}")

    # Evidence 1: recharge summary
    logger.debug(f"[DEBUG] 添加证据1: 充值汇总")
    result.add_evidence(RiskEvidence(
        evidence_type="recharge_summary",
        description="平台充值汇总",
        data={
            "period": period,
            "total_amount": float(recharge_data["total"]),
            "record_count": recharge_data["count"],
            "currency": "CNY",
        },
        metadata={"source": "platform_recharge"}
    ))
    logger.debug(f"[DEBUG] 充值总额: {recharge_data['total']}, 记录数: {recharge_data['count']}")

    # Evidence 2: declaration summary
    logger.debug(f"[DEBUG] 添加证据2: 申报汇总")
    result.add_evidence(RiskEvidence(
        evidence_type="declaration_summary",
        description="税务申报汇总",
        data={
            "period": period,
            "total_revenue": float(declaration_data["total"]),
            "declaration_count": declaration_data["count"],
        },
        metadata={"source": "tax_declaration"}
    ))
    logger.debug(f"[DEBUG] 申报总额: {declaration_data['total']}, 记录数: {declaration_data['count']}")

    # Evidence 3: gap analysis
    if analysis["difference"] != 0:
        logger.debug(f"[DEBUG] 添加证据3: 差异分析(差异非零)")
        ratio_value = analysis["contract_ratio"]
        expected_value = analysis["expected_revenue"]
        result.add_evidence(RiskEvidence(
            evidence_type="revenue_gap_analysis",
            description="收入差异分析",
            data={
                "difference_amount": float(analysis["difference"]),
                "difference_rate": float(analysis["difference_rate"]),
                # Compare against None explicitly: Decimal("0") is falsy but
                # is a real value (e.g. expected revenue of an empty period).
                "contract_ratio": float(ratio_value) if ratio_value is not None else None,
                "expected_revenue": float(expected_value) if expected_value is not None else None,
            },
            metadata={"analysis_type": "revenue_gap"}
        ))
        logger.debug(f"[DEBUG] 差异金额: {analysis['difference']}, 差异率: {analysis['difference_rate']}%")
    else:
        logger.debug(f"[DEBUG] 跳过证据3: 差异为0无需添加差异分析")

    # Evidence 4: TOP recharge records (if any)
    if recharge_data["top_records"]:
        logger.debug(f"[DEBUG] 添加证据4: TOP充值记录")
        result.add_evidence(RiskEvidence(
            evidence_type="top_recharge_records",
            description=f"TOP {len(recharge_data['top_records'])} 充值记录",
            data={
                "records": recharge_data["top_records"],
                "total_in_top": sum(r["amount"] for r in recharge_data["top_records"]),
            },
            metadata={"record_type": "recharge_detail"}
        ))
        logger.debug(f"[DEBUG] TOP记录数: {len(recharge_data['top_records'])}")
    else:
        logger.debug(f"[DEBUG] 跳过证据4: 无TOP充值记录")

    # Evidence 5: declaration records (if any)
    if declaration_data["records"]:
        logger.debug(f"[DEBUG] 添加证据5: 申报记录")
        result.add_evidence(RiskEvidence(
            evidence_type="declaration_records",
            description="税务申报记录",
            data={
                "records": declaration_data["records"],
            },
            metadata={"record_type": "declaration_detail"}
        ))
        logger.debug(f"[DEBUG] 申报记录数: {len(declaration_data['records'])}")
    else:
        logger.debug(f"[DEBUG] 跳过证据5: 无申报记录")

    logger.debug(f"[DEBUG] 证据链构建完成,总证据数: {len(result.evidence)}")
    logger.debug(f"[DEBUG] <<< _build_evidence_chain: 返回")
def _parse_period(self, period: str) -> Tuple[datetime, datetime]:
"""
解析期间为开始和结束日期
支持格式:
- YYYY-MM月度
- YYYY-QN季度如 2024-Q1
- YYYY年度
"""
logger.debug(f"[DEBUG] >>> _parse_period: 开始解析期间period={period}")
try:
if "-Q" in period:
# 季度2024-Q1
logger.debug(f"[DEBUG] 检测到季度格式")
year_str, quarter_str = period.split("-Q")
year = int(year_str)
quarter = int(quarter_str)
logger.debug(f"[DEBUG] 解析结果: year={year}, quarter={quarter}")
if quarter < 1 or quarter > 4:
raise ValueError(f"季度必须在1-4之间实际值{quarter}")
start_month = (quarter - 1) * 3 + 1
start_date = datetime(year, start_month, 1)
if quarter == 4:
end_date = datetime(year + 1, 1, 1) - timedelta(days=1)
else:
end_date = datetime(year, start_month + 3, 1) - timedelta(days=1)
logger.debug(f"[DEBUG] 季度期间解析完成: {start_date} ~ {end_date}")
logger.debug(f"[DEBUG] <<< _parse_period: 返回季度日期")
return start_date, end_date
elif "-" in period:
# 月度2024-01
logger.debug(f"[DEBUG] 检测到月度格式")
year, month = map(int, period.split("-"))
logger.debug(f"[DEBUG] 解析结果: year={year}, month={month}")
start_date = datetime(year, month, 1)
if month == 12:
end_date = datetime(year + 1, 1, 1) - timedelta(days=1)
else:
end_date = datetime(year, month + 1, 1) - timedelta(days=1)
logger.debug(f"[DEBUG] 月度期间解析完成: {start_date} ~ {end_date}")
logger.debug(f"[DEBUG] <<< _parse_period: 返回月度日期")
return start_date, end_date
else:
# 年度2024
logger.debug(f"[DEBUG] 检测到年度格式")
year = int(period)
logger.debug(f"[DEBUG] 解析结果: year={year}")
start_date = datetime(year, 1, 1)
end_date = datetime(year + 1, 1, 1) - timedelta(days=1)
logger.debug(f"[DEBUG] 年度期间解析完成: {start_date} ~ {end_date}")
logger.debug(f"[DEBUG] <<< _parse_period: 返回年度日期")
return start_date, end_date
except Exception as e:
logger.error(f"解析期间失败: {period}, 错误: {str(e)}")
raise ValueError(f"期间格式错误:{period}支持格式YYYY-MM、YYYY-QN、YYYY")
def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult:
    """Build the DetectionResult returned when the detection cannot complete.

    The result has risk level UNKNOWN and score 0.0. ``entity_id`` is the
    ``streamer_id`` parameter, or ``entity_id`` when the entity type is
    ``"streamer"``, or an empty string when neither is available.
    """
    logger.debug(f"[DEBUG] >>> _create_error_result: 创建错误结果")
    logger.debug(f"[DEBUG] 错误信息: {error_message}")

    # Resolve the streamer the same way the detection itself does, so the
    # error result is still attributed to the entity when only
    # entity_id/entity_type were supplied.
    resolved_id = context.get_parameter("streamer_id")
    if not resolved_id and context.get_parameter("entity_type") == "streamer":
        resolved_id = context.get_parameter("entity_id")

    result = DetectionResult(
        task_id=context.task_id,
        rule_id=context.rule_id,
        entity_id=resolved_id or "",
        entity_type="streamer",
        risk_level=RiskLevel.UNKNOWN,
        risk_score=0.0,
        description=f"收入完整性检测失败: {error_message}",
        suggestion="请检查参数设置或联系系统管理员",
    )
    logger.debug(f"[DEBUG] 错误结果创建完成")
    logger.debug(f"[DEBUG] <<< _create_error_result: 返回错误结果")
    return result