""" 收入完整性检测算法 检测平台充值金额与申报收入是否匹配,识别隐瞒收入风险 根据【01 风险审查规则.md】的定义: - 核查平台用户充值记录与税务申报数据的一致性 - 检测未提现收入是否完整申报 - 识别私户收款等隐匿收入行为 - 计算收入差异率并评级 风险等级判定标准: - CRITICAL(严重):差异率 > 50% 或 差异金额 > 10万元 - HIGH(高风险):差异率 > 30% 或 差异金额 > 5万元 - MEDIUM(中风险):差异率 > 10% 或 差异金额 > 1万元 - LOW(低风险):差异率 > 5% 或 差异金额 > 5000元 - NONE(无风险):差异在可接受范围内(<= 5% 且 <= 5000元) """ from typing import Dict, Any, List, Optional, Tuple from datetime import datetime, timedelta from decimal import Decimal from sqlalchemy import select, func, and_, or_ from sqlalchemy.ext.asyncio import AsyncSession from loguru import logger from .base import ( RiskDetectionAlgorithm, DetectionContext, DetectionResult, RiskEvidence, ) from app.models.risk_detection import RiskLevel from app.models.streamer import PlatformRecharge, StreamerInfo from app.models.tax_declaration import TaxDeclaration from app.models.contract import RevenueSharingContract class RevenueIntegrityAlgorithm(RiskDetectionAlgorithm): """收入完整性检测算法""" def __init__(self): super().__init__() # 默认配置 self.config = { "critical_rate_threshold": 50.0, # 严重风险差异率阈值 (%) "critical_amount_threshold": 100000.0, # 严重风险差异金额阈值 (元) "high_rate_threshold": 30.0, # 高风险差异率阈值 (%) "high_amount_threshold": 50000.0, # 高风险差异金额阈值 (元) "medium_rate_threshold": 10.0, # 中风险差异率阈值 (%) "medium_amount_threshold": 10000.0, # 中风险差异金额阈值 (元) "low_rate_threshold": 5.0, # 低风险差异率阈值 (%) "low_amount_threshold": 5000.0, # 低风险差异金额阈值 (元) "top_records_limit": 10, # 返回TOP充值记录数量 } def get_algorithm_code(self) -> str: return "REVENUE_INTEGRITY_CHECK" def get_algorithm_name(self) -> str: return "收入完整性检测" def get_description(self) -> str: return ( "通过对比平台充值记录与税务申报数据," "检测是否存在隐瞒收入、虚报或少报收入的风险" ) async def validate_data(self, context: DetectionContext): """验证数据完整性""" logger.debug("=" * 80) logger.debug("[DEBUG] 开始验证数据完整性") logger.debug("=" * 80) # 优先从streamer_id获取,其次从entity_id获取 streamer_id = context.get_parameter("streamer_id") logger.debug(f"[DEBUG] 从参数获取到的 streamer_id: {streamer_id}") if not streamer_id: entity_id = context.get_parameter("entity_id") entity_type = context.get_parameter("entity_type") logger.debug(f"[DEBUG] 未找到 streamer_id,尝试从 entity_id 获取: {entity_id}, entity_type: {entity_type}") if entity_id and entity_type == "streamer": streamer_id = entity_id logger.debug(f"[DEBUG] 使用 entity_id 作为 streamer_id: {streamer_id}") else: error_msg = "缺少必要参数:streamer_id或entity_id(实体类型需为streamer)" logger.error(f"[DEBUG] 数据验证失败: {error_msg}") raise ValueError(error_msg) period = context.get_parameter("period") logger.debug(f"[DEBUG] 获取到的 period: {period}") if not streamer_id: error_msg = "缺少必要参数:streamer_id" logger.error(f"[DEBUG] 数据验证失败: {error_msg}") raise ValueError(error_msg) if not period: error_msg = "缺少必要参数:period" logger.error(f"[DEBUG] 数据验证失败: {error_msg}") raise ValueError(error_msg) # 验证期间格式 logger.debug(f"[DEBUG] 开始验证期间格式: {period}") try: start_date, end_date = self._parse_period(period) logger.debug(f"[DEBUG] 期间格式验证成功") logger.debug(f"[DEBUG] 解析出的日期范围: {start_date} ~ {end_date}") except ValueError as e: error_msg = f"期间格式错误:{str(e)}" logger.error(f"[DEBUG] {error_msg}") raise ValueError(error_msg) logger.debug(f"[DEBUG] 数据验证完成,所有参数有效") logger.debug("=" * 80) async def _do_detect(self, context: DetectionContext) -> DetectionResult: """执行收入完整性检测""" logger.debug("=" * 80) logger.debug("[DEBUG] 开始执行收入完整性检测主流程") logger.debug("=" * 80) # 获取参数 # 优先从streamer_id获取,其次从entity_id获取 streamer_id = context.get_parameter("streamer_id") logger.debug(f"[DEBUG] 获取到 streamer_id: {streamer_id}") if not streamer_id: entity_id = context.get_parameter("entity_id") entity_type = context.get_parameter("entity_type") logger.debug(f"[DEBUG] 未找到 streamer_id,从 entity_id 获取: {entity_id}, entity_type: {entity_type}") if entity_id and entity_type == "streamer": streamer_id = entity_id logger.debug(f"[DEBUG] 使用 entity_id 作为 streamer_id: {streamer_id}") period = context.get_parameter("period") # 格式:YYYY-MM logger.debug(f"[DEBUG] 获取到 period: {period}") comparison_type = context.get_parameter("comparison_type", "monthly") # monthly/quarterly/yearly logger.debug(f"[DEBUG] 获取到 comparison_type: {comparison_type}") db_session = context.db_session if not db_session: logger.error("[DEBUG] 缺少数据库会话,无法继续执行") return self._create_error_result(context, "缺少数据库会话") logger.debug(f"[DEBUG] 数据库会话已建立") try: logger.info(f"开始执行收入完整性检测:主播ID={streamer_id}, 期间={period}") # 1. 获取主播基本信息 logger.debug("[DEBUG] 步骤 1: 获取主播基本信息") streamer_info = await self._get_streamer_info(db_session, streamer_id) logger.debug(f"[DEBUG] 主播信息获取结果: {streamer_info is not None}") if streamer_info: logger.debug(f"[DEBUG] 主播名称: {streamer_info.get('streamer_name')}, 实体类型: {streamer_info.get('entity_type')}") if not streamer_info: logger.error(f"[DEBUG] 找不到主播信息: {streamer_id}") return self._create_error_result(context, f"找不到主播信息:{streamer_id}") # 2. 解析期间 logger.debug("[DEBUG] 步骤 2: 解析期间") start_date, end_date = self._parse_period(period) logger.debug(f"[DEBUG] 解析出的日期范围: {start_date} ~ {end_date}") # 3. 获取平台充值数据 logger.debug("[DEBUG] 步骤 3: 获取平台充值数据") recharge_data = await self._get_recharge_data( db_session, streamer_id, start_date, end_date ) logger.debug(f"[DEBUG] 充值数据获取完成") logger.debug(f"[DEBUG] 充值总额: {recharge_data.get('total')}, 充值记录数: {recharge_data.get('count')}") # 4. 获取税务申报数据 logger.debug("[DEBUG] 步骤 4: 获取税务申报数据") declaration_data = await self._get_declaration_data( db_session, streamer_info, period, start_date, end_date ) logger.debug(f"[DEBUG] 申报数据获取完成") logger.debug(f"[DEBUG] 申报总额: {declaration_data.get('total')}, 申报记录数: {declaration_data.get('count')}") # 5. 获取分成协议 logger.debug("[DEBUG] 步骤 5: 获取分成协议") contract_ratio = await self._get_contract_ratio( db_session, streamer_id, start_date, end_date ) logger.debug(f"[DEBUG] 分成比例获取完成: {contract_ratio}%") # 6. 计算差异 logger.debug("[DEBUG] 步骤 6: 计算收入差异") analysis = self._analyze_revenue_gap( recharge_data, declaration_data, contract_ratio ) logger.debug(f"[DEBUG] 差异分析完成") logger.debug(f"[DEBUG] 差异金额: {analysis.get('difference')}, 差异率: {analysis.get('difference_rate')}%") # 7. 风险评估 logger.debug("[DEBUG] 步骤 7: 风险评估") risk_level, risk_score = self._calculate_risk_level( analysis["difference"], analysis["difference_rate"], analysis["recharge_total"] ) logger.debug(f"[DEBUG] 风险等级: {risk_level.value}, 风险评分: {risk_score}") # 8. 生成风险描述和建议 logger.debug("[DEBUG] 步骤 8: 生成风险描述和建议") description = self._generate_description( analysis, streamer_info, period ) suggestion = self._generate_suggestion( analysis, risk_level ) logger.debug(f"[DEBUG] 描述和建议生成完成") # 9. 创建检测结果 logger.debug("[DEBUG] 步骤 9: 创建检测结果") result = DetectionResult( task_id=context.task_id, rule_id=context.rule_id, entity_id=streamer_id, entity_type="streamer", risk_level=risk_level, risk_score=risk_score, description=description, suggestion=suggestion, risk_data={ "period": period, "streamer_name": streamer_info.get("streamer_name", ""), "entity_type": streamer_info.get("entity_type", ""), "recharge_total": float(analysis["recharge_total"]), "recharge_count": analysis["recharge_count"], "declared_revenue": float(analysis["declared_revenue"]), "declaration_count": analysis["declaration_count"], "difference": float(analysis["difference"]), "difference_rate": float(analysis["difference_rate"]), "contract_ratio": float(analysis["contract_ratio"]) if analysis["contract_ratio"] else None, "expected_revenue": float(analysis["expected_revenue"]) if analysis["expected_revenue"] else None, }, ) logger.debug(f"[DEBUG] 检测结果创建完成") # 10. 构建证据链 logger.debug("[DEBUG] 步骤 10: 构建证据链") await self._build_evidence_chain( result, recharge_data, declaration_data, analysis, period ) logger.debug(f"[DEBUG] 证据链构建完成,证据数量: {len(result.evidence)}") logger.info( f"收入完整性检测完成:主播ID={streamer_id}, " f"风险等级={risk_level.value}, 评分={risk_score}" ) logger.debug(f"[DEBUG] 检测流程全部完成") logger.debug("=" * 80) return result except Exception as e: logger.error(f"收入完整性检测执行失败: {str(e)}", exc_info=True) logger.debug(f"[DEBUG] 异常信息: {str(e)}") logger.debug("=" * 80) return self._create_error_result(context, f"检测执行失败: {str(e)}") async def _get_streamer_info( self, db_session: AsyncSession, streamer_id: str ) -> Optional[Dict[str, Any]]: """获取主播基本信息""" logger.debug(f"[DEBUG] >>> _get_streamer_info: 开始获取主播信息, streamer_id={streamer_id}") stmt = select(StreamerInfo).where(StreamerInfo.streamer_id == streamer_id) logger.debug(f"[DEBUG] 执行SQL查询: {stmt}") result = await db_session.execute(stmt) streamer = result.scalar_one_or_none() if not streamer: logger.warning(f"[DEBUG] 未找到主播信息: {streamer_id}") logger.debug(f"[DEBUG] <<< _get_streamer_info: 返回 None") return None streamer_info = { "streamer_id": streamer.streamer_id, "streamer_name": streamer.streamer_name, "entity_type": streamer.entity_type, "tax_registration_no": streamer.tax_registration_no, "unified_social_credit_code": streamer.unified_social_credit_code, "id_card_no": streamer.id_card_no, } logger.debug(f"[DEBUG] 主播信息查询成功") logger.debug(f"[DEBUG] 主播名称: {streamer.streamer_name}, 实体类型: {streamer.entity_type}") logger.debug(f"[DEBUG] 税号信息: 税务登记号={streamer.tax_registration_no}, 统一社会信用代码={streamer.unified_social_credit_code}, 身份证号={streamer.id_card_no}") logger.debug(f"[DEBUG] <<< _get_streamer_info: 返回主播信息") return streamer_info async def _get_recharge_data( self, db_session: AsyncSession, streamer_id: str, start_date: datetime, end_date: datetime ) -> Dict[str, Any]: """ 获取平台充值数据 返回: { "total": Decimal, # 总金额 "count": int, # 记录数 "top_records": List[Dict], # TOP充值记录 } """ logger.debug(f"[DEBUG] >>> _get_recharge_data: 开始获取充值数据") logger.debug(f"[DEBUG] 参数: streamer_id={streamer_id}, start_date={start_date}, end_date={end_date}") # 查询充值总额和记录数 logger.debug(f"[DEBUG] 查询充值总额和记录数") stmt_summary = select( func.count(PlatformRecharge.id).label("count"), func.coalesce(func.sum(PlatformRecharge.actual_amount_cny), 0).label("total") ).where( and_( PlatformRecharge.user_id == streamer_id, PlatformRecharge.recharge_time >= start_date, PlatformRecharge.recharge_time <= end_date, PlatformRecharge.status == "success", ) ) logger.debug(f"[DEBUG] 执行SQL: {stmt_summary}") result = await db_session.execute(stmt_summary) summary = result.one() logger.debug(f"[DEBUG] 查询结果: count={summary.count}, total={summary.total}") # 查询TOP充值记录 top_limit = self.config.get("top_records_limit", 10) logger.debug(f"[DEBUG] 查询TOP {top_limit} 充值记录") stmt_top = select(PlatformRecharge).where( and_( PlatformRecharge.user_id == streamer_id, PlatformRecharge.recharge_time >= start_date, PlatformRecharge.recharge_time <= end_date, PlatformRecharge.status == "success", ) ).order_by( PlatformRecharge.actual_amount_cny.desc() ).limit(top_limit) logger.debug(f"[DEBUG] 执行SQL: {stmt_top}") result_top = await db_session.execute(stmt_top) top_records = result_top.scalars().all() logger.debug(f"[DEBUG] TOP记录查询完成,记录数: {len(top_records)}") recharge_data = { "total": Decimal(str(summary.total)), "count": summary.count, "top_records": [ { "recharge_id": r.recharge_id, "user_name": r.user_name, "amount": float(r.actual_amount_cny), "time": r.recharge_time.strftime("%Y-%m-%d %H:%M:%S"), "payment_method": r.payment_method, } for r in top_records ] } logger.debug(f"[DEBUG] 充值数据汇总:") logger.debug(f"[DEBUG] 总金额: {recharge_data['total']}") logger.debug(f"[DEBUG] 记录数: {recharge_data['count']}") logger.debug(f"[DEBUG] TOP记录数: {len(recharge_data['top_records'])}") if recharge_data['top_records']: logger.debug(f"[DEBUG] 最高充值金额: {recharge_data['top_records'][0]['amount']}") logger.debug(f"[DEBUG] <<< _get_recharge_data: 返回充值数据") return recharge_data async def _get_declaration_data( self, db_session: AsyncSession, streamer_info: Dict[str, Any], period: str, start_date: datetime, end_date: datetime ) -> Dict[str, Any]: """ 获取税务申报数据 返回: { "total": Decimal, # 申报总收入 "count": int, # 申报记录数 "records": List[Dict], # 申报记录 } """ logger.debug(f"[DEBUG] >>> _get_declaration_data: 开始获取申报数据") logger.debug(f"[DEBUG] 主播ID: {streamer_info.get('streamer_id')}, 期间: {period}") logger.debug(f"[DEBUG] 日期范围: {start_date} ~ {end_date}") # 构建税号查询条件(个人可能用身份证,企业用统一社会信用代码) taxpayer_ids = [] if streamer_info.get("tax_registration_no"): taxpayer_ids.append(streamer_info["tax_registration_no"]) logger.debug(f"[DEBUG] 税务登记号: {streamer_info['tax_registration_no']}") if streamer_info.get("unified_social_credit_code"): taxpayer_ids.append(streamer_info["unified_social_credit_code"]) logger.debug(f"[DEBUG] 统一社会信用代码: {streamer_info['unified_social_credit_code']}") if streamer_info.get("id_card_no"): taxpayer_ids.append(streamer_info["id_card_no"]) logger.debug(f"[DEBUG] 身份证号: {streamer_info['id_card_no']}") if not taxpayer_ids: logger.warning(f"[DEBUG] 主播 {streamer_info['streamer_id']} 没有税号信息,无法查询申报数据") logger.debug(f"[DEBUG] <<< _get_declaration_data: 返回空数据") return {"total": Decimal("0"), "count": 0, "records": []} logger.debug(f"[DEBUG] 查询税号列表: {taxpayer_ids}") # 查询税务申报记录 logger.debug(f"[DEBUG] 查询税务申报记录") stmt = select(TaxDeclaration).where( and_( TaxDeclaration.taxpayer_id.in_(taxpayer_ids), or_( TaxDeclaration.tax_period == period, # 月度申报 and_( # 期间范围查询 TaxDeclaration.declaration_date >= start_date, TaxDeclaration.declaration_date <= end_date, ) ) ) ) logger.debug(f"[DEBUG] 执行SQL: {stmt}") result = await db_session.execute(stmt) declarations = result.scalars().all() logger.debug(f"[DEBUG] 查询到申报记录数: {len(declarations)}") total_revenue = Decimal("0") records = [] for decl in declarations: revenue = Decimal(str(decl.sales_revenue or 0)) total_revenue += revenue records.append({ "declaration_id": decl.vat_declaration_id, "taxpayer_name": decl.taxpayer_name, "tax_period": decl.tax_period, "sales_revenue": float(revenue), "declaration_date": decl.declaration_date.strftime("%Y-%m-%d") if decl.declaration_date else None, }) logger.debug(f"[DEBUG] 申报记录: {decl.vat_declaration_id}, 销售收入: {revenue}, 申报期间: {decl.tax_period}") declaration_data = { "total": total_revenue, "count": len(records), "records": records, } logger.debug(f"[DEBUG] 申报数据汇总:") logger.debug(f"[DEBUG] 总收入: {declaration_data['total']}") logger.debug(f"[DEBUG] 记录数: {declaration_data['count']}") logger.debug(f"[DEBUG] <<< _get_declaration_data: 返回申报数据") return declaration_data async def _get_contract_ratio( self, db_session: AsyncSession, streamer_id: str, start_date: datetime, end_date: datetime ) -> Optional[Decimal]: """ 获取主播分成比例 从分成协议表中获取主播的分成比例 """ logger.debug(f"[DEBUG] >>> _get_contract_ratio: 开始获取分成协议") logger.debug(f"[DEBUG] 参数: streamer_id={streamer_id}, start_date={start_date}, end_date={end_date}") stmt = select(RevenueSharingContract).where( and_( RevenueSharingContract.streamer_id == streamer_id, RevenueSharingContract.contract_start_date <= end_date, or_( RevenueSharingContract.contract_end_date.is_(None), RevenueSharingContract.contract_end_date >= start_date, ), RevenueSharingContract.contract_status == "active", ) ).order_by( RevenueSharingContract.contract_start_date.desc() ).limit(1) logger.debug(f"[DEBUG] 执行SQL: {stmt}") result = await db_session.execute(stmt) contract = result.scalar_one_or_none() if contract: logger.debug(f"[DEBUG] 找到分成协议") logger.debug(f"[DEBUG] 协议开始日期: {contract.contract_start_date}") logger.debug(f"[DEBUG] 协议结束日期: {contract.contract_end_date}") logger.debug(f"[DEBUG] 协议状态: {contract.contract_status}") logger.debug(f"[DEBUG] 主播分成比例: {contract.streamer_ratio}%") else: logger.debug(f"[DEBUG] 未找到有效的分成协议") if contract and contract.streamer_ratio: ratio = Decimal(str(contract.streamer_ratio)) * Decimal("100") logger.debug(f"[DEBUG] <<< _get_contract_ratio: 返回分成比例 {ratio}%") return ratio logger.debug(f"[DEBUG] <<< _get_contract_ratio: 返回 None(无分成比例)") return None def _analyze_revenue_gap( self, recharge_data: Dict[str, Any], declaration_data: Dict[str, Any], contract_ratio: Optional[Decimal] ) -> Dict[str, Any]: """ 分析收入差异 返回: { "recharge_total": Decimal, # 平台充值总额 "recharge_count": int, "declared_revenue": Decimal, # 申报收入 "declaration_count": int, "contract_ratio": Optional[Decimal], # 分成比例 "expected_revenue": Optional[Decimal], # 预期收入(基于分成比例) "difference": Decimal, # 差异金额 "difference_rate": Decimal, # 差异率 (%) } """ logger.debug(f"[DEBUG] >>> _analyze_revenue_gap: 开始分析收入差异") logger.debug(f"[DEBUG] 充值数据: 总金额={recharge_data['total']}, 记录数={recharge_data['count']}") logger.debug(f"[DEBUG] 申报数据: 总收入={declaration_data['total']}, 记录数={declaration_data['count']}") logger.debug(f"[DEBUG] 分成比例: {contract_ratio}%") recharge_total = recharge_data["total"] declared_revenue = declaration_data["total"] # 计算预期收入(如果有分成比例) expected_revenue = None if contract_ratio is not None: expected_revenue = recharge_total * contract_ratio / Decimal("100") logger.debug(f"[DEBUG] 计算预期收入: {recharge_total} * {contract_ratio}% = {expected_revenue}") else: logger.debug(f"[DEBUG] 无分成比例,不计算预期收入") # 计算差异(使用预期收入或充值总额) if expected_revenue is not None: difference = expected_revenue - declared_revenue base_amount = expected_revenue logger.debug(f"[DEBUG] 基于预期收入计算差异: {expected_revenue} - {declared_revenue} = {difference}") else: difference = recharge_total - declared_revenue base_amount = recharge_total logger.debug(f"[DEBUG] 基于充值总额计算差异: {recharge_total} - {declared_revenue} = {difference}") # 计算差异率 if base_amount > 0: difference_rate = (abs(difference) / base_amount) * Decimal("100") logger.debug(f"[DEBUG] 计算差异率: |{difference}| / {base_amount} * 100 = {difference_rate}%") else: difference_rate = Decimal("0") logger.debug(f"[DEBUG] 基准金额为0,差异率为0%") analysis = { "recharge_total": recharge_total, "recharge_count": recharge_data["count"], "declared_revenue": declared_revenue, "declaration_count": declaration_data["count"], "contract_ratio": contract_ratio, "expected_revenue": expected_revenue, "difference": difference, "difference_rate": difference_rate, } logger.debug(f"[DEBUG] 差异分析结果:") logger.debug(f"[DEBUG] 充值总额: {analysis['recharge_total']}") logger.debug(f"[DEBUG] 申报收入: {analysis['declared_revenue']}") logger.debug(f"[DEBUG] 预期收入: {analysis['expected_revenue']}") logger.debug(f"[DEBUG] 差异金额: {analysis['difference']}") logger.debug(f"[DEBUG] 差异率: {analysis['difference_rate']}%") logger.debug(f"[DEBUG] <<< _analyze_revenue_gap: 返回差异分析结果") return analysis def _calculate_risk_level( self, difference: Decimal, difference_rate: Decimal, recharge_total: Decimal ) -> Tuple[RiskLevel, float]: """ 计算风险等级和评分 风险判定逻辑: 1. 基于差异率和差异金额双重判断 2. 满足任一条件即判定为该风险等级 3. 评分基于风险等级和差异程度 返回:(RiskLevel, risk_score) """ logger.debug(f"[DEBUG] >>> _calculate_risk_level: 开始计算风险等级") logger.debug(f"[DEBUG] 参数: 差异金额={difference}, 差异率={difference_rate}%, 充值总额={recharge_total}") if recharge_total == 0: logger.debug(f"[DEBUG] 充值总额为0,返回无风险") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.NONE, 0.0") return RiskLevel.NONE, 0.0 # 只关注收入少报风险(difference > 0) if difference <= 0: # 申报收入超过充值金额,风险较低 logger.debug(f"[DEBUG] 差异金额 <= 0(申报收入 >= 预期/充值金额),风险较低") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.LOW, 20.0") return RiskLevel.LOW, 20.0 logger.debug(f"[DEBUG] 检测到收入少报风险(差异金额 > 0),开始风险等级判定") # 获取阈值配置 critical_rate = Decimal(str(self.config.get("critical_rate_threshold", 50.0))) critical_amount = Decimal(str(self.config.get("critical_amount_threshold", 100000.0))) high_rate = Decimal(str(self.config.get("high_rate_threshold", 30.0))) high_amount = Decimal(str(self.config.get("high_amount_threshold", 50000.0))) medium_rate = Decimal(str(self.config.get("medium_rate_threshold", 10.0))) medium_amount = Decimal(str(self.config.get("medium_amount_threshold", 10000.0))) low_rate = Decimal(str(self.config.get("low_rate_threshold", 5.0))) low_amount = Decimal(str(self.config.get("low_amount_threshold", 5000.0))) logger.debug(f"[DEBUG] 风险阈值配置:") logger.debug(f"[DEBUG] 严重: 差异率>{critical_rate}% 或 差异金额>{critical_amount}元") logger.debug(f"[DEBUG] 高: 差异率>{high_rate}% 或 差异金额>{high_amount}元") logger.debug(f"[DEBUG] 中: 差异率>{medium_rate}% 或 差异金额>{medium_amount}元") logger.debug(f"[DEBUG] 低: 差异率>{low_rate}% 或 差异金额>{low_amount}元") # 判断风险等级 if difference_rate > critical_rate or difference > critical_amount: # 严重风险:评分 85-100 score = 85.0 + min(15.0, float(difference_rate - critical_rate) / 10.0) final_score = min(100.0, score) logger.debug(f"[DEBUG] 判定为严重风险") logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {critical_rate}% 或 差异金额 {difference} > {critical_amount}") logger.debug(f"[DEBUG] 评分: {final_score}") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.CRITICAL, {final_score}") return RiskLevel.CRITICAL, final_score elif difference_rate > high_rate or difference > high_amount: # 高风险:评分 70-85 score = 70.0 + min(15.0, float(difference_rate - high_rate) / 5.0) final_score = min(85.0, score) logger.debug(f"[DEBUG] 判定为高风险") logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {high_rate}% 或 差异金额 {difference} > {high_amount}") logger.debug(f"[DEBUG] 评分: {final_score}") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.HIGH, {final_score}") return RiskLevel.HIGH, final_score elif difference_rate > medium_rate or difference > medium_amount: # 中风险:评分 50-70 score = 50.0 + min(20.0, float(difference_rate - medium_rate) / 2.0) final_score = min(70.0, score) logger.debug(f"[DEBUG] 判定为中风险") logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {medium_rate}% 或 差异金额 {difference} > {medium_amount}") logger.debug(f"[DEBUG] 评分: {final_score}") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.MEDIUM, {final_score}") return RiskLevel.MEDIUM, final_score elif difference_rate > low_rate or difference > low_amount: # 低风险:评分 20-50 score = 20.0 + min(30.0, float(difference_rate - low_rate) * 6.0) final_score = min(50.0, score) logger.debug(f"[DEBUG] 判定为低风险") logger.debug(f"[DEBUG] 差异率 {difference_rate}% > {low_rate}% 或 差异金额 {difference} > {low_amount}") logger.debug(f"[DEBUG] 评分: {final_score}") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.LOW, {final_score}") return RiskLevel.LOW, final_score else: # 无风险:评分 0-20 score = min(20.0, float(difference_rate) * 4.0) logger.debug(f"[DEBUG] 判定为无风险") logger.debug(f"[DEBUG] 差异率 {difference_rate}% <= {low_rate}% 且 差异金额 {difference} <= {low_amount}") logger.debug(f"[DEBUG] 评分: {score}") logger.debug(f"[DEBUG] <<< _calculate_risk_level: 返回 RiskLevel.NONE, {score}") return RiskLevel.NONE, score def _generate_description( self, analysis: Dict[str, Any], streamer_info: Dict[str, Any], period: str ) -> str: """生成风险描述""" logger.debug(f"[DEBUG] >>> _generate_description: 开始生成风险描述") recharge_total = analysis["recharge_total"] declared_revenue = analysis["declared_revenue"] difference = analysis["difference"] difference_rate = analysis["difference_rate"] expected_revenue = analysis.get("expected_revenue") contract_ratio = analysis.get("contract_ratio") streamer_name = streamer_info.get("streamer_name", "") entity_type_map = { "individual": "个人", "individual_business": "个体工商户", "enterprise": "企业", } entity_type = entity_type_map.get( streamer_info.get("entity_type", ""), streamer_info.get("entity_type", "") ) logger.debug(f"[DEBUG] 描述生成参数:") logger.debug(f"[DEBUG] 主播名称: {streamer_name}") logger.debug(f"[DEBUG] 实体类型: {entity_type}") logger.debug(f"[DEBUG] 期间: {period}") logger.debug(f"[DEBUG] 充值总额: {recharge_total}") logger.debug(f"[DEBUG] 申报收入: {declared_revenue}") logger.debug(f"[DEBUG] 预期收入: {expected_revenue}") logger.debug(f"[DEBUG] 分成比例: {contract_ratio}%") logger.debug(f"[DEBUG] 差异金额: {difference}") logger.debug(f"[DEBUG] 差异率: {difference_rate}%") if difference > 0: # 收入少报 logger.debug(f"[DEBUG] 生成收入少报描述") if expected_revenue is not None: desc = ( f"检测到收入完整性风险:主播【{streamer_name}】({entity_type})" f"在 {period} 期间,平台充值总额为 {recharge_total:,.2f}元," f"按分成比例 {contract_ratio}% 计算,预期申报收入为 {expected_revenue:,.2f}元," f"但实际申报收入仅为 {declared_revenue:,.2f}元," f"存在 {difference:,.2f}元差额(差异率 {difference_rate:.2f}%)," f"可能存在隐瞒收入或少报收入的情况。" ) else: desc = ( f"检测到收入完整性风险:主播【{streamer_name}】({entity_type})" f"在 {period} 期间,平台充值总额为 {recharge_total:,.2f}元," f"但申报收入仅为 {declared_revenue:,.2f}元," f"存在 {difference:,.2f}元差额(差异率 {difference_rate:.2f}%)," f"可能存在隐瞒收入或少报收入的情况。" ) elif difference < 0: # 申报收入超过充值金额 logger.debug(f"[DEBUG] 生成申报收入超限描述") desc = ( f"主播【{streamer_name}】({entity_type})在 {period} 期间," f"申报收入 {declared_revenue:,.2f}元 超过平台充值总额 {recharge_total:,.2f}元," f"超出 {abs(difference):,.2f}元,可能存在虚报收入或数据录入错误。" ) else: # 数据一致 logger.debug(f"[DEBUG] 生成数据一致描述") desc = ( f"主播【{streamer_name}】({entity_type})在 {period} 期间," f"收入完整性检查通过,平台充值与申报收入基本一致。" ) logger.debug(f"[DEBUG] 描述生成完成") logger.debug(f"[DEBUG] <<< _generate_description: 返回风险描述") return desc def _generate_suggestion( self, analysis: Dict[str, Any], risk_level: RiskLevel ) -> str: """生成整改建议""" logger.debug(f"[DEBUG] >>> _generate_suggestion: 开始生成整改建议") logger.debug(f"[DEBUG] 风险等级: {risk_level.value}") difference = analysis["difference"] logger.debug(f"[DEBUG] 差异金额: {difference}") if risk_level == RiskLevel.CRITICAL: logger.debug(f"[DEBUG] 生成严重风险建议") if difference > 0: suggestion = ( "【严重风险】发现重大收入差异,建议立即采取以下措施:\n" "1. 立即核实平台充值记录与实际收入的对应关系;\n" "2. 检查是否存在未申报的收入,特别是私户收款;\n" "3. 核查分成协议的执行情况;\n" "4. 补充申报遗漏的收入,并计算应补缴税款;\n" "5. 提供相关证明材料说明差额原因;\n" "6. 建议聘请专业税务顾问进行全面审查。" ) else: suggestion = ( "【严重风险】申报收入大幅超过充值金额,建议:\n" "1. 核实申报数据的准确性;\n" "2. 检查是否存在重复申报;\n" "3. 检查是否有其他收入来源;\n" "4. 提供收入来源的详细说明和证明材料。" ) elif risk_level == RiskLevel.HIGH: logger.debug(f"[DEBUG] 生成高风险建议") if difference > 0: suggestion = ( "【高风险】发现较大收入差异,建议:\n" "1. 核实平台充值记录与实际收入的对应关系;\n" "2. 检查是否存在未申报的收入;\n" "3. 补充申报遗漏的收入;\n" "4. 提供相关证明材料说明差额原因;\n" "5. 加强收入管理和申报流程规范。" ) else: suggestion = ( "【高风险】申报收入超过充值金额较多,建议:\n" "1. 核实申报数据的准确性;\n" "2. 检查是否存在数据录入错误;\n" "3. 提供收入来源的详细说明。" ) elif risk_level == RiskLevel.MEDIUM: logger.debug(f"[DEBUG] 生成中风险建议") if difference > 0: suggestion = ( "【中风险】发现一定收入差异,建议:\n" "1. 核对平台充值与申报数据的一致性;\n" "2. 检查未提现收入的申报情况;\n" "3. 补充说明差额原因;\n" "4. 完善收入记录和申报流程。" ) else: suggestion = ( "【中风险】申报收入略超充值金额,建议:\n" "1. 核对申报数据;\n" "2. 说明其他收入来源。" ) elif risk_level == RiskLevel.LOW: logger.debug(f"[DEBUG] 生成低风险建议") if difference > 0: suggestion = ( "【低风险】发现少量收入差异,建议:\n" "1. 核对收入记录的完整性;\n" "2. 说明差额原因;\n" "3. 继续保持良好的申报习惯。" ) else: suggestion = ( "【低风险】申报数据基本合理,建议:\n" "1. 核对收入记录;\n" "2. 继续保持良好的申报习惯。" ) else: # NONE logger.debug(f"[DEBUG] 生成无风险建议") suggestion = "继续维持良好的收入记录和申报习惯,定期进行自查。" logger.debug(f"[DEBUG] 建议生成完成") logger.debug(f"[DEBUG] <<< _generate_suggestion: 返回整改建议") return suggestion async def _build_evidence_chain( self, result: DetectionResult, recharge_data: Dict[str, Any], declaration_data: Dict[str, Any], analysis: Dict[str, Any], period: str ): """构建证据链""" logger.debug(f"[DEBUG] >>> _build_evidence_chain: 开始构建证据链") logger.debug(f"[DEBUG] 期间: {period}") # 证据1:充值汇总 logger.debug(f"[DEBUG] 添加证据1: 充值汇总") result.add_evidence(RiskEvidence( evidence_type="recharge_summary", description="平台充值汇总", data={ "period": period, "total_amount": float(recharge_data["total"]), "record_count": recharge_data["count"], "currency": "CNY", }, metadata={"source": "platform_recharge"} )) logger.debug(f"[DEBUG] 充值总额: {recharge_data['total']}, 记录数: {recharge_data['count']}") # 证据2:申报汇总 logger.debug(f"[DEBUG] 添加证据2: 申报汇总") result.add_evidence(RiskEvidence( evidence_type="declaration_summary", description="税务申报汇总", data={ "period": period, "total_revenue": float(declaration_data["total"]), "declaration_count": declaration_data["count"], }, metadata={"source": "tax_declaration"} )) logger.debug(f"[DEBUG] 申报总额: {declaration_data['total']}, 记录数: {declaration_data['count']}") # 证据3:差异分析 if analysis["difference"] != 0: logger.debug(f"[DEBUG] 添加证据3: 差异分析(差异非零)") result.add_evidence(RiskEvidence( evidence_type="revenue_gap_analysis", description="收入差异分析", data={ "difference_amount": float(analysis["difference"]), "difference_rate": float(analysis["difference_rate"]), "contract_ratio": float(analysis["contract_ratio"]) if analysis["contract_ratio"] else None, "expected_revenue": float(analysis["expected_revenue"]) if analysis["expected_revenue"] else None, }, metadata={"analysis_type": "revenue_gap"} )) logger.debug(f"[DEBUG] 差异金额: {analysis['difference']}, 差异率: {analysis['difference_rate']}%") else: logger.debug(f"[DEBUG] 跳过证据3: 差异为0,无需添加差异分析") # 证据4:TOP充值记录(如果存在) if recharge_data["top_records"]: logger.debug(f"[DEBUG] 添加证据4: TOP充值记录") result.add_evidence(RiskEvidence( evidence_type="top_recharge_records", description=f"TOP {len(recharge_data['top_records'])} 充值记录", data={ "records": recharge_data["top_records"], "total_in_top": sum(r["amount"] for r in recharge_data["top_records"]), }, metadata={"record_type": "recharge_detail"} )) logger.debug(f"[DEBUG] TOP记录数: {len(recharge_data['top_records'])}") else: logger.debug(f"[DEBUG] 跳过证据4: 无TOP充值记录") # 证据5:申报记录(如果存在) if declaration_data["records"]: logger.debug(f"[DEBUG] 添加证据5: 申报记录") result.add_evidence(RiskEvidence( evidence_type="declaration_records", description="税务申报记录", data={ "records": declaration_data["records"], }, metadata={"record_type": "declaration_detail"} )) logger.debug(f"[DEBUG] 申报记录数: {len(declaration_data['records'])}") else: logger.debug(f"[DEBUG] 跳过证据5: 无申报记录") logger.debug(f"[DEBUG] 证据链构建完成,总证据数: {len(result.evidence)}") logger.debug(f"[DEBUG] <<< _build_evidence_chain: 返回") def _parse_period(self, period: str) -> Tuple[datetime, datetime]: """ 解析期间为开始和结束日期 支持格式: - YYYY-MM:月度 - YYYY-QN:季度(如 2024-Q1) - YYYY:年度 """ logger.debug(f"[DEBUG] >>> _parse_period: 开始解析期间,period={period}") try: if "-Q" in period: # 季度:2024-Q1 logger.debug(f"[DEBUG] 检测到季度格式") year_str, quarter_str = period.split("-Q") year = int(year_str) quarter = int(quarter_str) logger.debug(f"[DEBUG] 解析结果: year={year}, quarter={quarter}") if quarter < 1 or quarter > 4: raise ValueError(f"季度必须在1-4之间,实际值:{quarter}") start_month = (quarter - 1) * 3 + 1 start_date = datetime(year, start_month, 1) if quarter == 4: end_date = datetime(year + 1, 1, 1) - timedelta(days=1) else: end_date = datetime(year, start_month + 3, 1) - timedelta(days=1) logger.debug(f"[DEBUG] 季度期间解析完成: {start_date} ~ {end_date}") logger.debug(f"[DEBUG] <<< _parse_period: 返回季度日期") return start_date, end_date elif "-" in period: # 月度:2024-01 logger.debug(f"[DEBUG] 检测到月度格式") year, month = map(int, period.split("-")) logger.debug(f"[DEBUG] 解析结果: year={year}, month={month}") start_date = datetime(year, month, 1) if month == 12: end_date = datetime(year + 1, 1, 1) - timedelta(days=1) else: end_date = datetime(year, month + 1, 1) - timedelta(days=1) logger.debug(f"[DEBUG] 月度期间解析完成: {start_date} ~ {end_date}") logger.debug(f"[DEBUG] <<< _parse_period: 返回月度日期") return start_date, end_date else: # 年度:2024 logger.debug(f"[DEBUG] 检测到年度格式") year = int(period) logger.debug(f"[DEBUG] 解析结果: year={year}") start_date = datetime(year, 1, 1) end_date = datetime(year + 1, 1, 1) - timedelta(days=1) logger.debug(f"[DEBUG] 年度期间解析完成: {start_date} ~ {end_date}") logger.debug(f"[DEBUG] <<< _parse_period: 返回年度日期") return start_date, end_date except Exception as e: logger.error(f"解析期间失败: {period}, 错误: {str(e)}") raise ValueError(f"期间格式错误:{period},支持格式:YYYY-MM、YYYY-QN、YYYY") def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult: """创建错误结果""" logger.debug(f"[DEBUG] >>> _create_error_result: 创建错误结果") logger.debug(f"[DEBUG] 错误信息: {error_message}") result = DetectionResult( task_id=context.task_id, rule_id=context.rule_id, entity_id=context.get_parameter("streamer_id", ""), entity_type="streamer", risk_level=RiskLevel.UNKNOWN, risk_score=0.0, description=f"收入完整性检测失败: {error_message}", suggestion="请检查参数设置或联系系统管理员", ) logger.debug(f"[DEBUG] 错误结果创建完成") logger.debug(f"[DEBUG] <<< _create_error_result: 返回错误结果") return result