""" 结果处理器 负责聚合、评分、分析检测结果,生成综合报告 功能: 1. 聚合多个规则的检测结果 2. 计算综合风险评分 3. 构建完整证据链 4. 生成整改建议 5. 输出结构化报告 """ from typing import Dict, Any, List, Optional, Tuple from decimal import Decimal from datetime import datetime from dataclasses import dataclass, field from collections import defaultdict, Counter from loguru import logger from ..algorithms.base import DetectionResult, RiskEvidence from app.models.risk_detection import RiskLevel @dataclass class AggregatedResult: """聚合结果""" entity_id: str entity_type: str period: str task_id: str # 统计信息 total_rules: int = 0 executed_rules: int = 0 failed_rules: int = 0 # 风险分布 risk_distribution: Dict[str, int] = field(default_factory=lambda: { RiskLevel.CRITICAL.value: 0, RiskLevel.HIGH.value: 0, RiskLevel.MEDIUM.value: 0, RiskLevel.LOW.value: 0, RiskLevel.NONE.value: 0, RiskLevel.UNKNOWN.value: 0, }) # 综合评分 overall_risk_score: float = 0.0 overall_risk_level: RiskLevel = RiskLevel.NONE # 详细结果 results: List[DetectionResult] = field(default_factory=list) evidence_chain: List[RiskEvidence] = field(default_factory=list) # 时间信息 detection_start_time: Optional[datetime] = None detection_end_time: Optional[datetime] = None total_duration: float = 0.0 def to_dict(self) -> Dict[str, Any]: """转换为字典""" return { "entity_id": self.entity_id, "entity_type": self.entity_type, "period": self.period, "task_id": self.task_id, "total_rules": self.total_rules, "executed_rules": self.executed_rules, "failed_rules": self.failed_rules, "risk_distribution": self.risk_distribution, "overall_risk_score": self.overall_risk_score, "overall_risk_level": self.overall_risk_level.value, "results": [r.to_dict() for r in self.results], "evidence_chain": [e.to_dict() for e in self.evidence_chain], "detection_start_time": self.detection_start_time.isoformat() if self.detection_start_time else None, "detection_end_time": self.detection_end_time.isoformat() if self.detection_end_time else None, "total_duration": self.total_duration, } class ResultAggregator: """结果聚合器""" def __init__(self): self.risk_level_order = { RiskLevel.CRITICAL: 5, RiskLevel.HIGH: 4, RiskLevel.MEDIUM: 3, RiskLevel.LOW: 2, RiskLevel.NONE: 1, RiskLevel.UNKNOWN: 0, } def aggregate( self, results: List[DetectionResult], task_id: str, entity_id: str, entity_type: str, period: str ) -> AggregatedResult: """ 聚合检测结果 Args: results: 检测结果列表 task_id: 任务ID entity_id: 实体ID entity_type: 实体类型 period: 检测期间 Returns: AggregatedResult """ logger.info(f"开始聚合 {len(results)} 个检测结果") # 创建聚合结果对象 agg_result = AggregatedResult( entity_id=entity_id, entity_type=entity_type, period=period, task_id=task_id, total_rules=len(results) ) # 过滤有效结果 valid_results = [r for r in results if r.risk_level != RiskLevel.UNKNOWN] failed_results = [r for r in results if r.risk_level == RiskLevel.UNKNOWN] agg_result.executed_rules = len(valid_results) agg_result.failed_rules = len(failed_results) agg_result.results = results # 统计风险分布 self._calculate_risk_distribution(agg_result, results) # 计算综合风险评分 self._calculate_overall_score(agg_result, valid_results) # 收集时间信息 self._collect_timing_info(agg_result, results) logger.info( f"结果聚合完成:实体={entity_id}, 总体风险={agg_result.overall_risk_level.value}, " f"评分={agg_result.overall_risk_score:.2f}" ) return agg_result def _calculate_risk_distribution( self, agg_result: AggregatedResult, results: List[DetectionResult] ): """计算风险分布""" distribution = Counter() for result in results: level = result.risk_level.value distribution[level] += 1 agg_result.risk_distribution = dict(distribution) def _calculate_overall_score( self, agg_result: 

    def _calculate_overall_score(
        self,
        agg_result: AggregatedResult,
        results: List[DetectionResult]
    ):
        """Compute the overall risk score."""
        if not results:
            agg_result.overall_risk_level = RiskLevel.NONE
            agg_result.overall_risk_score = 0.0
            return

        # Strategy 1: weighted average (weights can be tuned as needed)
        weights = self._get_default_weights()
        total_weighted_score = 0.0
        total_weight = 0.0

        for result in results:
            # Access rule_id defensively; fall back to the algorithm code when absent
            result_rule_id = getattr(result, 'rule_id', None)
            if not result_rule_id:
                result_rule_id = getattr(result, 'algorithm_code', 'unknown')

            weight = weights.get(result_rule_id, 1.0)
            total_weighted_score += result.risk_score * weight
            total_weight += weight

        if total_weight > 0:
            avg_score = total_weighted_score / total_weight
        else:
            avg_score = 0.0

        # Strategy 2: take the highest risk level
        max_risk_level = max(
            results,
            key=lambda r: self.risk_level_order.get(r.risk_level, 0)
        ).risk_level

        # Combined strategy: weighted score adjusted by the highest risk level
        final_score = self._adjust_score_by_risk_level(avg_score, max_risk_level)

        agg_result.overall_risk_score = min(100.0, max(0.0, final_score))
        agg_result.overall_risk_level = max_risk_level

    def _get_default_weights(self) -> Dict[str, float]:
        """Return the default rule weight configuration."""
        return {
            # Revenue integrity checks carry the highest weight
            "REVENUE_INTEGRITY_CHECK": 2.0,
            # Private-account collection detection
            "PRIVATE_ACCOUNT_DETECTION": 1.8,
            # Invoice fraud detection
            "INVOICE_FRAUD_DETECTION": 1.5,
            # Expense anomaly detection
            "EXPENSE_ANOMALY_DETECTION": 1.2,
            # Tax rate checks
            "TAX_RATE_CHECK": 1.0,
            # Overall assessment
            "TAX_RISK_ASSESSMENT": 0.8,
        }

    def _adjust_score_by_risk_level(self, score: float, risk_level: RiskLevel) -> float:
        """Adjust the score according to the risk level."""
        level_multipliers = {
            RiskLevel.CRITICAL: 1.0,   # unchanged
            RiskLevel.HIGH: 0.9,       # slight reduction
            RiskLevel.MEDIUM: 0.8,     # reduce by 20%
            RiskLevel.LOW: 0.6,        # reduce by 40%
            RiskLevel.NONE: 0.3,       # strong reduction
            RiskLevel.UNKNOWN: 0.5,    # moderate adjustment for an unknown state
        }
        multiplier = level_multipliers.get(risk_level, 1.0)
        return score * multiplier

    def _collect_timing_info(
        self,
        agg_result: AggregatedResult,
        results: List[DetectionResult]
    ):
        """Collect timing information."""
        detection_times = []
        for result in results:
            if hasattr(result, 'detected_at') and result.detected_at:
                detection_times.append(result.detected_at)

        if detection_times:
            agg_result.detection_start_time = min(detection_times)
            agg_result.detection_end_time = max(detection_times)
            agg_result.total_duration = (
                agg_result.detection_end_time - agg_result.detection_start_time
            ).total_seconds()


class RiskScoreCalculator:
    """Risk score calculator."""

    def __init__(self):
        # Risk level threshold configuration
        self.thresholds = {
            "CRITICAL": {"score_min": 85, "amount_min": 100000},
            "HIGH": {"score_min": 70, "amount_min": 50000},
            "MEDIUM": {"score_min": 50, "amount_min": 10000},
            "LOW": {"score_min": 20, "amount_min": 5000},
        }

    def calculate_comprehensive_score(
        self,
        aggregated_result: AggregatedResult,
        rule_weights: Optional[Dict[str, float]] = None
    ) -> Tuple[float, RiskLevel]:
        """
        Compute the comprehensive risk score.

        Args:
            aggregated_result: Aggregated result
            rule_weights: Rule weight configuration

        Returns:
            (overall score, overall risk level)
        """
        if not aggregated_result.results:
            return 0.0, RiskLevel.NONE

        # 1. Group results by risk level
        level_groups = self._group_by_risk_level(aggregated_result.results)

        # 2. Compute the weighted average score
        weighted_score = self._calculate_weighted_average(
            aggregated_result.results, rule_weights or {}
        )

        # 3. Account for the impact of high-risk rules
        adjusted_score = self._apply_high_risk_boost(weighted_score, level_groups)

        # 4. Account for risk concentration
        final_score = self._apply_concentration_penalty(adjusted_score, level_groups)

        # 5. Determine the overall risk level
        risk_level = self._determine_overall_risk_level(
            final_score, level_groups, aggregated_result.risk_distribution
        )

        return min(100.0, max(0.0, final_score)), risk_level

    def _group_by_risk_level(
        self,
        results: List[DetectionResult]
    ) -> Dict[RiskLevel, List[DetectionResult]]:
        """Group results by risk level."""
        groups = defaultdict(list)
        for result in results:
            groups[result.risk_level].append(result)
        return dict(groups)

    def _calculate_weighted_average(
        self,
        results: List[DetectionResult],
        rule_weights: Dict[str, float]
    ) -> float:
        """Compute the weighted average score."""
        if not results:
            return 0.0

        total_weighted = 0.0
        total_weight = 0.0

        for result in results:
            # Use the rule ID as the weight key, falling back to the algorithm code
            rule_id = getattr(result, 'rule_id', None) or getattr(result, 'algorithm_code', 'unknown')
            weight = rule_weights.get(rule_id, 1.0)
            total_weighted += result.risk_score * weight
            total_weight += weight

        return total_weighted / total_weight if total_weight > 0 else 0.0

    def _apply_high_risk_boost(
        self,
        base_score: float,
        level_groups: Dict[RiskLevel, List[DetectionResult]]
    ) -> float:
        """Apply a boost for high-risk rules."""
        boost = 0.0

        # Extra weight for critical and high risk rules
        critical_count = len(level_groups.get(RiskLevel.CRITICAL, []))
        high_count = len(level_groups.get(RiskLevel.HIGH, []))

        if critical_count > 0:
            boost += min(15.0, critical_count * 5.0)
        if high_count > 0:
            boost += min(10.0, high_count * 2.0)

        return base_score + boost

    def _apply_concentration_penalty(
        self,
        score: float,
        level_groups: Dict[RiskLevel, List[DetectionResult]]
    ) -> float:
        """Adjust the score for risk concentration."""
        total_rules = sum(len(group) for group in level_groups.values())
        if total_rules == 0:
            return score

        # Share of rules that are critical or high risk
        high_risk_count = (
            len(level_groups.get(RiskLevel.CRITICAL, [])) +
            len(level_groups.get(RiskLevel.HIGH, []))
        )
        concentration_ratio = high_risk_count / total_rules

        # If more than 50% of the rules are high risk, add up to 10 extra points
        if concentration_ratio > 0.5:
            adjustment = min(10.0, (concentration_ratio - 0.5) * 20.0)
            return score + adjustment

        return score

    def _determine_overall_risk_level(
        self,
        score: float,
        level_groups: Dict[RiskLevel, List[DetectionResult]],
        distribution: Dict[str, int]
    ) -> RiskLevel:
        """Determine the overall risk level."""
        level_order = {
            RiskLevel.CRITICAL: 5,
            RiskLevel.HIGH: 4,
            RiskLevel.MEDIUM: 3,
            RiskLevel.LOW: 2,
            RiskLevel.NONE: 1,
            RiskLevel.UNKNOWN: 0,
        }

        # Strategy 1: the highest risk level observed across all rules
        max_level = max(level_groups.keys(), key=lambda x: level_order.get(x, 0))

        # Strategy 2: the level implied by the score thresholds; the stricter of the two wins
        if score >= self.thresholds["CRITICAL"]["score_min"]:
            score_level = RiskLevel.CRITICAL
        elif score >= self.thresholds["HIGH"]["score_min"]:
            score_level = RiskLevel.HIGH
        elif score >= self.thresholds["MEDIUM"]["score_min"]:
            score_level = RiskLevel.MEDIUM
        elif score >= self.thresholds["LOW"]["score_min"]:
            score_level = RiskLevel.LOW
        else:
            return RiskLevel.NONE

        return max(score_level, max_level, key=lambda x: level_order.get(x, 0))


class EvidenceBuilder:
    """Evidence builder."""

    def __init__(self):
        self.evidence_types = {
            "data_summary": "Data summary",
            "risk_analysis": "Risk analysis",
            "evidence_detail": "Evidence detail",
            "comparison_result": "Comparison result",
            "statistical_info": "Statistical information",
        }

    def build_comprehensive_evidence_chain(
        self,
        aggregated_result: AggregatedResult
    ) -> List[RiskEvidence]:
        """
        Build the complete evidence chain.

        Args:
            aggregated_result: Aggregated result

        Returns:
            List of evidence items
        """
        logger.info(f"Building evidence chain for {len(aggregated_result.results)} detection results")

        evidence_chain = []

        # 1. Add the summary evidence
        summary_evidence = self._create_summary_evidence(aggregated_result)
        evidence_chain.append(summary_evidence)

        # 2. Add the risk distribution evidence
        distribution_evidence = self._create_distribution_evidence(aggregated_result)
        evidence_chain.append(distribution_evidence)

        # 3. Add per-rule detection evidence
        for result in aggregated_result.results:
            rule_evidence = self._create_rule_evidence(result)
            evidence_chain.append(rule_evidence)

        # 4. Add key-risk evidence
        key_risks = self._identify_key_risks(aggregated_result.results)
        if key_risks:
            key_risk_evidence = self._create_key_risks_evidence(key_risks)
            evidence_chain.append(key_risk_evidence)

        logger.info(f"Evidence chain built with {len(evidence_chain)} items")

        return evidence_chain

    def _create_summary_evidence(self, agg_result: AggregatedResult) -> RiskEvidence:
        """Create the summary evidence item."""
        return RiskEvidence(
            evidence_type="detection_summary",
            description="Detection result summary",
            data={
                "entity_id": agg_result.entity_id,
                "entity_type": agg_result.entity_type,
                "period": agg_result.period,
                "total_rules": agg_result.total_rules,
                "executed_rules": agg_result.executed_rules,
                "failed_rules": agg_result.failed_rules,
                "overall_risk_score": agg_result.overall_risk_score,
                "overall_risk_level": agg_result.overall_risk_level.value,
                "detection_duration": agg_result.total_duration,
            },
            metadata={"evidence_type": "summary"}
        )

    def _create_distribution_evidence(self, agg_result: AggregatedResult) -> RiskEvidence:
        """Create the risk distribution evidence item."""
        return RiskEvidence(
            evidence_type="risk_distribution",
            description="Risk level distribution",
            data=agg_result.risk_distribution,
            metadata={"evidence_type": "distribution"}
        )

    def _create_rule_evidence(self, result: DetectionResult) -> RiskEvidence:
        """Create the evidence item for a single rule result."""
        return RiskEvidence(
            evidence_type=f"rule_{result.rule_id}",
            description=f"Rule detection result: {result.rule_id}",
            data={
                "rule_id": result.rule_id,
                "risk_level": result.risk_level.value,
                "risk_score": result.risk_score,
                "description": result.description,
                "suggestion": result.suggestion,
                "detected_at": result.detected_at.isoformat() if result.detected_at else None,
            },
            metadata={"evidence_type": "rule_result", "rule_id": result.rule_id}
        )

    def _identify_key_risks(
        self,
        results: List[DetectionResult]
    ) -> List[DetectionResult]:
        """Identify the key risks."""
        # Keep only results at high risk or above
        high_risk_results = [
            r for r in results
            if r.risk_level in [RiskLevel.CRITICAL, RiskLevel.HIGH]
        ]

        # Sort by score, descending, and keep the top five
        high_risk_results.sort(key=lambda x: x.risk_score, reverse=True)
        return high_risk_results[:5]

    def _create_key_risks_evidence(
        self,
        key_risks: List[DetectionResult]
    ) -> RiskEvidence:
        """Create the key-risks evidence item."""
        return RiskEvidence(
            evidence_type="key_risks",
            description="Key risk list",
            data={
                "key_risk_count": len(key_risks),
                "risks": [
                    {
                        "rule_id": r.rule_id,
                        "risk_level": r.risk_level.value,
                        "risk_score": r.risk_score,
                        "description": (r.description[:200] + "...") if len(r.description) > 200 else r.description,
                    }
                    for r in key_risks
                ]
            },
            metadata={"evidence_type": "key_risks", "count": len(key_risks)}
        )


class SuggestionGenerator:
    """Remediation suggestion generator."""

    def __init__(self):
        # Suggestion templates keyed by risk level
        self.suggestion_templates = {
            RiskLevel.CRITICAL: [
                "[Critical risk] Major risks were found; take emergency measures immediately",
                "Carry out a full risk self-inspection and remediation right away",
                "Engage a professional tax adviser for an in-depth review",
                "Set up daily monitoring to detect and handle risks promptly",
            ],
            RiskLevel.HIGH: [
                "[High risk] Significant risks were found; remediate as soon as possible",
                "Verify the flagged risk points promptly and file supplementary declarations",
                "Strengthen internal tax compliance management",
                "Establish a regular risk self-inspection routine",
            ],
            RiskLevel.MEDIUM: [
                "[Medium risk] Some risks were found; monitor them and handle them in time",
                "Cross-check the relevant data to ensure declarations are accurate",
                "Improve the tax filing workflow",
                "Perform internal reviews on a regular basis",
            ],
            RiskLevel.LOW: [
                "[Low risk] Risk is limited; keep up the good practices",
                "Continue following a standardised tax filing workflow",
                "Reconcile data regularly",
            ],
            RiskLevel.NONE: [
                "No obvious risk was found; maintain the current compliance posture",
                "Perform regular tax self-inspections to stay compliant",
            ],
        }

    def generate_comprehensive_suggestions(
        self,
        aggregated_result: AggregatedResult
    ) -> List[str]:
        """
        Generate consolidated remediation suggestions.

        Args:
            aggregated_result: Aggregated result

        Returns:
            List of suggestions
        """
        logger.info(f"Generating remediation suggestions, overall risk level: {aggregated_result.overall_risk_level.value}")

        suggestions = []

        # 1. Base suggestions derived from the overall risk level
        base_suggestions = self._generate_base_suggestions(aggregated_result.overall_risk_level)
        suggestions.extend(base_suggestions)

        # 2. Targeted suggestions based on the risk distribution
        distribution_suggestions = self._generate_distribution_suggestions(
            aggregated_result.risk_distribution
        )
        suggestions.extend(distribution_suggestions)

        # 3. Specific suggestions based on the key risks
        key_risk_suggestions = self._generate_key_risk_suggestions(
            aggregated_result.results
        )
        suggestions.extend(key_risk_suggestions)

        # 4. Technical suggestions based on execution status
        if aggregated_result.failed_rules > 0:
            technical_suggestions = [
                f"{aggregated_result.failed_rules} rules failed to execute; check data completeness",
                "Check that database connections and query conditions are correct",
                "Verify that the relevant tables exist and contain data",
            ]
            suggestions.extend(technical_suggestions)

        # 5. Continuous improvement suggestions
        improvement_suggestions = self._generate_improvement_suggestions(
            aggregated_result.overall_risk_level
        )
        suggestions.extend(improvement_suggestions)

        # Deduplicate while preserving order, then cap the count
        unique_suggestions = list(dict.fromkeys(suggestions))

        logger.info(f"Generated {len(unique_suggestions)} remediation suggestions")
        return unique_suggestions[:10]  # return at most 10 suggestions

    def _generate_base_suggestions(self, risk_level: RiskLevel) -> List[str]:
        """Generate the base suggestions."""
        return self.suggestion_templates.get(risk_level, [])

    def _generate_distribution_suggestions(
        self,
        distribution: Dict[str, int]
    ) -> List[str]:
        """Generate suggestions based on the risk distribution."""
        suggestions = []

        critical_count = distribution.get(RiskLevel.CRITICAL.value, 0)
        high_count = distribution.get(RiskLevel.HIGH.value, 0)

        if critical_count > 0:
            suggestions.append(f"{critical_count} critical risk points were found and should be handled first")
        if high_count > 3:
            suggestions.append(f"There are many high-risk items ({high_count}); a full review is recommended")

        return suggestions

    def _generate_key_risk_suggestions(self, results: List[DetectionResult]) -> List[str]:
        """Generate suggestions based on the key risks."""
        suggestions = []

        # Suggestions grouped by algorithm type
        algorithm_suggestions = {
            "REVENUE_INTEGRITY_CHECK": [
                "Focus on revenue completeness and make sure all income has been declared correctly",
                "Reconcile platform top-up records against the declared figures",
            ],
            "PRIVATE_ACCOUNT_DETECTION": [
                "Check whether payments are being collected through private accounts",
                "Standardise the management of collection accounts to avoid tax risk",
            ],
            "INVOICE_FRAUD_DETECTION": [
                "Review invoice issuance for compliance",
                "Ensure invoice information is genuine, complete, and accurate",
            ],
            "EXPENSE_ANOMALY_DETECTION": [
                "Check the authenticity and reasonableness of costs and expenses",
                "Ensure expense deductions comply with tax law",
            ],
            "TAX_RATE_CHECK": [
                "Verify that the correct tax rates are applied",
                "Ensure the rates used for each tax type follow the latest policy",
            ],
        }

        for result in results:
            if result.risk_level in [RiskLevel.CRITICAL, RiskLevel.HIGH]:
                # Match the algorithm code against the rule ID (simple substring match)
                for algo_code, algo_suggestions in algorithm_suggestions.items():
                    if algo_code in result.rule_id:
                        suggestions.extend(algo_suggestions)
                        break

        return suggestions

    def _generate_improvement_suggestions(self, risk_level: RiskLevel) -> List[str]:
        """Generate continuous improvement suggestions."""
        if risk_level in [RiskLevel.CRITICAL, RiskLevel.HIGH]:
            return [
                "Establish a comprehensive tax risk management system",
                "Provide regular tax training for finance staff",
                "Introduce a professional tax compliance system",
                "Establish a regular communication channel with the tax authority",
            ]
        elif risk_level == RiskLevel.MEDIUM:
            return [
                "Improve internal control procedures",
                "Strengthen the review process for tax filings",
                "Carry out internal tax self-inspections regularly",
            ]
        else:
            return [
                "Maintain the current compliance posture",
                "Keep up to date with tax policy changes",
                "Improve tax record management",
            ]


class ResultProcessor:
    """
    Result processor (facade).

    Wires all result-processing components together behind a single interface.
    """

    def __init__(self):
        self.aggregator = ResultAggregator()
        self.score_calculator = RiskScoreCalculator()
        self.evidence_builder = EvidenceBuilder()
        self.suggestion_generator = SuggestionGenerator()

    def process_results(
        self,
        results: List[DetectionResult],
        task_id: str,
        entity_id: str,
        entity_type: str,
        period: str,
        rule_weights: Optional[Dict[str, float]] = None
    ) -> Dict[str, Any]:
        """
        Process detection results and produce the full report.

        Args:
            results: List of detection results
            task_id: Task ID
            entity_id: Entity ID
            entity_type: Entity type
            period: Detection period
            rule_weights: Rule weight configuration

        Returns:
            The complete processing report
        """
        logger.info(
            f"Processing {len(results)} detection results: "
            f"entity={entity_id}, period={period}"
        )

        # 1. Aggregate the results
        aggregated_result = self.aggregator.aggregate(
            results, task_id, entity_id, entity_type, period
        )

        # 2. Compute the overall risk score
        overall_score, overall_level = self.score_calculator.calculate_comprehensive_score(
            aggregated_result, rule_weights
        )
        aggregated_result.overall_risk_score = overall_score
        aggregated_result.overall_risk_level = overall_level

        # 3. Build the evidence chain
        evidence_chain = self.evidence_builder.build_comprehensive_evidence_chain(
            aggregated_result
        )
        aggregated_result.evidence_chain = evidence_chain

        # 4. Generate remediation suggestions
        suggestions = self.suggestion_generator.generate_comprehensive_suggestions(
            aggregated_result
        )

        # 5. Build the final report
        report = self._build_final_report(aggregated_result, suggestions)

        logger.info(
            f"Result processing finished: overall risk level={overall_level.value}, "
            f"overall score={overall_score:.2f}"
        )

        return report

    def _build_final_report(
        self,
        aggregated_result: AggregatedResult,
        suggestions: List[str]
    ) -> Dict[str, Any]:
        """Build the final report."""
        return {
            "report_type": "risk_detection_report",
            "summary": {
                "entity_id": aggregated_result.entity_id,
                "entity_type": aggregated_result.entity_type,
                "period": aggregated_result.period,
                "task_id": aggregated_result.task_id,
                "overall_risk_level": aggregated_result.overall_risk_level.value,
                "overall_risk_score": aggregated_result.overall_risk_score,
                "total_rules": aggregated_result.total_rules,
                "executed_rules": aggregated_result.executed_rules,
                "failed_rules": aggregated_result.failed_rules,
                "detection_duration": aggregated_result.total_duration,
            },
            "risk_analysis": {
                "risk_distribution": aggregated_result.risk_distribution,
                "high_risk_count": (
                    aggregated_result.risk_distribution.get(RiskLevel.CRITICAL.value, 0) +
                    aggregated_result.risk_distribution.get(RiskLevel.HIGH.value, 0)
                ),
                "max_risk_score": max(
                    [r.risk_score for r in aggregated_result.results],
                    default=0.0
                ),
            },
            "detailed_results": [r.to_dict() for r in aggregated_result.results],
            "evidence_chain": [e.to_dict() for e in aggregated_result.evidence_chain],
            "recommendations": suggestions,
            "metadata": {
                "report_generated_at": datetime.now().isoformat(),
                "processor_version": "1.0.0",
                "total_evidence_items": len(aggregated_result.evidence_chain),
            }
        }
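
# Usage sketch (illustrative only). It shows how a caller might wire the facade
# together; the identifiers and values below (task/entity IDs, period string, the
# shape of the incoming DetectionResult objects) are assumptions based on the
# attributes this module reads (rule_id, risk_level, risk_score, description,
# suggestion, detected_at), not the confirmed API of ..algorithms.base.
#
#     processor = ResultProcessor()
#     report = processor.process_results(
#         results=detection_results,   # List[DetectionResult] produced by the rule engine
#         task_id="TASK-001",
#         entity_id="ENTITY-001",
#         entity_type="company",
#         period="2024Q1",
#         rule_weights={"REVENUE_INTEGRITY_CHECK": 2.0},  # optional per-rule weights
#     )
#     print(report["summary"]["overall_risk_level"], report["summary"]["overall_risk_score"])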