""" 成本费用异常检测算法 检测成本费用是否合理,识别虚增成本、费用异常等风险 """ from typing import Dict, Any, List, Optional, Tuple from datetime import datetime, timedelta from sqlalchemy import select, func, and_, or_ from sqlalchemy.ext.asyncio import AsyncSession from loguru import logger import statistics from .base import ( RiskDetectionAlgorithm, DetectionContext, DetectionResult, RiskEvidence, ) from app.models.risk_detection import RiskLevel from app.models.expense import Expense class ExpenseAnomalyDetectionAlgorithm(RiskDetectionAlgorithm): """成本费用异常检测算法""" def get_algorithm_code(self) -> str: return "EXPENSE_ANOMALY_DETECTION" def get_algorithm_name(self) -> str: return "成本费用异常检测" def get_description(self) -> str: return ( "通过分析成本费用凭证," "检测是否存在虚增成本、费用异常、违规支出等风险" ) async def _do_detect(self, context: DetectionContext) -> DetectionResult: """执行成本费用异常检测""" # 获取参数 entity_id = context.get_parameter("entity_id") entity_type = context.get_parameter("entity_type", "streamer") period = context.get_parameter("period") # 格式:YYYY-MM threshold_multiplier = context.get_parameter("threshold_multiplier", 2.0) # 异常倍数阈值 if not entity_id or not period: return self._create_error_result(context, "缺少必要参数:entity_id 或 period") db_session = context.db_session if not db_session: return self._create_error_result(context, "缺少数据库会话") try: # 获取费用数据 expenses = await self._get_expenses(db_session, entity_id, period) # 获取历史费用数据用于对比 historical_expenses = await self._get_historical_expenses( db_session, entity_id, period ) # 分析异常 anomalies = self._analyze_expense_anomalies( expenses, historical_expenses, threshold_multiplier ) # 计算风险指标 total_expense = sum(exp["expense_amount"] for exp in expenses) large_expenses = [exp for exp in expenses if exp["is_large_amount"]] cross_border_count = sum(1 for exp in expenses if exp["is_cross_border"]) # 计算费用结构 expense_by_category = self._analyze_expense_structure(expenses) # 判断风险等级 risk_level, risk_score = self._calculate_risk_level( anomalies, large_expenses, cross_border_count ) # 生成风险描述和建议 description, suggestion = self._generate_risk_description( anomalies, total_expense, len(expenses) ) # 创建检测结果 result = DetectionResult( task_id=context.task_id, rule_id=context.rule_id, entity_id=entity_id, entity_type=entity_type, risk_level=risk_level, risk_score=risk_score, description=description, suggestion=suggestion, risk_data={ "expenses": expenses, "historical_expenses": historical_expenses, "anomalies": anomalies, "total_expense": total_expense, "expense_count": len(expenses), "large_expenses_count": len(large_expenses), "cross_border_count": cross_border_count, "expense_by_category": expense_by_category, "period": period, }, ) # 添加证据 result.add_evidence(RiskEvidence( evidence_type="expense_summary", description=f"期间内共有 {len(expenses)} 笔费用凭证,总金额 {total_expense:,.2f}元", data={ "count": len(expenses), "total_amount": total_expense, }, )) result.add_evidence(RiskEvidence( evidence_type="expense_structure", description=f"费用结构:{expense_by_category}", data=expense_by_category, )) if large_expenses: result.add_evidence(RiskEvidence( evidence_type="large_expenses", description=f"大额费用(>5万):{len(large_expenses)} 笔", data={ "count": len(large_expenses), "total_amount": sum(exp["expense_amount"] for exp in large_expenses), }, )) if anomalies: for anomaly in anomalies[:3]: result.add_evidence(RiskEvidence( evidence_type="anomaly_detail", description=f"异常:{anomaly['description']}", data=anomaly, )) return result except Exception as e: logger.error(f"成本费用异常检测执行失败: {str(e)}", exc_info=True) return self._create_error_result(context, f"检测执行失败: {str(e)}") async def _get_expenses( self, db_session: AsyncSession, entity_id: str, period: str ) -> List[Dict[str, Any]]: """获取费用数据""" try: start_date, end_date = self._parse_period(period) stmt = select(Expense).where( and_( Expense.payee_name == entity_id, # 假设用收款方作为实体标识 Expense.expense_date >= start_date, Expense.expense_date <= end_date, Expense.payment_status == "已支付", ) ).order_by(Expense.expense_date.desc()) result = await db_session.execute(stmt) expenses = result.scalars().all() return [ { "expense_id": exp.expense_id, "voucher_no": exp.voucher_no, "expense_type": exp.expense_type, "expense_category": exp.expense_category, "payer_name": exp.payer_name, "payee_name": exp.payee_name, "expense_date": exp.expense_date, "expense_amount": exp.expense_amount, "tax_amount": exp.tax_amount, "tax_rate": exp.tax_rate, "payment_method": exp.payment_method, "is_large_amount": exp.is_large_amount, "is_cross_border": exp.is_cross_border, "fiscal_year": exp.fiscal_year, "fiscal_period": exp.fiscal_period, } for exp in expenses ] except Exception as e: logger.error(f"获取费用数据失败: {str(e)}") return [] async def _get_historical_expenses( self, db_session: AsyncSession, entity_id: str, period: str ) -> List[Dict[str, Any]]: """获取历史费用数据用于对比""" try: # 获取前6个月的数据 year, month = map(int, period.split("-")) start_year = year if month > 6 else year - 1 start_month = (month - 6) if month > 6 else month + 6 start_date = datetime(start_year, start_month, 1) end_date = datetime(year, month, 1) - timedelta(days=1) stmt = select(Expense).where( and_( Expense.payee_name == entity_id, Expense.expense_date >= start_date, Expense.expense_date < datetime(year, month, 1), Expense.payment_status == "已支付", ) ).order_by(Expense.expense_date.desc()) result = await db_session.execute(stmt) expenses = result.scalars().all() return [ { "expense_date": exp.expense_date, "expense_amount": exp.expense_amount, "expense_category": exp.expense_category, } for exp in expenses ] except Exception as e: logger.error(f"获取历史费用数据失败: {str(e)}") return [] def _analyze_expense_anomalies( self, expenses: List[Dict[str, Any]], historical_expenses: List[Dict[str, Any]], threshold_multiplier: float, ) -> List[Dict[str, Any]]: """分析费用异常""" anomalies = [] if not expenses: return anomalies current_total = sum(exp["expense_amount"] for exp in expenses) # 1. 检查费用总额异常增长 if historical_expenses: historical_monthly_avg = self._calculate_monthly_average( historical_expenses ) if historical_monthly_avg > 0: growth_rate = (current_total - historical_monthly_avg) / historical_monthly_avg if growth_rate > threshold_multiplier: # 增长超过阈值 anomalies.append({ "type": "abnormal_growth", "description": f"费用总额同比增长 {growth_rate*100:.1f}%,超出合理范围", "current_total": current_total, "historical_avg": historical_monthly_avg, "growth_rate": growth_rate, }) # 2. 检查大额费用集中 large_expenses = [exp for exp in expenses if exp["is_large_amount"]] if large_expenses: large_ratio = len(large_expenses) / len(expenses) if large_ratio > 0.3: # 大额费用占比超过30% anomalies.append({ "type": "large_expense_concentration", "description": f"大额费用占比 {large_ratio*100:.1f}% 过高", "large_count": len(large_expenses), "total_count": len(expenses), "ratio": large_ratio, }) # 3. 检查跨境支付异常 cross_border_expenses = [exp for exp in expenses if exp["is_cross_border"]] if cross_border_expenses: cross_border_amount = sum(exp["expense_amount"] for exp in cross_border_expenses) cross_border_ratio = cross_border_amount / current_total if current_total > 0 else 0 if cross_border_ratio > 0.5: # 跨境支付占比超过50% anomalies.append({ "type": "high_cross_border_ratio", "description": f"跨境支付占比 {cross_border_ratio*100:.1f}% 过高", "amount": cross_border_amount, "ratio": cross_border_ratio, }) # 4. 检查费用类别异常 expense_by_category = self._analyze_expense_structure(expenses) for category, amount in expense_by_category.items(): ratio = amount / current_total if current_total > 0 else 0 if category in ["其他"] and ratio > 0.5: # 其他类费用占比过高 anomalies.append({ "type": "unusual_category_ratio", "description": f"{category}费用占比 {ratio*100:.1f}% 过高", "category": category, "amount": amount, "ratio": ratio, }) # 5. 检查税负异常 avg_tax_rate = statistics.mean([exp["tax_rate"] for exp in expenses if exp["tax_rate"] > 0]) if expenses else 0 if avg_tax_rate > 0.13: # 增值税税率超过13% anomalies.append({ "type": "abnormal_tax_rate", "description": f"平均税率 {avg_tax_rate*100:.2f}% 异常偏高", "avg_tax_rate": avg_tax_rate, }) # 6. 检查同日多笔大额费用 date_amounts = {} for exp in expenses: date_key = exp["expense_date"].strftime("%Y-%m-%d") if date_key not in date_amounts: date_amounts[date_key] = [] date_amounts[date_key].append(exp["expense_amount"]) for date, amounts in date_amounts.items(): if len(amounts) > 5: # 同日超过5笔费用 total = sum(amounts) if any(amount > 100000 for amount in amounts): # 包含大额费用 anomalies.append({ "type": "same_day_multiple_expenses", "description": f"{date} 同日发生 {len(amounts)} 笔费用,存在拆分支出嫌疑", "date": date, "count": len(amounts), "total": total, }) return anomalies def _analyze_expense_structure( self, expenses: List[Dict[str, Any]] ) -> Dict[str, float]: """分析费用结构""" expense_by_category = {} total = sum(exp["expense_amount"] for exp in expenses) for exp in expenses: category = exp["expense_category"] if category not in expense_by_category: expense_by_category[category] = 0 expense_by_category[category] += exp["expense_amount"] # 转换为百分比 if total > 0: for category in expense_by_category: expense_by_category[category] = expense_by_category[category] / total * 100 return expense_by_category def _calculate_monthly_average( self, historical_expenses: List[Dict[str, Any]] ) -> float: """计算月均费用""" if not historical_expenses: return 0.0 # 按月分组 monthly_totals = {} for exp in historical_expenses: month_key = exp["expense_date"].strftime("%Y-%m") if month_key not in monthly_totals: monthly_totals[month_key] = 0 monthly_totals[month_key] += exp["expense_amount"] if not monthly_totals: return 0.0 return statistics.mean(monthly_totals.values()) def _calculate_risk_level( self, anomalies: List[Dict[str, Any]], large_expenses: List[Dict[str, Any]], cross_border_count: int, ) -> tuple[RiskLevel, float]: """计算风险等级""" if not anomalies: return RiskLevel.LOW, 10.0 risk_score = 30.0 # 基础分 # 基于异常类型加分 anomaly_scores = { "abnormal_growth": 35, "large_expense_concentration": 25, "high_cross_border_ratio": 30, "unusual_category_ratio": 20, "abnormal_tax_rate": 25, "same_day_multiple_expenses": 30, } for anomaly in anomalies: risk_score += anomaly_scores.get(anomaly["type"], 10) # 基于大额费用数量加分 if len(large_expenses) > 10: risk_score += 20 elif len(large_expenses) > 5: risk_score += 10 # 基于跨境支付数量加分 if cross_border_count > 5: risk_score += 15 # 限制分数范围 risk_score = min(risk_score, 100.0) # 判断风险等级 if risk_score >= 90: return RiskLevel.CRITICAL, risk_score elif risk_score >= 70: return RiskLevel.HIGH, risk_score elif risk_score >= 50: return RiskLevel.MEDIUM, risk_score else: return RiskLevel.LOW, risk_score def _generate_risk_description( self, anomalies: List[Dict[str, Any]], total_expense: float, expense_count: int, ) -> tuple[str, str]: """生成风险描述和建议""" if anomalies: anomaly_count = len(anomalies) description = ( f"检测到 {anomaly_count} 项成本费用异常:" f"期间内共有 {expense_count} 笔费用,总金额 {total_expense:,.2f}元," f"存在费用异常增长、大额费用集中等问题。" ) suggestion = ( "1. 核实大额费用的真实性和合理性;\n" "2. 检查是否存在虚增成本的情况;\n" "3. 提供费用支出的相关证明材料;\n" "4. 规范费用分类和会计处理;\n" "5. 建立费用审批和内控制度;\n" "6. 如发现问题,及时调整账务处理。" ) else: description = "成本费用检查未发现明显异常,费用支出基本规范。" suggestion = "继续保持良好的费用管理。" return description, suggestion def _parse_period(self, period: str) -> tuple[datetime, datetime]: """解析期间为开始和结束日期""" try: year, month = map(int, period.split("-")) start_date = datetime(year, month, 1) if month == 12: end_date = datetime(year + 1, 1, 1) - timedelta(days=1) else: end_date = datetime(year, month + 1, 1) - timedelta(days=1) return start_date, end_date except Exception as e: logger.error(f"解析期间失败: {period}, 错误: {str(e)}") now = datetime.now() start_date = datetime(now.year, now.month, 1) end_date = now return start_date, end_date def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult: """创建错误结果""" return DetectionResult( task_id=context.task_id, rule_id=context.rule_id, entity_id=context.get_parameter("entity_id", ""), entity_type=context.get_parameter("entity_type", "streamer"), risk_level=RiskLevel.UNKNOWN, risk_score=0.0, description=f"成本费用异常检测失败: {error_message}", suggestion="请检查参数设置或联系系统管理员", )