deep-risk/backend/app/services/risk_detection/algorithms/expense_anomaly.py
2025-12-14 20:08:27 +08:00

473 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
成本费用异常检测算法
检测成本费用是否合理,识别虚增成本、费用异常等风险
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
from sqlalchemy import select, func, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
from loguru import logger
import statistics
from .base import (
RiskDetectionAlgorithm,
DetectionContext,
DetectionResult,
RiskEvidence,
)
from app.models.risk_detection import RiskLevel
from app.models.expense import Expense
class ExpenseAnomalyDetectionAlgorithm(RiskDetectionAlgorithm):
    """Risk detection algorithm that looks for anomalies in expense vouchers.

    For one entity and one accounting month it loads the paid expenses,
    compares them with the preceding months and reports inflated-cost
    indicators: abnormal growth, concentration of large vouchers, a high
    cross-border share, an oversized catch-all category, a high average tax
    rate and many vouchers on one day.
    """

    # Number of months before the analysed period used as comparison baseline.
    _HISTORY_MONTHS = 6
    # Share of vouchers flagged ``is_large_amount`` above which it is reported.
    _LARGE_EXPENSE_RATIO_LIMIT = 0.3
    # Share of the total amount paid cross-border above which it is reported.
    _CROSS_BORDER_RATIO_LIMIT = 0.5
    # Catch-all categories and the share of the total they may hold.
    _CATCH_ALL_CATEGORIES = ("其他",)
    _CATCH_ALL_RATIO_LIMIT = 0.5
    # Average (positive) tax rate above which it is reported.
    _TAX_RATE_LIMIT = 0.13
    # Same-day rule: more than this many vouchers, one of them above the amount.
    _SAME_DAY_COUNT_LIMIT = 5
    _SAME_DAY_LARGE_AMOUNT = 100000
    # Only the first few anomalies are attached to the result as evidence.
    _MAX_ANOMALY_EVIDENCE = 3
    # Score added per anomaly type; unknown types add 10.
    _ANOMALY_SCORES = {
        "abnormal_growth": 35,
        "large_expense_concentration": 25,
        "high_cross_border_ratio": 30,
        "unusual_category_ratio": 20,
        "abnormal_tax_rate": 25,
        "same_day_multiple_expenses": 30,
    }

    def get_algorithm_code(self) -> str:
        """Return the unique code identifying this algorithm."""
        return "EXPENSE_ANOMALY_DETECTION"

    def get_algorithm_name(self) -> str:
        """Return the display name of this algorithm."""
        return "成本费用异常检测"

    def get_description(self) -> str:
        """Return a one-sentence description of what the algorithm checks."""
        return (
            "通过分析成本费用凭证,"
            "检测是否存在虚增成本、费用异常、违规支出等风险"
        )

    async def _do_detect(self, context: DetectionContext) -> DetectionResult:
        """Run the expense anomaly detection for one entity and period.

        Parameters read from ``context``:
            entity_id: value matched against ``Expense.payee_name`` (required).
            entity_type: label copied into the result, default ``"streamer"``.
            period: accounting month formatted ``YYYY-MM`` (required).
            threshold_multiplier: growth-rate threshold, default ``2.0``.

        Returns:
            The populated ``DetectionResult``. When a required parameter or
            the DB session is missing, the period is malformed, or anything
            fails while detecting, the result of ``_create_error_result``.
        """
        entity_id = context.get_parameter("entity_id")
        entity_type = context.get_parameter("entity_type", "streamer")
        period = context.get_parameter("period")  # expected format: YYYY-MM
        threshold_multiplier = context.get_parameter("threshold_multiplier", 2.0)

        if not entity_id or not period:
            return self._create_error_result(context, "缺少必要参数entity_id 或 period")

        db_session = context.db_session
        if not db_session:
            return self._create_error_result(context, "缺少数据库会话")

        # Reject a malformed period up front: `_parse_period` would otherwise
        # fall back to the current month and the wrong month would be analysed
        # and reported under the requested period label.
        try:
            self._month_start(period)
        except (AttributeError, TypeError, ValueError):
            return self._create_error_result(
                context, f"period 格式无效,应为 YYYY-MM: {period}"
            )

        try:
            expenses = await self._get_expenses(db_session, entity_id, period)
            # Baseline for the growth comparison; empty when unavailable.
            historical_expenses = await self._get_historical_expenses(
                db_session, entity_id, period
            )

            anomalies = self._analyze_expense_anomalies(
                expenses, historical_expenses, threshold_multiplier
            )

            # Aggregate indicators reported alongside the anomalies.
            total_expense = sum(exp["expense_amount"] for exp in expenses)
            large_expenses = [exp for exp in expenses if exp["is_large_amount"]]
            cross_border_count = sum(1 for exp in expenses if exp["is_cross_border"])
            expense_by_category = self._analyze_expense_structure(expenses)

            risk_level, risk_score = self._calculate_risk_level(
                anomalies, large_expenses, cross_border_count
            )
            description, suggestion = self._generate_risk_description(
                anomalies, total_expense, len(expenses)
            )

            result = DetectionResult(
                task_id=context.task_id,
                rule_id=context.rule_id,
                entity_id=entity_id,
                entity_type=entity_type,
                risk_level=risk_level,
                risk_score=risk_score,
                description=description,
                suggestion=suggestion,
                risk_data={
                    "expenses": expenses,
                    "historical_expenses": historical_expenses,
                    "anomalies": anomalies,
                    "total_expense": total_expense,
                    "expense_count": len(expenses),
                    "large_expenses_count": len(large_expenses),
                    "cross_border_count": cross_border_count,
                    "expense_by_category": expense_by_category,
                    "period": period,
                },
            )

            result.add_evidence(RiskEvidence(
                evidence_type="expense_summary",
                description=f"期间内共有 {len(expenses)} 笔费用凭证,总金额 {total_expense:,.2f}",
                data={
                    "count": len(expenses),
                    "total_amount": total_expense,
                },
            ))
            result.add_evidence(RiskEvidence(
                evidence_type="expense_structure",
                description=f"费用结构:{expense_by_category}",
                data=expense_by_category,
            ))
            if large_expenses:
                result.add_evidence(RiskEvidence(
                    evidence_type="large_expenses",
                    # The original text lacked the closing parenthesis and ran
                    # the count straight into the amount ("5万3").
                    description=f"大额费用(>5万)共 {len(large_expenses)} 笔",
                    data={
                        "count": len(large_expenses),
                        "total_amount": sum(
                            exp["expense_amount"] for exp in large_expenses
                        ),
                    },
                ))
            for anomaly in anomalies[: self._MAX_ANOMALY_EVIDENCE]:
                result.add_evidence(RiskEvidence(
                    evidence_type="anomaly_detail",
                    description=f"异常:{anomaly['description']}",
                    data=anomaly,
                ))
            return result
        except Exception as e:
            # loguru has no ``exc_info`` keyword: any keyword argument makes it
            # run ``str.format`` on the message, which can itself raise when
            # the exception text contains braces. ``logger.exception`` records
            # the traceback without that side effect.
            logger.exception(f"成本费用异常检测执行失败: {str(e)}")
            return self._create_error_result(context, f"检测执行失败: {str(e)}")

    async def _get_expenses(
        self, db_session: AsyncSession, entity_id: str, period: str
    ) -> List[Dict[str, Any]]:
        """Load the paid expenses of ``entity_id`` within ``period``.

        Args:
            db_session: async SQLAlchemy session used for the query.
            entity_id: compared with ``Expense.payee_name`` (the payee is
                used as the entity identifier).
            period: accounting month formatted ``YYYY-MM``.

        Returns:
            One dict per expense, newest first.

        Raises:
            Exception: whatever the query raises, after logging it. Returning
                an empty list here would make a failed query look like a
                period without expenses and yield a clean low-risk result.
        """
        try:
            start_date, end_date = self._parse_period(period)
            # Half-open range, as in the historical query, so records carrying
            # a time of day on the last day of the month are not dropped.
            end_exclusive = end_date + timedelta(days=1)
            stmt = (
                select(Expense)
                .where(
                    and_(
                        Expense.payee_name == entity_id,
                        Expense.expense_date >= start_date,
                        Expense.expense_date < end_exclusive,
                        Expense.payment_status == "已支付",
                    )
                )
                .order_by(Expense.expense_date.desc())
            )
            result = await db_session.execute(stmt)
            rows = result.scalars().all()
            return [
                {
                    "expense_id": exp.expense_id,
                    "voucher_no": exp.voucher_no,
                    "expense_type": exp.expense_type,
                    "expense_category": exp.expense_category,
                    "payer_name": exp.payer_name,
                    "payee_name": exp.payee_name,
                    "expense_date": exp.expense_date,
                    "expense_amount": exp.expense_amount,
                    "tax_amount": exp.tax_amount,
                    "tax_rate": exp.tax_rate,
                    "payment_method": exp.payment_method,
                    "is_large_amount": exp.is_large_amount,
                    "is_cross_border": exp.is_cross_border,
                    "fiscal_year": exp.fiscal_year,
                    "fiscal_period": exp.fiscal_period,
                }
                for exp in rows
            ]
        except Exception as e:
            logger.error(f"获取费用数据失败: {str(e)}")
            raise

    async def _get_historical_expenses(
        self, db_session: AsyncSession, entity_id: str, period: str
    ) -> List[Dict[str, Any]]:
        """Load the paid expenses of the months preceding ``period``.

        The window starts ``_HISTORY_MONTHS`` months before the first day of
        ``period`` and ends just before that day.

        Returns:
            Dicts with ``expense_date``, ``expense_amount`` and
            ``expense_category``, newest first. On any failure the error is
            logged and an empty list is returned, so that only the growth
            comparison is skipped.
        """
        try:
            period_start = self._month_start(period)
            history_start = self._shift_months(period_start, -self._HISTORY_MONTHS)
            stmt = (
                select(Expense)
                .where(
                    and_(
                        Expense.payee_name == entity_id,
                        Expense.expense_date >= history_start,
                        Expense.expense_date < period_start,
                        Expense.payment_status == "已支付",
                    )
                )
                .order_by(Expense.expense_date.desc())
            )
            result = await db_session.execute(stmt)
            return [
                {
                    "expense_date": exp.expense_date,
                    "expense_amount": exp.expense_amount,
                    "expense_category": exp.expense_category,
                }
                for exp in result.scalars().all()
            ]
        except Exception as e:
            logger.error(f"获取历史费用数据失败: {str(e)}")
            return []

    def _analyze_expense_anomalies(
        self,
        expenses: List[Dict[str, Any]],
        historical_expenses: List[Dict[str, Any]],
        threshold_multiplier: float,
    ) -> List[Dict[str, Any]]:
        """Apply the anomaly rules to the expenses of the analysed period.

        Args:
            expenses: records as produced by ``_get_expenses``.
            historical_expenses: records as produced by
                ``_get_historical_expenses``; may be empty.
            threshold_multiplier: the growth rule fires when
                ``(current - average) / average`` exceeds this value.

        Returns:
            One dict per detected anomaly, each with at least ``type`` and
            ``description``, in rule order. Empty when ``expenses`` is empty.
        """
        anomalies: List[Dict[str, Any]] = []
        if not expenses:
            return anomalies

        current_total = sum(exp["expense_amount"] for exp in expenses)

        # 1. Total compared with the historical monthly average.
        historical_monthly_avg = self._calculate_monthly_average(historical_expenses)
        if historical_monthly_avg > 0:
            growth_rate = (current_total - historical_monthly_avg) / historical_monthly_avg
            if growth_rate > threshold_multiplier:
                anomalies.append({
                    "type": "abnormal_growth",
                    "description": f"费用总额同比增长 {growth_rate*100:.1f}%,超出合理范围",
                    "current_total": current_total,
                    "historical_avg": historical_monthly_avg,
                    "growth_rate": growth_rate,
                })

        # 2. Share of vouchers flagged as large amounts.
        large_expenses = [exp for exp in expenses if exp["is_large_amount"]]
        if large_expenses:
            large_ratio = len(large_expenses) / len(expenses)
            if large_ratio > self._LARGE_EXPENSE_RATIO_LIMIT:
                anomalies.append({
                    "type": "large_expense_concentration",
                    "description": f"大额费用占比 {large_ratio*100:.1f}% 过高",
                    "large_count": len(large_expenses),
                    "total_count": len(expenses),
                    "ratio": large_ratio,
                })

        # 3. Share of the amount paid cross-border.
        cross_border_expenses = [exp for exp in expenses if exp["is_cross_border"]]
        if cross_border_expenses:
            cross_border_amount = sum(
                exp["expense_amount"] for exp in cross_border_expenses
            )
            cross_border_ratio = (
                cross_border_amount / current_total if current_total > 0 else 0
            )
            if cross_border_ratio > self._CROSS_BORDER_RATIO_LIMIT:
                anomalies.append({
                    "type": "high_cross_border_ratio",
                    "description": f"跨境支付占比 {cross_border_ratio*100:.1f}% 过高",
                    "amount": cross_border_amount,
                    "ratio": cross_border_ratio,
                })

        # 4. Share of catch-all categories. The ratio is taken from the summed
        #    amounts: `_analyze_expense_structure` yields percentages, and
        #    dividing those by the total again gave a meaningless value.
        if current_total > 0:
            for category, amount in self._sum_by_category(expenses).items():
                if category not in self._CATCH_ALL_CATEGORIES:
                    continue
                ratio = amount / current_total
                if ratio > self._CATCH_ALL_RATIO_LIMIT:
                    anomalies.append({
                        "type": "unusual_category_ratio",
                        "description": f"{category}费用占比 {ratio*100:.1f}% 过高",
                        "category": category,
                        "amount": amount,
                        "ratio": ratio,
                    })

        # 5. Average of the positive tax rates. `statistics.mean` raises on an
        #    empty sequence, so expenses without any positive rate are guarded.
        positive_rates = [
            exp["tax_rate"]
            for exp in expenses
            if exp["tax_rate"] is not None and exp["tax_rate"] > 0
        ]
        avg_tax_rate = statistics.mean(positive_rates) if positive_rates else 0
        if avg_tax_rate > self._TAX_RATE_LIMIT:
            anomalies.append({
                "type": "abnormal_tax_rate",
                "description": f"平均税率 {avg_tax_rate*100:.2f}% 异常偏高",
                "avg_tax_rate": avg_tax_rate,
            })

        # 6. Many vouchers on one day, at least one of them large.
        amounts_by_day: Dict[str, List[Any]] = {}
        for exp in expenses:
            day = exp["expense_date"].strftime("%Y-%m-%d")
            amounts_by_day.setdefault(day, []).append(exp["expense_amount"])
        for day, amounts in amounts_by_day.items():
            if len(amounts) <= self._SAME_DAY_COUNT_LIMIT:
                continue
            if any(amount > self._SAME_DAY_LARGE_AMOUNT for amount in amounts):
                anomalies.append({
                    "type": "same_day_multiple_expenses",
                    "description": f"{day} 同日发生 {len(amounts)} 笔费用,存在拆分支出嫌疑",
                    "date": day,
                    "count": len(amounts),
                    "total": sum(amounts),
                })

        return anomalies

    @staticmethod
    def _sum_by_category(expenses: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return the summed ``expense_amount`` per ``expense_category``."""
        totals: Dict[str, Any] = {}
        for exp in expenses:
            category = exp["expense_category"]
            totals[category] = totals.get(category, 0) + exp["expense_amount"]
        return totals

    def _analyze_expense_structure(
        self, expenses: List[Dict[str, Any]]
    ) -> Dict[str, float]:
        """Return each category's share of the total amount, in percent.

        When the total amount is not positive no percentage can be computed
        and the summed amounts per category are returned unchanged.
        """
        totals = self._sum_by_category(expenses)
        total = sum(exp["expense_amount"] for exp in expenses)
        if total > 0:
            return {
                category: amount / total * 100
                for category, amount in totals.items()
            }
        return totals

    def _calculate_monthly_average(
        self, historical_expenses: List[Dict[str, Any]]
    ) -> float:
        """Return the mean of the monthly expense totals.

        Records are grouped by the ``YYYY-MM`` of ``expense_date``; months
        without any record are not part of the mean. Returns ``0.0`` for an
        empty input.
        """
        if not historical_expenses:
            return 0.0

        monthly_totals: Dict[str, Any] = {}
        for exp in historical_expenses:
            month_key = exp["expense_date"].strftime("%Y-%m")
            monthly_totals[month_key] = (
                monthly_totals.get(month_key, 0) + exp["expense_amount"]
            )

        if not monthly_totals:
            return 0.0
        return statistics.mean(monthly_totals.values())

    def _calculate_risk_level(
        self,
        anomalies: List[Dict[str, Any]],
        large_expenses: List[Dict[str, Any]],
        cross_border_count: int,
    ) -> tuple[RiskLevel, float]:
        """Derive the risk level and score from the detected anomalies.

        Without anomalies the result is ``(RiskLevel.LOW, 10.0)``. Otherwise
        the score starts at 30, grows per anomaly type, by the number of
        large expenses and by the number of cross-border payments, and is
        capped at 100.

        Returns:
            ``(risk_level, risk_score)``; 90+ is CRITICAL, 70+ HIGH,
            50+ MEDIUM, anything lower LOW.
        """
        if not anomalies:
            return RiskLevel.LOW, 10.0

        risk_score = 30.0  # base score once any anomaly exists
        for anomaly in anomalies:
            risk_score += self._ANOMALY_SCORES.get(anomaly["type"], 10)

        large_count = len(large_expenses)
        if large_count > 10:
            risk_score += 20
        elif large_count > 5:
            risk_score += 10

        if cross_border_count > 5:
            risk_score += 15

        risk_score = min(risk_score, 100.0)

        if risk_score >= 90:
            return RiskLevel.CRITICAL, risk_score
        if risk_score >= 70:
            return RiskLevel.HIGH, risk_score
        if risk_score >= 50:
            return RiskLevel.MEDIUM, risk_score
        return RiskLevel.LOW, risk_score

    def _generate_risk_description(
        self,
        anomalies: List[Dict[str, Any]],
        total_expense: float,
        expense_count: int,
    ) -> tuple[str, str]:
        """Build the result's description and suggestion texts.

        Returns:
            ``(description, suggestion)``: a summary with remediation steps
            when ``anomalies`` is non-empty, otherwise a short all-clear.
        """
        if not anomalies:
            return (
                "成本费用检查未发现明显异常,费用支出基本规范。",
                "继续保持良好的费用管理。",
            )

        anomaly_count = len(anomalies)
        description = (
            f"检测到 {anomaly_count} 项成本费用异常:"
            f"期间内共有 {expense_count} 笔费用,总金额 {total_expense:,.2f}元,"
            f"存在费用异常增长、大额费用集中等问题。"
        )
        suggestion = (
            "1. 核实大额费用的真实性和合理性;\n"
            "2. 检查是否存在虚增成本的情况;\n"
            "3. 提供费用支出的相关证明材料;\n"
            "4. 规范费用分类和会计处理;\n"
            "5. 建立费用审批和内控制度;\n"
            "6. 如发现问题,及时调整账务处理。"
        )
        return description, suggestion

    @staticmethod
    def _month_start(period: str) -> datetime:
        """Return midnight on the first day of the ``YYYY-MM`` month ``period``.

        Raises:
            ValueError: if ``period`` is not exactly two integers joined by
                ``-`` or does not name a valid month.
        """
        year, month = map(int, period.split("-"))
        return datetime(year, month, 1)

    @staticmethod
    def _shift_months(month_start: datetime, months: int) -> datetime:
        """Return the first day of the month ``months`` away from ``month_start``."""
        month_index = month_start.year * 12 + (month_start.month - 1) + months
        year, zero_based_month = divmod(month_index, 12)
        return datetime(year, zero_based_month + 1, 1)

    def _parse_period(self, period: str) -> tuple[datetime, datetime]:
        """Convert a ``YYYY-MM`` period into its first and last day.

        Returns:
            ``(start_date, end_date)``, both at midnight. If ``period`` cannot
            be parsed the error is logged and the range from the first day of
            the current month up to now is returned instead.
        """
        try:
            start_date = self._month_start(period)
            end_date = self._shift_months(start_date, 1) - timedelta(days=1)
            return start_date, end_date
        except Exception as e:
            logger.error(f"解析期间失败: {period}, 错误: {str(e)}")
            now = datetime.now()
            return datetime(now.year, now.month, 1), now

    def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult:
        """Build the result returned when the detection cannot be carried out.

        The result has ``RiskLevel.UNKNOWN``, a score of ``0.0`` and a
        description embedding ``error_message``.
        """
        return DetectionResult(
            task_id=context.task_id,
            rule_id=context.rule_id,
            entity_id=context.get_parameter("entity_id", ""),
            entity_type=context.get_parameter("entity_type", "streamer"),
            risk_level=RiskLevel.UNKNOWN,
            risk_score=0.0,
            description=f"成本费用异常检测失败: {error_message}",
            suggestion="请检查参数设置或联系系统管理员",
        )