473 lines
18 KiB
Python
473 lines
18 KiB
Python
"""
|
||
成本费用异常检测算法
|
||
检测成本费用是否合理,识别虚增成本、费用异常等风险
|
||
"""
|
||
from typing import Dict, Any, List, Optional, Tuple
|
||
from datetime import datetime, timedelta
|
||
from sqlalchemy import select, func, and_, or_
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
from loguru import logger
|
||
import statistics
|
||
|
||
from .base import (
|
||
RiskDetectionAlgorithm,
|
||
DetectionContext,
|
||
DetectionResult,
|
||
RiskEvidence,
|
||
)
|
||
from app.models.risk_detection import RiskLevel
|
||
from app.models.expense import Expense
|
||
|
||
|
||
class ExpenseAnomalyDetectionAlgorithm(RiskDetectionAlgorithm):
|
||
"""成本费用异常检测算法"""
|
||
|
||
def get_algorithm_code(self) -> str:
|
||
return "EXPENSE_ANOMALY_DETECTION"
|
||
|
||
def get_algorithm_name(self) -> str:
|
||
return "成本费用异常检测"
|
||
|
||
def get_description(self) -> str:
|
||
return (
|
||
"通过分析成本费用凭证,"
|
||
"检测是否存在虚增成本、费用异常、违规支出等风险"
|
||
)
|
||
|
||
async def _do_detect(self, context: DetectionContext) -> DetectionResult:
|
||
"""执行成本费用异常检测"""
|
||
# 获取参数
|
||
entity_id = context.get_parameter("entity_id")
|
||
entity_type = context.get_parameter("entity_type", "streamer")
|
||
period = context.get_parameter("period") # 格式:YYYY-MM
|
||
threshold_multiplier = context.get_parameter("threshold_multiplier", 2.0) # 异常倍数阈值
|
||
|
||
if not entity_id or not period:
|
||
return self._create_error_result(context, "缺少必要参数:entity_id 或 period")
|
||
|
||
db_session = context.db_session
|
||
if not db_session:
|
||
return self._create_error_result(context, "缺少数据库会话")
|
||
|
||
try:
|
||
# 获取费用数据
|
||
expenses = await self._get_expenses(db_session, entity_id, period)
|
||
|
||
# 获取历史费用数据用于对比
|
||
historical_expenses = await self._get_historical_expenses(
|
||
db_session, entity_id, period
|
||
)
|
||
|
||
# 分析异常
|
||
anomalies = self._analyze_expense_anomalies(
|
||
expenses, historical_expenses, threshold_multiplier
|
||
)
|
||
|
||
# 计算风险指标
|
||
total_expense = sum(exp["expense_amount"] for exp in expenses)
|
||
large_expenses = [exp for exp in expenses if exp["is_large_amount"]]
|
||
cross_border_count = sum(1 for exp in expenses if exp["is_cross_border"])
|
||
|
||
# 计算费用结构
|
||
expense_by_category = self._analyze_expense_structure(expenses)
|
||
|
||
# 判断风险等级
|
||
risk_level, risk_score = self._calculate_risk_level(
|
||
anomalies, large_expenses, cross_border_count
|
||
)
|
||
|
||
# 生成风险描述和建议
|
||
description, suggestion = self._generate_risk_description(
|
||
anomalies, total_expense, len(expenses)
|
||
)
|
||
|
||
# 创建检测结果
|
||
result = DetectionResult(
|
||
task_id=context.task_id,
|
||
rule_id=context.rule_id,
|
||
entity_id=entity_id,
|
||
entity_type=entity_type,
|
||
risk_level=risk_level,
|
||
risk_score=risk_score,
|
||
description=description,
|
||
suggestion=suggestion,
|
||
risk_data={
|
||
"expenses": expenses,
|
||
"historical_expenses": historical_expenses,
|
||
"anomalies": anomalies,
|
||
"total_expense": total_expense,
|
||
"expense_count": len(expenses),
|
||
"large_expenses_count": len(large_expenses),
|
||
"cross_border_count": cross_border_count,
|
||
"expense_by_category": expense_by_category,
|
||
"period": period,
|
||
},
|
||
)
|
||
|
||
# 添加证据
|
||
result.add_evidence(RiskEvidence(
|
||
evidence_type="expense_summary",
|
||
description=f"期间内共有 {len(expenses)} 笔费用凭证,总金额 {total_expense:,.2f}元",
|
||
data={
|
||
"count": len(expenses),
|
||
"total_amount": total_expense,
|
||
},
|
||
))
|
||
|
||
result.add_evidence(RiskEvidence(
|
||
evidence_type="expense_structure",
|
||
description=f"费用结构:{expense_by_category}",
|
||
data=expense_by_category,
|
||
))
|
||
|
||
if large_expenses:
|
||
result.add_evidence(RiskEvidence(
|
||
evidence_type="large_expenses",
|
||
description=f"大额费用(>5万):{len(large_expenses)} 笔",
|
||
data={
|
||
"count": len(large_expenses),
|
||
"total_amount": sum(exp["expense_amount"] for exp in large_expenses),
|
||
},
|
||
))
|
||
|
||
if anomalies:
|
||
for anomaly in anomalies[:3]:
|
||
result.add_evidence(RiskEvidence(
|
||
evidence_type="anomaly_detail",
|
||
description=f"异常:{anomaly['description']}",
|
||
data=anomaly,
|
||
))
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
logger.error(f"成本费用异常检测执行失败: {str(e)}", exc_info=True)
|
||
return self._create_error_result(context, f"检测执行失败: {str(e)}")
|
||
|
||
async def _get_expenses(
|
||
self, db_session: AsyncSession, entity_id: str, period: str
|
||
) -> List[Dict[str, Any]]:
|
||
"""获取费用数据"""
|
||
try:
|
||
start_date, end_date = self._parse_period(period)
|
||
|
||
stmt = select(Expense).where(
|
||
and_(
|
||
Expense.payee_name == entity_id, # 假设用收款方作为实体标识
|
||
Expense.expense_date >= start_date,
|
||
Expense.expense_date <= end_date,
|
||
Expense.payment_status == "已支付",
|
||
)
|
||
).order_by(Expense.expense_date.desc())
|
||
|
||
result = await db_session.execute(stmt)
|
||
expenses = result.scalars().all()
|
||
|
||
return [
|
||
{
|
||
"expense_id": exp.expense_id,
|
||
"voucher_no": exp.voucher_no,
|
||
"expense_type": exp.expense_type,
|
||
"expense_category": exp.expense_category,
|
||
"payer_name": exp.payer_name,
|
||
"payee_name": exp.payee_name,
|
||
"expense_date": exp.expense_date,
|
||
"expense_amount": exp.expense_amount,
|
||
"tax_amount": exp.tax_amount,
|
||
"tax_rate": exp.tax_rate,
|
||
"payment_method": exp.payment_method,
|
||
"is_large_amount": exp.is_large_amount,
|
||
"is_cross_border": exp.is_cross_border,
|
||
"fiscal_year": exp.fiscal_year,
|
||
"fiscal_period": exp.fiscal_period,
|
||
}
|
||
for exp in expenses
|
||
]
|
||
except Exception as e:
|
||
logger.error(f"获取费用数据失败: {str(e)}")
|
||
return []
|
||
|
||
async def _get_historical_expenses(
|
||
self, db_session: AsyncSession, entity_id: str, period: str
|
||
) -> List[Dict[str, Any]]:
|
||
"""获取历史费用数据用于对比"""
|
||
try:
|
||
# 获取前6个月的数据
|
||
year, month = map(int, period.split("-"))
|
||
start_year = year if month > 6 else year - 1
|
||
start_month = (month - 6) if month > 6 else month + 6
|
||
|
||
start_date = datetime(start_year, start_month, 1)
|
||
end_date = datetime(year, month, 1) - timedelta(days=1)
|
||
|
||
stmt = select(Expense).where(
|
||
and_(
|
||
Expense.payee_name == entity_id,
|
||
Expense.expense_date >= start_date,
|
||
Expense.expense_date < datetime(year, month, 1),
|
||
Expense.payment_status == "已支付",
|
||
)
|
||
).order_by(Expense.expense_date.desc())
|
||
|
||
result = await db_session.execute(stmt)
|
||
expenses = result.scalars().all()
|
||
|
||
return [
|
||
{
|
||
"expense_date": exp.expense_date,
|
||
"expense_amount": exp.expense_amount,
|
||
"expense_category": exp.expense_category,
|
||
}
|
||
for exp in expenses
|
||
]
|
||
except Exception as e:
|
||
logger.error(f"获取历史费用数据失败: {str(e)}")
|
||
return []
|
||
|
||
def _analyze_expense_anomalies(
|
||
self,
|
||
expenses: List[Dict[str, Any]],
|
||
historical_expenses: List[Dict[str, Any]],
|
||
threshold_multiplier: float,
|
||
) -> List[Dict[str, Any]]:
|
||
"""分析费用异常"""
|
||
anomalies = []
|
||
|
||
if not expenses:
|
||
return anomalies
|
||
|
||
current_total = sum(exp["expense_amount"] for exp in expenses)
|
||
|
||
# 1. 检查费用总额异常增长
|
||
if historical_expenses:
|
||
historical_monthly_avg = self._calculate_monthly_average(
|
||
historical_expenses
|
||
)
|
||
if historical_monthly_avg > 0:
|
||
growth_rate = (current_total - historical_monthly_avg) / historical_monthly_avg
|
||
if growth_rate > threshold_multiplier: # 增长超过阈值
|
||
anomalies.append({
|
||
"type": "abnormal_growth",
|
||
"description": f"费用总额同比增长 {growth_rate*100:.1f}%,超出合理范围",
|
||
"current_total": current_total,
|
||
"historical_avg": historical_monthly_avg,
|
||
"growth_rate": growth_rate,
|
||
})
|
||
|
||
# 2. 检查大额费用集中
|
||
large_expenses = [exp for exp in expenses if exp["is_large_amount"]]
|
||
if large_expenses:
|
||
large_ratio = len(large_expenses) / len(expenses)
|
||
if large_ratio > 0.3: # 大额费用占比超过30%
|
||
anomalies.append({
|
||
"type": "large_expense_concentration",
|
||
"description": f"大额费用占比 {large_ratio*100:.1f}% 过高",
|
||
"large_count": len(large_expenses),
|
||
"total_count": len(expenses),
|
||
"ratio": large_ratio,
|
||
})
|
||
|
||
# 3. 检查跨境支付异常
|
||
cross_border_expenses = [exp for exp in expenses if exp["is_cross_border"]]
|
||
if cross_border_expenses:
|
||
cross_border_amount = sum(exp["expense_amount"] for exp in cross_border_expenses)
|
||
cross_border_ratio = cross_border_amount / current_total if current_total > 0 else 0
|
||
if cross_border_ratio > 0.5: # 跨境支付占比超过50%
|
||
anomalies.append({
|
||
"type": "high_cross_border_ratio",
|
||
"description": f"跨境支付占比 {cross_border_ratio*100:.1f}% 过高",
|
||
"amount": cross_border_amount,
|
||
"ratio": cross_border_ratio,
|
||
})
|
||
|
||
# 4. 检查费用类别异常
|
||
expense_by_category = self._analyze_expense_structure(expenses)
|
||
for category, amount in expense_by_category.items():
|
||
ratio = amount / current_total if current_total > 0 else 0
|
||
if category in ["其他"] and ratio > 0.5: # 其他类费用占比过高
|
||
anomalies.append({
|
||
"type": "unusual_category_ratio",
|
||
"description": f"{category}费用占比 {ratio*100:.1f}% 过高",
|
||
"category": category,
|
||
"amount": amount,
|
||
"ratio": ratio,
|
||
})
|
||
|
||
# 5. 检查税负异常
|
||
avg_tax_rate = statistics.mean([exp["tax_rate"] for exp in expenses if exp["tax_rate"] > 0]) if expenses else 0
|
||
if avg_tax_rate > 0.13: # 增值税税率超过13%
|
||
anomalies.append({
|
||
"type": "abnormal_tax_rate",
|
||
"description": f"平均税率 {avg_tax_rate*100:.2f}% 异常偏高",
|
||
"avg_tax_rate": avg_tax_rate,
|
||
})
|
||
|
||
# 6. 检查同日多笔大额费用
|
||
date_amounts = {}
|
||
for exp in expenses:
|
||
date_key = exp["expense_date"].strftime("%Y-%m-%d")
|
||
if date_key not in date_amounts:
|
||
date_amounts[date_key] = []
|
||
date_amounts[date_key].append(exp["expense_amount"])
|
||
|
||
for date, amounts in date_amounts.items():
|
||
if len(amounts) > 5: # 同日超过5笔费用
|
||
total = sum(amounts)
|
||
if any(amount > 100000 for amount in amounts): # 包含大额费用
|
||
anomalies.append({
|
||
"type": "same_day_multiple_expenses",
|
||
"description": f"{date} 同日发生 {len(amounts)} 笔费用,存在拆分支出嫌疑",
|
||
"date": date,
|
||
"count": len(amounts),
|
||
"total": total,
|
||
})
|
||
|
||
return anomalies
|
||
|
||
def _analyze_expense_structure(
|
||
self, expenses: List[Dict[str, Any]]
|
||
) -> Dict[str, float]:
|
||
"""分析费用结构"""
|
||
expense_by_category = {}
|
||
total = sum(exp["expense_amount"] for exp in expenses)
|
||
|
||
for exp in expenses:
|
||
category = exp["expense_category"]
|
||
if category not in expense_by_category:
|
||
expense_by_category[category] = 0
|
||
expense_by_category[category] += exp["expense_amount"]
|
||
|
||
# 转换为百分比
|
||
if total > 0:
|
||
for category in expense_by_category:
|
||
expense_by_category[category] = expense_by_category[category] / total * 100
|
||
|
||
return expense_by_category
|
||
|
||
def _calculate_monthly_average(
|
||
self, historical_expenses: List[Dict[str, Any]]
|
||
) -> float:
|
||
"""计算月均费用"""
|
||
if not historical_expenses:
|
||
return 0.0
|
||
|
||
# 按月分组
|
||
monthly_totals = {}
|
||
for exp in historical_expenses:
|
||
month_key = exp["expense_date"].strftime("%Y-%m")
|
||
if month_key not in monthly_totals:
|
||
monthly_totals[month_key] = 0
|
||
monthly_totals[month_key] += exp["expense_amount"]
|
||
|
||
if not monthly_totals:
|
||
return 0.0
|
||
|
||
return statistics.mean(monthly_totals.values())
|
||
|
||
def _calculate_risk_level(
|
||
self,
|
||
anomalies: List[Dict[str, Any]],
|
||
large_expenses: List[Dict[str, Any]],
|
||
cross_border_count: int,
|
||
) -> tuple[RiskLevel, float]:
|
||
"""计算风险等级"""
|
||
if not anomalies:
|
||
return RiskLevel.LOW, 10.0
|
||
|
||
risk_score = 30.0 # 基础分
|
||
|
||
# 基于异常类型加分
|
||
anomaly_scores = {
|
||
"abnormal_growth": 35,
|
||
"large_expense_concentration": 25,
|
||
"high_cross_border_ratio": 30,
|
||
"unusual_category_ratio": 20,
|
||
"abnormal_tax_rate": 25,
|
||
"same_day_multiple_expenses": 30,
|
||
}
|
||
|
||
for anomaly in anomalies:
|
||
risk_score += anomaly_scores.get(anomaly["type"], 10)
|
||
|
||
# 基于大额费用数量加分
|
||
if len(large_expenses) > 10:
|
||
risk_score += 20
|
||
elif len(large_expenses) > 5:
|
||
risk_score += 10
|
||
|
||
# 基于跨境支付数量加分
|
||
if cross_border_count > 5:
|
||
risk_score += 15
|
||
|
||
# 限制分数范围
|
||
risk_score = min(risk_score, 100.0)
|
||
|
||
# 判断风险等级
|
||
if risk_score >= 90:
|
||
return RiskLevel.CRITICAL, risk_score
|
||
elif risk_score >= 70:
|
||
return RiskLevel.HIGH, risk_score
|
||
elif risk_score >= 50:
|
||
return RiskLevel.MEDIUM, risk_score
|
||
else:
|
||
return RiskLevel.LOW, risk_score
|
||
|
||
def _generate_risk_description(
|
||
self,
|
||
anomalies: List[Dict[str, Any]],
|
||
total_expense: float,
|
||
expense_count: int,
|
||
) -> tuple[str, str]:
|
||
"""生成风险描述和建议"""
|
||
if anomalies:
|
||
anomaly_count = len(anomalies)
|
||
description = (
|
||
f"检测到 {anomaly_count} 项成本费用异常:"
|
||
f"期间内共有 {expense_count} 笔费用,总金额 {total_expense:,.2f}元,"
|
||
f"存在费用异常增长、大额费用集中等问题。"
|
||
)
|
||
|
||
suggestion = (
|
||
"1. 核实大额费用的真实性和合理性;\n"
|
||
"2. 检查是否存在虚增成本的情况;\n"
|
||
"3. 提供费用支出的相关证明材料;\n"
|
||
"4. 规范费用分类和会计处理;\n"
|
||
"5. 建立费用审批和内控制度;\n"
|
||
"6. 如发现问题,及时调整账务处理。"
|
||
)
|
||
else:
|
||
description = "成本费用检查未发现明显异常,费用支出基本规范。"
|
||
suggestion = "继续保持良好的费用管理。"
|
||
|
||
return description, suggestion
|
||
|
||
def _parse_period(self, period: str) -> tuple[datetime, datetime]:
|
||
"""解析期间为开始和结束日期"""
|
||
try:
|
||
year, month = map(int, period.split("-"))
|
||
start_date = datetime(year, month, 1)
|
||
if month == 12:
|
||
end_date = datetime(year + 1, 1, 1) - timedelta(days=1)
|
||
else:
|
||
end_date = datetime(year, month + 1, 1) - timedelta(days=1)
|
||
return start_date, end_date
|
||
except Exception as e:
|
||
logger.error(f"解析期间失败: {period}, 错误: {str(e)}")
|
||
now = datetime.now()
|
||
start_date = datetime(now.year, now.month, 1)
|
||
end_date = now
|
||
return start_date, end_date
|
||
|
||
def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult:
|
||
"""创建错误结果"""
|
||
return DetectionResult(
|
||
task_id=context.task_id,
|
||
rule_id=context.rule_id,
|
||
entity_id=context.get_parameter("entity_id", ""),
|
||
entity_type=context.get_parameter("entity_type", "streamer"),
|
||
risk_level=RiskLevel.UNKNOWN,
|
||
risk_score=0.0,
|
||
description=f"成本费用异常检测失败: {error_message}",
|
||
suggestion="请检查参数设置或联系系统管理员",
|
||
)
|