deep-risk/backend/app/services/risk_detection/algorithms/invoice_fraud.py
2025-12-14 20:08:27 +08:00

439 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
发票虚开检测算法
检测发票金额与实际业务是否匹配,识别虚开发票风险
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
from sqlalchemy import select, func, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
from loguru import logger
from .base import (
RiskDetectionAlgorithm,
DetectionContext,
DetectionResult,
RiskEvidence,
)
from app.models.risk_detection import RiskLevel
from app.models.invoice import Invoice
from app.models.order import Order
from app.models.settlement import Settlement
class InvoiceFraudDetectionAlgorithm(RiskDetectionAlgorithm):
"""发票虚开检测算法"""
def get_algorithm_code(self) -> str:
    """Return the fixed code string that identifies this algorithm."""
    algorithm_code = "INVOICE_FRAUD_DETECTION"
    return algorithm_code
def get_algorithm_name(self) -> str:
    """Return the display name of this algorithm."""
    algorithm_name = "发票虚开检测"
    return algorithm_name
def get_description(self) -> str:
    """Return the one-sentence description of what this algorithm checks."""
    fragments = [
        "通过对比发票数据与订单、结算记录,",
        "检测是否存在虚开发票、金额不匹配等风险",
    ]
    return "".join(fragments)
async def _do_detect(self, context: DetectionContext) -> DetectionResult:
    """Run the invoice-fraud detection for one seller and one period.

    Reads ``seller_tax_no``, ``period`` (``YYYY-MM``) and the optional
    ``threshold_rate`` (default ``0.1``) from ``context``, loads invoices,
    orders and settlements, analyses them and packages the findings.

    Args:
        context: Detection context providing the parameters, the task and
            rule identifiers, and the database session.

    Returns:
        A ``DetectionResult`` carrying the risk level, score, description,
        suggestion, raw data and evidence. When a required parameter or the
        database session is missing, or when any step raises, the value of
        ``_create_error_result`` is returned instead.
    """
    # Read the detection parameters.
    seller_tax_no = context.get_parameter("seller_tax_no")
    period = context.get_parameter("period")  # format: YYYY-MM
    threshold_rate = context.get_parameter("threshold_rate", 0.1)  # relative-difference threshold
    if not seller_tax_no or not period:
        return self._create_error_result(context, "缺少必要参数seller_tax_no 或 period")
    db_session = context.db_session
    if not db_session:
        return self._create_error_result(context, "缺少数据库会话")
    try:
        # Load the invoices, then the orders and settlements they are compared with.
        invoices = await self._get_invoices(db_session, seller_tax_no, period)
        orders, settlements = await self._get_business_data(
            db_session, seller_tax_no, period
        )
        # Look for anomalies in the invoice set.
        anomalies = self._analyze_invoice_anomalies(
            invoices, orders, settlements, threshold_rate
        )
        # Aggregate the totals used as risk indicators.
        total_invoice_amount = sum(inv["total_amount"] for inv in invoices)
        total_order_amount = sum(order["total_amount"] for order in orders)
        # Settlement rows produced by _get_business_data expose the amount under
        # "actual_settlement_amount"; reading any other key raises KeyError as
        # soon as at least one settlement exists.
        total_settlement_amount = sum(
            settlement["actual_settlement_amount"] for settlement in settlements
        )
        discrepancy = self._calculate_discrepancy(
            total_invoice_amount, total_order_amount, total_settlement_amount
        )
        # Derive the risk level and score.
        risk_level, risk_score = self._calculate_risk_level(anomalies, discrepancy)
        # Build the description and the suggestion text.
        description, suggestion = self._generate_risk_description(
            anomalies, discrepancy, total_invoice_amount
        )
        # Assemble the detection result.
        result = DetectionResult(
            task_id=context.task_id,
            rule_id=context.rule_id,
            entity_id=seller_tax_no,
            entity_type="taxpayer",
            risk_level=risk_level,
            risk_score=risk_score,
            description=description,
            suggestion=suggestion,
            risk_data={
                "invoices": invoices,
                "orders": orders,
                "settlements": settlements,
                "anomalies": anomalies,
                "total_invoice_amount": total_invoice_amount,
                "total_order_amount": total_order_amount,
                "total_settlement_amount": total_settlement_amount,
                "discrepancy": discrepancy,
                "period": period,
            },
        )
        # Attach the supporting evidence.
        result.add_evidence(RiskEvidence(
            evidence_type="invoice_summary",
            description=f"期间内共开具 {len(invoices)} 张发票,总金额 {total_invoice_amount:,.2f}",
            data={
                "count": len(invoices),
                "total_amount": total_invoice_amount,
            },
        ))
        result.add_evidence(RiskEvidence(
            evidence_type="business_summary",
            description=f"实际业务:订单金额 {total_order_amount:,.2f}元,结算金额 {total_settlement_amount:,.2f}",
            data={
                "order_amount": total_order_amount,
                "settlement_amount": total_settlement_amount,
            },
        ))
        if discrepancy > 0:
            result.add_evidence(RiskEvidence(
                evidence_type="discrepancy",
                description=f"发票与业务数据差异:{discrepancy:,.2f}",
                data={"discrepancy": discrepancy},
            ))
        # Only the first five anomalies are attached as individual evidence items.
        for anomaly in anomalies[:5]:
            result.add_evidence(RiskEvidence(
                evidence_type="anomaly_detail",
                description=f"异常:{anomaly['description']}",
                data=anomaly,
            ))
        return result
    except Exception as e:
        # loguru does not understand the stdlib ``exc_info`` flag: it would be
        # treated as a str.format() argument for the message. logger.exception
        # records the traceback, and passing the error as a positional argument
        # keeps braces inside the exception text from being interpreted.
        logger.exception("发票虚开检测执行失败: {}", e)
        return self._create_error_result(context, f"检测执行失败: {str(e)}")
async def _get_invoices(
    self, db_session: AsyncSession, seller_tax_no: str, period: str
) -> List[Dict[str, Any]]:
    """Load the seller's invoices for the period as plain dictionaries.

    Selects ``Invoice`` rows whose ``seller_tax_no`` matches, whose
    ``invoice_date`` lies within the bounds returned by ``_parse_period`` and
    whose ``invoice_status`` equals ``"正常"``, newest first.

    Args:
        db_session: Async SQLAlchemy session used to run the query.
        seller_tax_no: Tax number of the seller whose invoices are loaded.
        period: Period string handed to ``_parse_period``.

    Returns:
        One dictionary per invoice. An empty list is returned when anything
        raises; the error is logged.
    """
    # NOTE(review): a failed query and "no invoices" both come back as [],
    # so callers cannot tell them apart — confirm this is intended.
    exported_fields = (
        "invoice_id",
        "invoice_code",
        "invoice_no",
        "invoice_date",
        "total_amount",
        "total_tax",
        "purchaser_name",
        "purchaser_tax_no",
        "direction",
        "is_verified",
        "is_red_invoice",
    )
    try:
        period_start, period_end = self._parse_period(period)
        criteria = and_(
            Invoice.seller_tax_no == seller_tax_no,
            Invoice.invoice_date >= period_start,
            Invoice.invoice_date <= period_end,
            Invoice.invoice_status == "正常",
        )
        query = select(Invoice).where(criteria).order_by(Invoice.invoice_date.desc())
        query_result = await db_session.execute(query)
        rows = query_result.scalars().all()
        return [
            {field: getattr(row, field) for field in exported_fields}
            for row in rows
        ]
    except Exception as e:
        logger.error(f"获取发票数据失败: {str(e)}")
        return []
async def _get_business_data(
    self, db_session: AsyncSession, seller_tax_no: str, period: str
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Load the orders and settlements of the period as plain dictionaries.

    Orders are selected by ``order_time`` within the period and status
    ``"已完成"``; settlements by ``settlement_start_date`` /
    ``settlement_end_date`` within the period and status ``"已结算"``.

    Args:
        db_session: Async SQLAlchemy session used to run both queries.
        seller_tax_no: Seller tax number (see the review note below).
        period: Period string handed to ``_parse_period``.

    Returns:
        ``(orders, settlements)`` as lists of dictionaries. ``([], [])`` is
        returned when anything raises; the error is logged.
    """
    # NOTE(review): seller_tax_no is not used by either query, so the rows are
    # not restricted to this seller — confirm whether Order/Settlement carry a
    # seller column that should be filtered on.
    try:
        period_start, period_end = self._parse_period(period)

        # Completed orders placed within the period.
        order_criteria = and_(
            Order.order_time >= period_start,
            Order.order_time <= period_end,
            Order.order_status == "已完成",
        )
        order_rows = (
            await db_session.execute(select(Order).where(order_criteria))
        ).scalars().all()

        # Settled settlements that fall entirely within the period.
        settlement_criteria = and_(
            Settlement.settlement_start_date >= period_start,
            Settlement.settlement_end_date <= period_end,
            Settlement.settlement_status == "已结算",
        )
        settlement_rows = (
            await db_session.execute(select(Settlement).where(settlement_criteria))
        ).scalars().all()

        orders_data = []
        for row in order_rows:
            orders_data.append({
                "order_id": row.order_id,
                "order_time": row.order_time,
                "total_amount": row.total_amount,
                "actual_payment": row.actual_payment,
            })

        settlements_data = []
        for row in settlement_rows:
            settlements_data.append({
                "settlement_id": row.settlement_id,
                "settlement_period": row.settlement_period,
                "actual_settlement_amount": row.actual_settlement_amount,
            })

        return orders_data, settlements_data
    except Exception as e:
        logger.error(f"获取业务数据失败: {str(e)}")
        return [], []
def _analyze_invoice_anomalies(
self,
invoices: List[Dict[str, Any]],
orders: List[Dict[str, Any]],
settlements: List[Dict[str, Any]],
threshold_rate: float,
) -> List[Dict[str, Any]]:
"""分析发票异常"""
anomalies = []
# 检查发票与订单金额匹配
if invoices and orders:
invoice_total = sum(inv["total_amount"] for inv in invoices)
order_total = sum(order["total_amount"] for order in orders)
if invoice_total > 0 and order_total > 0:
diff_rate = abs(invoice_total - order_total) / max(invoice_total, order_total)
if diff_rate > threshold_rate:
anomalies.append({
"type": "amount_mismatch",
"description": f"发票总额与订单总额差异率 {diff_rate*100:.2f}% 超过阈值",
"invoice_total": invoice_total,
"order_total": order_total,
"diff_rate": diff_rate,
})
# 检查发票与结算金额匹配
if invoices and settlements:
invoice_total = sum(inv["total_amount"] for inv in invoices)
settlement_total = sum(set["actual_settlement_amount"] for set in settlements)
if invoice_total > 0 and settlement_total > 0:
diff_rate = abs(invoice_total - settlement_total) / max(invoice_total, settlement_total)
if diff_rate > threshold_rate:
anomalies.append({
"type": "settlement_mismatch",
"description": f"发票总额与结算总额差异率 {diff_rate*100:.2f}% 超过阈值",
"invoice_total": invoice_total,
"settlement_total": settlement_total,
"diff_rate": diff_rate,
})
# 检查大量红字发票
red_invoices = [inv for inv in invoices if inv["is_red_invoice"]]
if red_invoices:
red_rate = len(red_invoices) / len(invoices) if invoices else 0
if red_rate > 0.3: # 超过30%为红字发票
anomalies.append({
"type": "high_red_invoice_rate",
"description": f"红字发票比例 {red_rate*100:.2f}% 过高",
"red_count": len(red_invoices),
"total_count": len(invoices),
"red_rate": red_rate,
})
# 检查大额发票
large_invoices = [inv for inv in invoices if inv["total_amount"] > 100000]
if large_invoices:
anomalies.append({
"type": "large_invoices",
"description": f"存在 {len(large_invoices)} 张大额发票(>10万元",
"count": len(large_invoices),
"total_amount": sum(inv["total_amount"] for inv in large_invoices),
})
# 检查未验证的发票
unverified_invoices = [inv for inv in invoices if not inv["is_verified"]]
if unverified_invoices:
unverified_rate = len(unverified_invoices) / len(invoices) if invoices else 0
if unverified_rate > 0.2: # 超过20%未验证
anomalies.append({
"type": "high_unverified_rate",
"description": f"未验证发票比例 {unverified_rate*100:.2f}% 过高",
"unverified_count": len(unverified_invoices),
"total_count": len(invoices),
"unverified_rate": unverified_rate,
})
return anomalies
def _calculate_discrepancy(
self,
invoice_amount: float,
order_amount: float,
settlement_amount: float,
) -> float:
"""计算金额差异"""
# 以发票金额为基准,计算与订单和结算的平均差异
if invoice_amount == 0:
return 0.0
order_diff = abs(invoice_amount - order_amount) if order_amount > 0 else 0
settlement_diff = abs(invoice_amount - settlement_amount) if settlement_amount > 0 else 0
return (order_diff + settlement_diff) / 2
def _calculate_risk_level(
    self, anomalies: List[Dict[str, Any]], discrepancy: float
) -> tuple[RiskLevel, float]:
    """Turn the anomalies and the amount discrepancy into a level and score.

    With no anomalies and a discrepancy below 1000 the result is
    ``(RiskLevel.LOW, 10.0)``. Otherwise the score starts at 20, gains a
    weight per anomaly type (10 for unknown types), gains a bonus for large
    discrepancies, and is capped at 100 before being mapped to a level.

    Args:
        anomalies: Anomaly dictionaries; only ``type`` is read.
        discrepancy: Amount discrepancy between invoices and business data.

    Returns:
        ``(risk_level, risk_score)``.
    """
    if not anomalies and discrepancy < 1000:
        return RiskLevel.LOW, 10.0

    # Score contribution of each anomaly type.
    type_weights = {
        "amount_mismatch": 40,
        "settlement_mismatch": 35,
        "high_red_invoice_rate": 30,
        "large_invoices": 20,
        "high_unverified_rate": 25,
    }
    # Start from the base score of 20 and add one weight per anomaly.
    score = sum((type_weights.get(item["type"], 10) for item in anomalies), 20.0)

    # Only the highest discrepancy tier that is exceeded adds its bonus.
    for floor, bonus in ((500000, 30), (200000, 20), (100000, 10)):
        if discrepancy > floor:
            score += bonus
            break

    # Keep the score within range.
    score = min(score, 100.0)

    # Map the score to a level, highest band first.
    bands = (
        (90, RiskLevel.CRITICAL),
        (70, RiskLevel.HIGH),
        (50, RiskLevel.MEDIUM),
    )
    for cutoff, level in bands:
        if score >= cutoff:
            return level, score
    return RiskLevel.LOW, score
def _generate_risk_description(
self,
anomalies: List[Dict[str, Any]],
discrepancy: float,
total_invoice_amount: float,
) -> tuple[str, str]:
"""生成风险描述和建议"""
if anomalies:
anomaly_count = len(anomalies)
anomaly_types = [a["type"] for a in anomalies]
description = (
f"检测到 {anomaly_count} 项发票异常风险:"
f"发票总金额 {total_invoice_amount:,.2f}元,"
f"存在金额不匹配、红字发票比例过高等问题,"
f"可能涉及虚开发票风险。"
)
suggestion = (
"1. 核实发票开具的真实性和合法性;\n"
"2. 检查发票与实际业务的对应关系;\n"
"3. 提供相关合同、订单、结算证明材料;\n"
"4. 对红字发票说明冲红原因;\n"
"5. 及时验证发票真伪;\n"
"6. 如发现问题,建议主动补正申报。"
)
else:
description = "发票数据检查未发现明显异常,开票行为基本规范。"
suggestion = "继续保持规范的发票管理。"
return description, suggestion
def _parse_period(self, period: str) -> tuple[datetime, datetime]:
"""解析期间为开始和结束日期"""
try:
year, month = map(int, period.split("-"))
start_date = datetime(year, month, 1)
if month == 12:
end_date = datetime(year + 1, 1, 1) - timedelta(days=1)
else:
end_date = datetime(year, month + 1, 1) - timedelta(days=1)
return start_date, end_date
except Exception as e:
logger.error(f"解析期间失败: {period}, 错误: {str(e)}")
now = datetime.now()
start_date = datetime(now.year, now.month, 1)
end_date = now
return start_date, end_date
def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult:
    """Build the result that reports a failed detection run.

    Args:
        context: Detection context supplying the task id, the rule id and the
            ``seller_tax_no`` parameter (empty string when absent).
        error_message: Reason for the failure; embedded in the description.

    Returns:
        A ``DetectionResult`` with ``RiskLevel.UNKNOWN`` and a score of 0.0.
    """
    taxpayer_id = context.get_parameter("seller_tax_no", "")
    failure_text = f"发票虚开检测失败: {error_message}"
    return DetectionResult(
        task_id=context.task_id,
        rule_id=context.rule_id,
        entity_id=taxpayer_id,
        entity_type="taxpayer",
        risk_level=RiskLevel.UNKNOWN,
        risk_score=0.0,
        description=failure_text,
        suggestion="请检查参数设置或联系系统管理员",
    )