439 lines
17 KiB
Python
439 lines
17 KiB
Python
"""
|
||
发票虚开检测算法
|
||
检测发票金额与实际业务是否匹配,识别虚开发票风险
|
||
"""
|
||
from typing import Dict, Any, List, Optional, Tuple
|
||
from datetime import datetime, timedelta
|
||
from sqlalchemy import select, func, and_, or_
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
from loguru import logger
|
||
|
||
from .base import (
|
||
RiskDetectionAlgorithm,
|
||
DetectionContext,
|
||
DetectionResult,
|
||
RiskEvidence,
|
||
)
|
||
from app.models.risk_detection import RiskLevel
|
||
from app.models.invoice import Invoice
|
||
from app.models.order import Order
|
||
from app.models.settlement import Settlement
|
||
|
||
|
||
class InvoiceFraudDetectionAlgorithm(RiskDetectionAlgorithm):
    """Detect false-invoicing risk by reconciling invoices with business data.

    For one seller tax number and one ``YYYY-MM`` period the algorithm loads
    the invoices, the completed orders and the settled settlements, compares
    their totals, flags a few invoice-level anomalies and turns the findings
    into a risk level, a score, a description and supporting evidence.
    """

    # Share of red-letter invoices above which the period is flagged.
    RED_INVOICE_RATE_THRESHOLD = 0.3
    # Share of unverified invoices above which the period is flagged.
    UNVERIFIED_RATE_THRESHOLD = 0.2
    # Amount above which a single invoice counts as "large". The anomaly
    # description text hard-codes "10万元"; change both together.
    LARGE_INVOICE_AMOUNT = 100000

    # Score added per anomaly type; unknown types add _DEFAULT_ANOMALY_SCORE.
    _ANOMALY_SCORES = {
        "amount_mismatch": 40,
        "settlement_mismatch": 35,
        "high_red_invoice_rate": 30,
        "large_invoices": 20,
        "high_unverified_rate": 25,
    }
    _DEFAULT_ANOMALY_SCORE = 10

    def get_algorithm_code(self) -> str:
        """Return the identifier code of this algorithm."""
        return "INVOICE_FRAUD_DETECTION"

    def get_algorithm_name(self) -> str:
        """Return the display name of this algorithm."""
        return "发票虚开检测"

    def get_description(self) -> str:
        """Return a short description of what this algorithm checks."""
        return (
            "通过对比发票数据与订单、结算记录,"
            "检测是否存在虚开发票、金额不匹配等风险"
        )

    async def _do_detect(self, context: DetectionContext) -> DetectionResult:
        """Run the detection for the seller and period given in *context*.

        Reads the parameters ``seller_tax_no``, ``period`` (``YYYY-MM``) and
        the optional ``threshold_rate`` (default ``0.1``), loads the data
        through ``context.db_session`` and builds the result.

        Returns:
            The populated ``DetectionResult``. If a required parameter or the
            database session is missing, or any step raises, the result of
            ``_create_error_result`` is returned instead.
        """
        seller_tax_no = context.get_parameter("seller_tax_no")
        period = context.get_parameter("period")  # format: YYYY-MM
        # Relative difference above which totals count as mismatched.
        threshold_rate = context.get_parameter("threshold_rate", 0.1)

        if not seller_tax_no or not period:
            return self._create_error_result(context, "缺少必要参数:seller_tax_no 或 period")

        db_session = context.db_session
        if not db_session:
            return self._create_error_result(context, "缺少数据库会话")

        try:
            invoices = await self._get_invoices(db_session, seller_tax_no, period)
            orders, settlements = await self._get_business_data(
                db_session, seller_tax_no, period
            )

            anomalies = self._analyze_invoice_anomalies(
                invoices, orders, settlements, threshold_rate
            )

            total_invoice_amount = self._sum_amounts(invoices, "total_amount")
            total_order_amount = self._sum_amounts(orders, "total_amount")
            # Settlement rows carry their amount under
            # "actual_settlement_amount" (see _get_business_data).
            total_settlement_amount = self._sum_amounts(
                settlements, "actual_settlement_amount"
            )

            discrepancy = self._calculate_discrepancy(
                total_invoice_amount, total_order_amount, total_settlement_amount
            )
            risk_level, risk_score = self._calculate_risk_level(anomalies, discrepancy)
            description, suggestion = self._generate_risk_description(
                anomalies, discrepancy, total_invoice_amount
            )

            result = DetectionResult(
                task_id=context.task_id,
                rule_id=context.rule_id,
                entity_id=seller_tax_no,
                entity_type="taxpayer",
                risk_level=risk_level,
                risk_score=risk_score,
                description=description,
                suggestion=suggestion,
                risk_data={
                    "invoices": invoices,
                    "orders": orders,
                    "settlements": settlements,
                    "anomalies": anomalies,
                    "total_invoice_amount": total_invoice_amount,
                    "total_order_amount": total_order_amount,
                    "total_settlement_amount": total_settlement_amount,
                    "discrepancy": discrepancy,
                    "period": period,
                },
            )

            result.add_evidence(RiskEvidence(
                evidence_type="invoice_summary",
                description=f"期间内共开具 {len(invoices)} 张发票,总金额 {total_invoice_amount:,.2f}元",
                data={
                    "count": len(invoices),
                    "total_amount": total_invoice_amount,
                },
            ))

            result.add_evidence(RiskEvidence(
                evidence_type="business_summary",
                description=f"实际业务:订单金额 {total_order_amount:,.2f}元,结算金额 {total_settlement_amount:,.2f}元",
                data={
                    "order_amount": total_order_amount,
                    "settlement_amount": total_settlement_amount,
                },
            ))

            if discrepancy > 0:
                result.add_evidence(RiskEvidence(
                    evidence_type="discrepancy",
                    description=f"发票与业务数据差异:{discrepancy:,.2f}元",
                    data={"discrepancy": discrepancy},
                ))

            # Only the first five anomalies are attached as detail evidence.
            for anomaly in anomalies[:5]:
                result.add_evidence(RiskEvidence(
                    evidence_type="anomaly_detail",
                    description=f"异常:{anomaly['description']}",
                    data=anomaly,
                ))

            return result

        except Exception as e:
            # loguru has no ``exc_info`` keyword; ``logger.exception`` is its
            # way of logging an error together with the traceback.
            logger.exception(f"发票虚开检测执行失败: {e}")
            return self._create_error_result(context, f"检测执行失败: {e}")

    @staticmethod
    def _sum_amounts(rows: List[Dict[str, Any]], key: str) -> Any:
        """Return the sum of ``row[key]`` over *rows*, counting ``None`` as 0."""
        return sum(row[key] or 0 for row in rows)

    async def _get_invoices(
        self, db_session: AsyncSession, seller_tax_no: str, period: str
    ) -> List[Dict[str, Any]]:
        """Load the seller's invoices for *period* as plain dictionaries.

        Selects invoices whose ``seller_tax_no`` matches, whose
        ``invoice_date`` lies within the parsed period and whose status is
        "正常", ordered by invoice date descending.

        Raises:
            Exception: whatever the query raises, after logging it. A failed
                query must not be mistaken for "no invoices".
        """
        start_date, end_date = self._parse_period(period)

        stmt = select(Invoice).where(
            and_(
                Invoice.seller_tax_no == seller_tax_no,
                Invoice.invoice_date >= start_date,
                Invoice.invoice_date <= end_date,
                Invoice.invoice_status == "正常",
            )
        ).order_by(Invoice.invoice_date.desc())

        try:
            result = await db_session.execute(stmt)
            invoices = result.scalars().all()
        except Exception as e:
            logger.error(f"获取发票数据失败: {e}")
            raise

        return [
            {
                "invoice_id": inv.invoice_id,
                "invoice_code": inv.invoice_code,
                "invoice_no": inv.invoice_no,
                "invoice_date": inv.invoice_date,
                "total_amount": inv.total_amount,
                "total_tax": inv.total_tax,
                "purchaser_name": inv.purchaser_name,
                "purchaser_tax_no": inv.purchaser_tax_no,
                "direction": inv.direction,
                "is_verified": inv.is_verified,
                "is_red_invoice": inv.is_red_invoice,
            }
            for inv in invoices
        ]

    async def _get_business_data(
        self, db_session: AsyncSession, seller_tax_no: str, period: str
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Load the orders and settlements for *period* as plain dictionaries.

        Orders are those with ``order_time`` inside the period and status
        "已完成"; settlements are those whose start and end dates both fall
        inside the period and whose status is "已结算".

        Returns:
            A ``(orders, settlements)`` pair of dictionary lists.

        Raises:
            Exception: whatever a query raises, after logging it.
        """
        # NOTE(review): seller_tax_no is accepted but not used in either
        # query, so the rows are not restricted to this seller. Confirm which
        # Order / Settlement column identifies the seller and filter on it.
        start_date, end_date = self._parse_period(period)

        order_stmt = select(Order).where(
            and_(
                Order.order_time >= start_date,
                Order.order_time <= end_date,
                Order.order_status == "已完成",
            )
        )
        settlement_stmt = select(Settlement).where(
            and_(
                Settlement.settlement_start_date >= start_date,
                Settlement.settlement_end_date <= end_date,
                Settlement.settlement_status == "已结算",
            )
        )

        try:
            order_result = await db_session.execute(order_stmt)
            orders = order_result.scalars().all()

            settlement_result = await db_session.execute(settlement_stmt)
            settlements = settlement_result.scalars().all()
        except Exception as e:
            logger.error(f"获取业务数据失败: {e}")
            raise

        orders_data = [
            {
                "order_id": order.order_id,
                "order_time": order.order_time,
                "total_amount": order.total_amount,
                "actual_payment": order.actual_payment,
            }
            for order in orders
        ]
        settlements_data = [
            {
                "settlement_id": settlement.settlement_id,
                "settlement_period": settlement.settlement_period,
                "actual_settlement_amount": settlement.actual_settlement_amount,
            }
            for settlement in settlements
        ]
        return orders_data, settlements_data

    def _analyze_invoice_anomalies(
        self,
        invoices: List[Dict[str, Any]],
        orders: List[Dict[str, Any]],
        settlements: List[Dict[str, Any]],
        threshold_rate: float,
    ) -> List[Dict[str, Any]]:
        """Return the list of anomalies found in the period's invoices.

        Checks, in order: invoice total vs. order total, invoice total vs.
        settlement total (both flagged when the relative difference exceeds
        *threshold_rate*), share of red-letter invoices, presence of large
        invoices, and share of unverified invoices. Each anomaly is a dict
        with at least ``type`` and ``description``.
        """
        anomalies: List[Dict[str, Any]] = []
        invoice_count = len(invoices)
        invoice_total = self._sum_amounts(invoices, "total_amount")

        # Invoice total vs. order total.
        if invoices and orders:
            order_total = self._sum_amounts(orders, "total_amount")
            if invoice_total > 0 and order_total > 0:
                diff_rate = abs(invoice_total - order_total) / max(invoice_total, order_total)
                if diff_rate > threshold_rate:
                    anomalies.append({
                        "type": "amount_mismatch",
                        "description": f"发票总额与订单总额差异率 {diff_rate * 100:.2f}% 超过阈值",
                        "invoice_total": invoice_total,
                        "order_total": order_total,
                        "diff_rate": diff_rate,
                    })

        # Invoice total vs. settlement total.
        if invoices and settlements:
            settlement_total = self._sum_amounts(settlements, "actual_settlement_amount")
            if invoice_total > 0 and settlement_total > 0:
                diff_rate = abs(invoice_total - settlement_total) / max(
                    invoice_total, settlement_total
                )
                if diff_rate > threshold_rate:
                    anomalies.append({
                        "type": "settlement_mismatch",
                        "description": f"发票总额与结算总额差异率 {diff_rate * 100:.2f}% 超过阈值",
                        "invoice_total": invoice_total,
                        "settlement_total": settlement_total,
                        "diff_rate": diff_rate,
                    })

        # Share of red-letter invoices.
        red_invoices = [inv for inv in invoices if inv["is_red_invoice"]]
        if red_invoices:
            # invoice_count cannot be zero here: red_invoices is a non-empty subset.
            red_rate = len(red_invoices) / invoice_count
            if red_rate > self.RED_INVOICE_RATE_THRESHOLD:
                anomalies.append({
                    "type": "high_red_invoice_rate",
                    "description": f"红字发票比例 {red_rate * 100:.2f}% 过高",
                    "red_count": len(red_invoices),
                    "total_count": invoice_count,
                    "red_rate": red_rate,
                })

        # Large invoices; a missing amount is treated as 0.
        large_invoices = [
            inv for inv in invoices
            if (inv["total_amount"] or 0) > self.LARGE_INVOICE_AMOUNT
        ]
        if large_invoices:
            anomalies.append({
                "type": "large_invoices",
                "description": f"存在 {len(large_invoices)} 张大额发票(>10万元)",
                "count": len(large_invoices),
                "total_amount": self._sum_amounts(large_invoices, "total_amount"),
            })

        # Share of unverified invoices.
        unverified_invoices = [inv for inv in invoices if not inv["is_verified"]]
        if unverified_invoices:
            unverified_rate = len(unverified_invoices) / invoice_count
            if unverified_rate > self.UNVERIFIED_RATE_THRESHOLD:
                anomalies.append({
                    "type": "high_unverified_rate",
                    "description": f"未验证发票比例 {unverified_rate * 100:.2f}% 过高",
                    "unverified_count": len(unverified_invoices),
                    "total_count": invoice_count,
                    "unverified_rate": unverified_rate,
                })

        return anomalies

    def _calculate_discrepancy(
        self,
        invoice_amount: float,
        order_amount: float,
        settlement_amount: float,
    ) -> float:
        """Return the mean absolute gap between the invoice total and the two business totals.

        Returns ``0.0`` when *invoice_amount* is zero. A business total that
        is not positive contributes a gap of 0, and the sum is always halved.
        """
        # NOTE(review): because a non-positive business total contributes 0,
        # invoices with no orders/settlements at all yield no discrepancy,
        # and a single available comparison is halved. Confirm this is the
        # intended scoring before relying on it.
        if invoice_amount == 0:
            return 0.0

        order_diff = abs(invoice_amount - order_amount) if order_amount > 0 else 0
        settlement_diff = (
            abs(invoice_amount - settlement_amount) if settlement_amount > 0 else 0
        )
        return (order_diff + settlement_diff) / 2

    def _calculate_risk_level(
        self, anomalies: List[Dict[str, Any]], discrepancy: float
    ) -> tuple[RiskLevel, float]:
        """Map the anomalies and the discrepancy to a ``(risk level, score)`` pair.

        With no anomalies and a discrepancy below 1000 the result is
        ``(RiskLevel.LOW, 10.0)``. Otherwise the score starts at 20, gains a
        per-type amount for every anomaly and 10/20/30 for a discrepancy
        above 100000/200000/500000, and is capped at 100. Scores of at least
        90, 70 and 50 map to CRITICAL, HIGH and MEDIUM; anything lower is LOW.
        """
        if not anomalies and discrepancy < 1000:
            return RiskLevel.LOW, 10.0

        risk_score = 20.0  # base score
        for anomaly in anomalies:
            risk_score += self._ANOMALY_SCORES.get(
                anomaly["type"], self._DEFAULT_ANOMALY_SCORE
            )

        if discrepancy > 500000:
            risk_score += 30
        elif discrepancy > 200000:
            risk_score += 20
        elif discrepancy > 100000:
            risk_score += 10

        risk_score = min(risk_score, 100.0)

        if risk_score >= 90:
            return RiskLevel.CRITICAL, risk_score
        if risk_score >= 70:
            return RiskLevel.HIGH, risk_score
        if risk_score >= 50:
            return RiskLevel.MEDIUM, risk_score
        return RiskLevel.LOW, risk_score

    def _generate_risk_description(
        self,
        anomalies: List[Dict[str, Any]],
        discrepancy: float,
        total_invoice_amount: float,
    ) -> tuple[str, str]:
        """Return the ``(description, suggestion)`` texts for the result.

        The wording depends only on whether any anomaly was found; the
        anomaly count and the invoice total are interpolated into the
        description. *discrepancy* is accepted but not used.
        """
        if not anomalies:
            return (
                "发票数据检查未发现明显异常,开票行为基本规范。",
                "继续保持规范的发票管理。",
            )

        description = (
            f"检测到 {len(anomalies)} 项发票异常风险:"
            f"发票总金额 {total_invoice_amount:,.2f}元,"
            "存在金额不匹配、红字发票比例过高等问题,"
            "可能涉及虚开发票风险。"
        )
        suggestion = (
            "1. 核实发票开具的真实性和合法性;\n"
            "2. 检查发票与实际业务的对应关系;\n"
            "3. 提供相关合同、订单、结算证明材料;\n"
            "4. 对红字发票说明冲红原因;\n"
            "5. 及时验证发票真伪;\n"
            "6. 如发现问题,建议主动补正申报。"
        )
        return description, suggestion

    def _parse_period(self, period: str) -> tuple[datetime, datetime]:
        """Convert a ``YYYY-MM`` period into inclusive ``(start, end)`` datetimes.

        *start* is midnight on the first day of the month and *end* is the
        last microsecond of the month, so ``>= start`` / ``<= end`` filters
        keep rows timestamped at any time on the final day.

        If *period* cannot be parsed, the error is logged and the range from
        the first day of the current month up to now is returned instead.
        """
        try:
            year, month = map(int, period.split("-"))
            start_date = datetime(year, month, 1)
            if month == 12:
                next_month_start = datetime(year + 1, 1, 1)
            else:
                next_month_start = datetime(year, month + 1, 1)
            end_date = next_month_start - timedelta(microseconds=1)
            return start_date, end_date
        except (ValueError, TypeError, AttributeError, OverflowError) as e:
            # NOTE(review): falling back to the current month means a
            # malformed period is analysed against different dates than the
            # caller asked for; consider rejecting it in _do_detect.
            logger.error(f"解析期间失败: {period}, 错误: {str(e)}")
            now = datetime.now()
            return datetime(now.year, now.month, 1), now

    def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult:
        """Build the ``DetectionResult`` that reports a failed detection.

        The result has risk level ``RiskLevel.UNKNOWN``, score ``0.0`` and a
        description that embeds *error_message*; the entity id is the
        ``seller_tax_no`` parameter, or an empty string when it is absent.
        """
        return DetectionResult(
            task_id=context.task_id,
            rule_id=context.rule_id,
            entity_id=context.get_parameter("seller_tax_no", ""),
            entity_type="taxpayer",
            risk_level=RiskLevel.UNKNOWN,
            risk_score=0.0,
            description=f"发票虚开检测失败: {error_message}",
            suggestion="请检查参数设置或联系系统管理员",
        )
|