""" 发票虚开检测算法 检测发票金额与实际业务是否匹配,识别虚开发票风险 """ from typing import Dict, Any, List, Optional, Tuple from datetime import datetime, timedelta from sqlalchemy import select, func, and_, or_ from sqlalchemy.ext.asyncio import AsyncSession from loguru import logger from .base import ( RiskDetectionAlgorithm, DetectionContext, DetectionResult, RiskEvidence, ) from app.models.risk_detection import RiskLevel from app.models.invoice import Invoice from app.models.order import Order from app.models.settlement import Settlement class InvoiceFraudDetectionAlgorithm(RiskDetectionAlgorithm): """发票虚开检测算法""" def get_algorithm_code(self) -> str: return "INVOICE_FRAUD_DETECTION" def get_algorithm_name(self) -> str: return "发票虚开检测" def get_description(self) -> str: return ( "通过对比发票数据与订单、结算记录," "检测是否存在虚开发票、金额不匹配等风险" ) async def _do_detect(self, context: DetectionContext) -> DetectionResult: """执行发票虚开检测""" # 获取参数 seller_tax_no = context.get_parameter("seller_tax_no") period = context.get_parameter("period") # 格式:YYYY-MM threshold_rate = context.get_parameter("threshold_rate", 0.1) # 差异率阈值 if not seller_tax_no or not period: return self._create_error_result(context, "缺少必要参数:seller_tax_no 或 period") db_session = context.db_session if not db_session: return self._create_error_result(context, "缺少数据库会话") try: # 获取发票数据 invoices = await self._get_invoices(db_session, seller_tax_no, period) # 获取订单和结算数据 orders, settlements = await self._get_business_data( db_session, seller_tax_no, period ) # 分析发票异常 anomalies = self._analyze_invoice_anomalies( invoices, orders, settlements, threshold_rate ) # 计算风险指标 total_invoice_amount = sum(inv["total_amount"] for inv in invoices) total_order_amount = sum(order["total_amount"] for order in orders) total_settlement_amount = sum(set["actual_amount"] for set in settlements) discrepancy = self._calculate_discrepancy( total_invoice_amount, total_order_amount, total_settlement_amount ) # 判断风险等级 risk_level, risk_score = self._calculate_risk_level(anomalies, discrepancy) # 生成风险描述和建议 description, suggestion = self._generate_risk_description( anomalies, discrepancy, total_invoice_amount ) # 创建检测结果 result = DetectionResult( task_id=context.task_id, rule_id=context.rule_id, entity_id=seller_tax_no, entity_type="taxpayer", risk_level=risk_level, risk_score=risk_score, description=description, suggestion=suggestion, risk_data={ "invoices": invoices, "orders": orders, "settlements": settlements, "anomalies": anomalies, "total_invoice_amount": total_invoice_amount, "total_order_amount": total_order_amount, "total_settlement_amount": total_settlement_amount, "discrepancy": discrepancy, "period": period, }, ) # 添加证据 result.add_evidence(RiskEvidence( evidence_type="invoice_summary", description=f"期间内共开具 {len(invoices)} 张发票,总金额 {total_invoice_amount:,.2f}元", data={ "count": len(invoices), "total_amount": total_invoice_amount, }, )) result.add_evidence(RiskEvidence( evidence_type="business_summary", description=f"实际业务:订单金额 {total_order_amount:,.2f}元,结算金额 {total_settlement_amount:,.2f}元", data={ "order_amount": total_order_amount, "settlement_amount": total_settlement_amount, }, )) if discrepancy > 0: result.add_evidence(RiskEvidence( evidence_type="discrepancy", description=f"发票与业务数据差异:{discrepancy:,.2f}元", data={"discrepancy": discrepancy}, )) # 添加主要异常记录 for anomaly in anomalies[:5]: result.add_evidence(RiskEvidence( evidence_type="anomaly_detail", description=f"异常:{anomaly['description']}", data=anomaly, )) return result except Exception as e: logger.error(f"发票虚开检测执行失败: {str(e)}", exc_info=True) return self._create_error_result(context, f"检测执行失败: {str(e)}") async def _get_invoices( self, db_session: AsyncSession, seller_tax_no: str, period: str ) -> List[Dict[str, Any]]: """获取发票数据""" try: start_date, end_date = self._parse_period(period) stmt = select(Invoice).where( and_( Invoice.seller_tax_no == seller_tax_no, Invoice.invoice_date >= start_date, Invoice.invoice_date <= end_date, Invoice.invoice_status == "正常", ) ).order_by(Invoice.invoice_date.desc()) result = await db_session.execute(stmt) invoices = result.scalars().all() return [ { "invoice_id": inv.invoice_id, "invoice_code": inv.invoice_code, "invoice_no": inv.invoice_no, "invoice_date": inv.invoice_date, "total_amount": inv.total_amount, "total_tax": inv.total_tax, "purchaser_name": inv.purchaser_name, "purchaser_tax_no": inv.purchaser_tax_no, "direction": inv.direction, "is_verified": inv.is_verified, "is_red_invoice": inv.is_red_invoice, } for inv in invoices ] except Exception as e: logger.error(f"获取发票数据失败: {str(e)}") return [] async def _get_business_data( self, db_session: AsyncSession, seller_tax_no: str, period: str ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: """获取订单和结算数据""" try: start_date, end_date = self._parse_period(period) # 获取订单 order_stmt = select(Order).where( and_( Order.order_time >= start_date, Order.order_time <= end_date, Order.order_status == "已完成", ) ) order_result = await db_session.execute(order_stmt) orders = order_result.scalars().all() # 获取结算 settlement_stmt = select(Settlement).where( and_( Settlement.settlement_start_date >= start_date, Settlement.settlement_end_date <= end_date, Settlement.settlement_status == "已结算", ) ) settlement_result = await db_session.execute(settlement_stmt) settlements = settlement_result.scalars().all() orders_data = [ { "order_id": order.order_id, "order_time": order.order_time, "total_amount": order.total_amount, "actual_payment": order.actual_payment, } for order in orders ] settlements_data = [ { "settlement_id": settlement.settlement_id, "settlement_period": settlement.settlement_period, "actual_settlement_amount": settlement.actual_settlement_amount, } for settlement in settlements ] return orders_data, settlements_data except Exception as e: logger.error(f"获取业务数据失败: {str(e)}") return [], [] def _analyze_invoice_anomalies( self, invoices: List[Dict[str, Any]], orders: List[Dict[str, Any]], settlements: List[Dict[str, Any]], threshold_rate: float, ) -> List[Dict[str, Any]]: """分析发票异常""" anomalies = [] # 检查发票与订单金额匹配 if invoices and orders: invoice_total = sum(inv["total_amount"] for inv in invoices) order_total = sum(order["total_amount"] for order in orders) if invoice_total > 0 and order_total > 0: diff_rate = abs(invoice_total - order_total) / max(invoice_total, order_total) if diff_rate > threshold_rate: anomalies.append({ "type": "amount_mismatch", "description": f"发票总额与订单总额差异率 {diff_rate*100:.2f}% 超过阈值", "invoice_total": invoice_total, "order_total": order_total, "diff_rate": diff_rate, }) # 检查发票与结算金额匹配 if invoices and settlements: invoice_total = sum(inv["total_amount"] for inv in invoices) settlement_total = sum(set["actual_settlement_amount"] for set in settlements) if invoice_total > 0 and settlement_total > 0: diff_rate = abs(invoice_total - settlement_total) / max(invoice_total, settlement_total) if diff_rate > threshold_rate: anomalies.append({ "type": "settlement_mismatch", "description": f"发票总额与结算总额差异率 {diff_rate*100:.2f}% 超过阈值", "invoice_total": invoice_total, "settlement_total": settlement_total, "diff_rate": diff_rate, }) # 检查大量红字发票 red_invoices = [inv for inv in invoices if inv["is_red_invoice"]] if red_invoices: red_rate = len(red_invoices) / len(invoices) if invoices else 0 if red_rate > 0.3: # 超过30%为红字发票 anomalies.append({ "type": "high_red_invoice_rate", "description": f"红字发票比例 {red_rate*100:.2f}% 过高", "red_count": len(red_invoices), "total_count": len(invoices), "red_rate": red_rate, }) # 检查大额发票 large_invoices = [inv for inv in invoices if inv["total_amount"] > 100000] if large_invoices: anomalies.append({ "type": "large_invoices", "description": f"存在 {len(large_invoices)} 张大额发票(>10万元)", "count": len(large_invoices), "total_amount": sum(inv["total_amount"] for inv in large_invoices), }) # 检查未验证的发票 unverified_invoices = [inv for inv in invoices if not inv["is_verified"]] if unverified_invoices: unverified_rate = len(unverified_invoices) / len(invoices) if invoices else 0 if unverified_rate > 0.2: # 超过20%未验证 anomalies.append({ "type": "high_unverified_rate", "description": f"未验证发票比例 {unverified_rate*100:.2f}% 过高", "unverified_count": len(unverified_invoices), "total_count": len(invoices), "unverified_rate": unverified_rate, }) return anomalies def _calculate_discrepancy( self, invoice_amount: float, order_amount: float, settlement_amount: float, ) -> float: """计算金额差异""" # 以发票金额为基准,计算与订单和结算的平均差异 if invoice_amount == 0: return 0.0 order_diff = abs(invoice_amount - order_amount) if order_amount > 0 else 0 settlement_diff = abs(invoice_amount - settlement_amount) if settlement_amount > 0 else 0 return (order_diff + settlement_diff) / 2 def _calculate_risk_level( self, anomalies: List[Dict[str, Any]], discrepancy: float ) -> tuple[RiskLevel, float]: """计算风险等级""" if not anomalies and discrepancy < 1000: return RiskLevel.LOW, 10.0 risk_score = 20.0 # 基础分 # 基于异常类型加分 anomaly_scores = { "amount_mismatch": 40, "settlement_mismatch": 35, "high_red_invoice_rate": 30, "large_invoices": 20, "high_unverified_rate": 25, } for anomaly in anomalies: risk_score += anomaly_scores.get(anomaly["type"], 10) # 基于差异金额加分 if discrepancy > 500000: risk_score += 30 elif discrepancy > 200000: risk_score += 20 elif discrepancy > 100000: risk_score += 10 # 限制分数范围 risk_score = min(risk_score, 100.0) # 判断风险等级 if risk_score >= 90: return RiskLevel.CRITICAL, risk_score elif risk_score >= 70: return RiskLevel.HIGH, risk_score elif risk_score >= 50: return RiskLevel.MEDIUM, risk_score else: return RiskLevel.LOW, risk_score def _generate_risk_description( self, anomalies: List[Dict[str, Any]], discrepancy: float, total_invoice_amount: float, ) -> tuple[str, str]: """生成风险描述和建议""" if anomalies: anomaly_count = len(anomalies) anomaly_types = [a["type"] for a in anomalies] description = ( f"检测到 {anomaly_count} 项发票异常风险:" f"发票总金额 {total_invoice_amount:,.2f}元," f"存在金额不匹配、红字发票比例过高等问题," f"可能涉及虚开发票风险。" ) suggestion = ( "1. 核实发票开具的真实性和合法性;\n" "2. 检查发票与实际业务的对应关系;\n" "3. 提供相关合同、订单、结算证明材料;\n" "4. 对红字发票说明冲红原因;\n" "5. 及时验证发票真伪;\n" "6. 如发现问题,建议主动补正申报。" ) else: description = "发票数据检查未发现明显异常,开票行为基本规范。" suggestion = "继续保持规范的发票管理。" return description, suggestion def _parse_period(self, period: str) -> tuple[datetime, datetime]: """解析期间为开始和结束日期""" try: year, month = map(int, period.split("-")) start_date = datetime(year, month, 1) if month == 12: end_date = datetime(year + 1, 1, 1) - timedelta(days=1) else: end_date = datetime(year, month + 1, 1) - timedelta(days=1) return start_date, end_date except Exception as e: logger.error(f"解析期间失败: {period}, 错误: {str(e)}") now = datetime.now() start_date = datetime(now.year, now.month, 1) end_date = now return start_date, end_date def _create_error_result(self, context: DetectionContext, error_message: str) -> DetectionResult: """创建错误结果""" return DetectionResult( task_id=context.task_id, rule_id=context.rule_id, entity_id=context.get_parameter("seller_tax_no", ""), entity_type="taxpayer", risk_level=RiskLevel.UNKNOWN, risk_score=0.0, description=f"发票虚开检测失败: {error_message}", suggestion="请检查参数设置或联系系统管理员", )