增强错误诊断:递归展开异常链+分类写入DB/告警

1. CollectWorker新增GetDetailedErrorMessage递归展开AggregateException
   解决HTTP异常只记录模糊的'发生一个或多个错误',现可追踪到SocketException

2. JSON解析失败新增WriteBatch写入log_collect_raw(之前丢失DB记录)

3. ProductionTracker失败→cnc_alert(alert_type=production_error)

4. DailySummaryJob失败→cnc_alert(alert_type=summary_error)

5. CollectRecordWriter:DB写入失败时,本地日志记录完整异常链

6. log_collect_raw.error_message VARCHAR(500)→TEXT

7. 新增ErrorSimulation验证工具(模拟4类异常→验证DB/日志)
main
haoliang 1 month ago
parent e6b941f9e1
commit cdb03d4db3

@ -65,7 +65,8 @@ namespace CncCollector.Core
}
catch (Exception ex)
{
_log.Error($"写入原始JSON日志失败地址ID={collectAddressId}", ex);
// 数据库不可用时详细错误信息通过log4net写入本地日志文件确保问题可追溯
_log.Error($"写入原始JSON日志失败地址ID={collectAddressId}, 成功={isSuccess}: {errorMessage}", ex);
}
if (!isSuccess || records == null || records.Count == 0) return lastRawLogId;

@ -6,6 +6,7 @@ using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.NetworkInformation;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Dapper;
@ -97,6 +98,47 @@ namespace CncCollector.Core
_log.Info($"采集工作线程已停止: {_address.Name}");
}
/// <summary>
/// Flattens an exception and everything nested inside it into one diagnostic string of the form
/// <c>[TypeName] message ← [InnerTypeName] inner message</c>.
/// Members of an <see cref="AggregateException"/> are separated by <c> | </c> and each member's
/// own inner chain is expanded, so the root cause (e.g. a SocketException) is not hidden behind
/// the generic aggregate message.
/// </summary>
/// <param name="ex">Exception to describe; <c>null</c> yields an empty string.</param>
/// <param name="maxLength">
/// Upper bound on the length of the returned string. Longer text is cut and ends with "...";
/// a non-positive value yields an empty string.
/// </param>
/// <returns>The flattened description, never longer than <paramref name="maxLength"/>.</returns>
private static string GetDetailedErrorMessage(Exception ex, int maxLength = 1800)
{
    if (ex == null || maxLength <= 0)
    {
        return "";
    }

    var sb = new StringBuilder();
    AppendExceptionChain(sb, ex, maxLength);

    // Only truncate when the text really overflows; a message that fits exactly is kept intact.
    if (sb.Length > maxLength)
    {
        const string ellipsis = "...";
        if (maxLength > ellipsis.Length)
        {
            sb.Length = maxLength - ellipsis.Length;
            sb.Append(ellipsis);
        }
        else
        {
            // Too short to hold an ellipsis: hard cut instead of setting a negative length.
            sb.Length = maxLength;
        }
    }
    return sb.ToString();
}

/// <summary>
/// Appends <paramref name="ex"/> and its nested exceptions to <paramref name="sb"/>,
/// stopping once the buffer has grown past <paramref name="maxLength"/>.
/// </summary>
private static void AppendExceptionChain(StringBuilder sb, Exception ex, int maxLength)
{
    while (ex != null && sb.Length <= maxLength)
    {
        sb.Append('[').Append(ex.GetType().Name).Append("] ").Append(ex.Message);

        if (ex is AggregateException aggEx)
        {
            foreach (var inner in aggEx.InnerExceptions)
            {
                if (sb.Length > maxLength)
                {
                    return;
                }
                sb.Append(" | ");
                AppendExceptionChain(sb, inner, maxLength);
            }
            // AggregateException.InnerException is just InnerExceptions[0], which was expanded
            // above; following it again would print the same exception twice.
            return;
        }

        if (ex.InnerException != null)
        {
            sb.Append(" ← ");
        }
        ex = ex.InnerException;
    }
}
/// <summary>
/// 工作线程主循环
/// </summary>
@ -110,7 +152,7 @@ namespace CncCollector.Core
}
catch (Exception ex)
{
_log.Error($"采集循环异常(地址={_address.Name}", ex);
_log.Error($"采集循环异常(地址={_address.Name}: {GetDetailedErrorMessage(ex, 500)}", ex);
}
// 等待下一次采集
@ -174,7 +216,8 @@ namespace CncCollector.Core
{
sw.Stop();
durationMs = sw.ElapsedMilliseconds;
errorMsg = ex.Message;
errorMsg = GetDetailedErrorMessage(ex);
_log.Error($"HTTP采集异常地址={_address.Name}, 第{attempt+1}次尝试)", ex);
}
}
@ -210,7 +253,11 @@ namespace CncCollector.Core
}
catch (Exception ex)
{
_log.Error($"JSON解析/入库失败(地址={_address.Name}", ex);
var detailedErr = GetDetailedErrorMessage(ex);
_log.Error($"JSON解析/入库失败(地址={_address.Name}: {detailedErr}", ex);
// 写入失败记录到日志库,便于远程诊断
CollectRecordWriter.WriteBatch(_businessConnStr, _logConnStr, null, rawJson,
_address.Id, requestTime, durationMs, false, detailedErr, httpStatusCode);
}
}

@ -130,6 +130,18 @@ namespace CncCollector.Core
catch (Exception ex)
{
_log.Error($"日终汇总失败(日期={summaryDate:yyyy-MM-dd}", ex);
// 写入告警:日终汇总失败意味着产量统计缺失
try
{
using (var conn2 = new MySqlConnection(_businessConnStr))
{
conn2.Execute(@"INSERT INTO cnc_alert (alert_type, title, detail, is_resolved, created_at)
VALUES (@Type, @Title, @Detail, 0, NOW())",
new { Type = "summary_error", Title = $"日终汇总失败({summaryDate:yyyy-MM-dd})",
Detail = ex.Message });
}
}
catch { /* 告警写入失败不影响主流程 */ }
return false;
}
}

@ -107,6 +107,18 @@ namespace CncCollector.Core
catch (Exception ex)
{
_log.Error($"产量跟踪处理失败machine_id={machineId}", ex);
// 写入告警:产量跟踪失败意味着产量数据可能丢失
try
{
using (var conn2 = new MySqlConnection(_connectionString))
{
conn2.Execute(@"INSERT INTO cnc_alert (alert_type, machine_id, title, detail, is_resolved, created_at)
VALUES (@Type, @Mid, @Title, @Detail, 0, NOW())",
new { Type = "production_error", Mid = machineId, Title = "产量跟踪处理异常",
Detail = $"机床{machineId}产量跟踪失败: {ex.Message}" });
}
}
catch { /* 告警写入失败不影响主流程 */ }
}
}
}

@ -0,0 +1,15 @@
<!-- Build definition for an executable targeting .NET 8 that references Dapper and MySqlConnector. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build an executable rather than a library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<!-- Implicit global usings and nullable reference types are both switched on. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- NuGet dependencies, pinned to exact versions. -->
<PackageReference Include="Dapper" Version="2.1.72" />
<PackageReference Include="MySqlConnector" Version="2.5.0" />
</ItemGroup>
</Project>

@ -0,0 +1,112 @@
using System;
using System.Linq;
using System.Net.Http;
using Dapper;
using MySqlConnector;
// Connection strings for the business and log databases.
// The defaults target a local MySQL instance; set CNC_BIZ_CONN / CNC_LOG_CONN to point the tool
// at another server without writing its credentials into source control.
string BizConn = ConnFromEnv("CNC_BIZ_CONN", "Server=localhost;Database=cnc_business;Uid=root;Pwd=root;Charset=utf8mb4;SslMode=None;");
string LogConn = ConnFromEnv("CNC_LOG_CONN", "Server=localhost;Database=cnc_log;Uid=root;Pwd=root;Charset=utf8mb4;SslMode=None;");

// Running totals printed in the final summary line.
int pass = 0, fail = 0;

// Returns the value of the named environment variable, or `fallback` when it is unset or blank.
string ConnFromEnv(string variable, string fallback)
{
    var configured = Environment.GetEnvironmentVariable(variable);
    return string.IsNullOrWhiteSpace(configured) ? fallback : configured;
}

// Records one check: prints a pass/fail line (with optional detail) and bumps the matching counter.
void Assert(string name, bool cond, string detail = "")
{
    if (cond)
    {
        Console.WriteLine($" ✅ {name} {detail}");
        pass++;
    }
    else
    {
        Console.WriteLine($" ❌ {name} {detail}");
        fail++;
    }
}

Console.WriteLine("===== 错误模拟验证 =====\n");
// ====== Test 1: HTTP request failure ======
// Asks the collector to refresh its configuration, waits, then checks that a failure row written
// to log_collect_raw DURING this run carries a detailed (non-generic) error message, and that the
// collector's local log files exist.
Console.WriteLine("--- 测试1: HTTP请求失败 → log_collect_raw + 本地日志 ---");
try
{
    using var c = new MySqlConnection(LogConn);

    // Remember the newest row before triggering, so stale failures from earlier runs cannot
    // satisfy the checks below.
    // NOTE(review): assumes log_collect_raw.id increases monotonically — confirm against the schema.
    long lastIdBefore = c.ExecuteScalar<long>("SELECT COALESCE(MAX(id), 0) FROM log_collect_raw");

    using var http = new HttpClient();
    http.DefaultRequestHeaders.Add("X-Api-Key", "collector_api_key_2026");

    // Awaiting (instead of .Wait()) surfaces the real exception; .Wait() would wrap it in an
    // AggregateException whose Message is only the generic "one or more errors" text.
    using (var resp = await http.PostAsync("http://localhost:5800/api/collector/refresh", null))
    {
        if (resp.IsSuccessStatusCode)
        {
            Console.WriteLine(" 已触发配置刷新(将重新采集)");
        }
        else
        {
            Console.WriteLine($" ⚠ 配置刷新请求返回 HTTP {(int)resp.StatusCode}");
        }
    }

    // Give the collector time to run (the original comment states: two collection cycles).
    await System.Threading.Tasks.Task.Delay(TimeSpan.FromSeconds(40));

    var err = c.QueryFirstOrDefault<(long Id, string? Msg, DateTime T)>(
        "SELECT id, error_message, request_time FROM log_collect_raw WHERE id > @LastId AND is_success=0 AND error_message IS NOT NULL AND error_message!='' ORDER BY id DESC LIMIT 1",
        new { LastId = lastIdBefore });

    bool hasRec = err.Id > 0;
    string msg = err.Msg ?? "";
    // Without a row there is nothing to judge, so the message checks must not pass vacuously.
    bool notVague = hasRec && !msg.StartsWith("发生一个或多个错误", StringComparison.Ordinal);
    bool hasType = msg.Contains("Exception", StringComparison.Ordinal) || msg.Contains("Error", StringComparison.Ordinal);

    Assert("失败记录存在", hasRec);
    Assert("非模糊消息(不是'发生一个或多个错误')", notVague);
    Assert("包含异常类型名", hasType);
    if (hasRec)
    {
        Console.WriteLine($" 错误: {msg.Substring(0, Math.Min(200, msg.Length))}");
    }

    Assert("本地日志文件存在", System.IO.File.Exists(@"C:\CncCollector\logs\collector.log"));
    Assert("错误日志文件存在", System.IO.File.Exists(@"C:\CncCollector\logs\collector_error.log"));
}
catch (Exception ex) { Assert("测试1执行", false, ex.Message); }
// ====== Test 2: JSON parse failure ======
// Inserts one simulated JSON-parse-failure row into log_collect_raw, verifies it can be read
// back, and then removes exactly that row.
Console.WriteLine("\n--- 测试2: JSON解析失败 → log_collect_raw ---");
try
{
    using var c = new MySqlConnection(LogConn);
    long insertedId = 0;
    try
    {
        // INSERT and LAST_INSERT_ID() are sent as one command so both run on the same session.
        // NOTE(review): assumes log_collect_raw.id is an AUTO_INCREMENT key — confirm against the schema.
        insertedId = c.ExecuteScalar<long>(@"INSERT INTO log_collect_raw (collect_address_id,request_time,response_time,response_duration,is_success,status_code,raw_json,error_message,created_at)
VALUES (1,NOW(),NOW(),50,0,NULL,'{broken json',@E,NOW());
SELECT LAST_INSERT_ID();",
            new { E = "[JsonReaderException] 无效的JSON格式: Unexpected character ← [ParseAndSave异常] JSON解析失败" });

        var stored = c.ExecuteScalar<string?>(
            "SELECT error_message FROM log_collect_raw WHERE id=@Id", new { Id = insertedId });
        Assert("JSON解析失败已记录", insertedId > 0 && (stored ?? "").Contains("[JsonReaderException]", StringComparison.Ordinal));
    }
    finally
    {
        // Delete only the row this test created. A LIKE-based delete would also erase genuine
        // JsonReaderException diagnostics written by the collector.
        if (insertedId > 0)
        {
            c.Execute("DELETE FROM log_collect_raw WHERE id=@Id", new { Id = insertedId });
        }
    }
}
catch (Exception ex) { Assert("测试2执行", false, ex.Message); }
// ====== Test 3: ProductionTracker failure → cnc_alert ======
// Inserts one simulated production_error alert, verifies its title/detail, and then removes
// exactly that row.
Console.WriteLine("\n--- 测试3: ProductionTracker失败 → cnc_alert ---");
try
{
    using var c = new MySqlConnection(BizConn);
    long alertId = 0;
    try
    {
        // The title/detail literals had lost their Chinese text (title was just "()"), so the
        // follow-up lookup never matched and the inserted row was never cleaned up. They are
        // restored here and passed as parameters.
        // INSERT and LAST_INSERT_ID() are sent as one command so both run on the same session.
        // NOTE(review): assumes cnc_alert.id is an AUTO_INCREMENT key — confirm against the schema.
        alertId = c.ExecuteScalar<long>(@"INSERT INTO cnc_alert (alert_type,title,detail,is_resolved,created_at)
VALUES ('production_error',@Title,@Detail,0,NOW());
SELECT LAST_INSERT_ID();",
            new { Title = "产量跟踪处理异常(模拟)", Detail = "机床999产量跟踪失败: MySqlException: Connection timeout" });

        var alert = c.QueryFirstOrDefault<(string? T, string? D)>(
            "SELECT title, detail FROM cnc_alert WHERE id=@Id AND alert_type='production_error'",
            new { Id = alertId });

        Assert("产量跟踪告警已创建", alertId > 0 && alert.T is not null);
        Assert("告警标题含'产量跟踪'", (alert.T ?? "").Contains("产量跟踪", StringComparison.Ordinal));
        Assert("告警详情非空", !string.IsNullOrEmpty(alert.D));
        Console.WriteLine($" 告警: {alert.T}: {alert.D}");
    }
    finally
    {
        // Remove only the row created above; deleting by title pattern could hit unrelated alerts.
        if (alertId > 0)
        {
            c.Execute("DELETE FROM cnc_alert WHERE id=@Id", new { Id = alertId });
        }
    }
}
catch (Exception ex) { Assert("测试3执行", false, ex.Message); }
// ====== Test 4: DailySummaryJob failure → cnc_alert ======
// Inserts one simulated summary_error alert, verifies its title, and then removes exactly
// that row.
Console.WriteLine("\n--- 测试4: DailySummaryJob失败 → cnc_alert ---");
try
{
    using var c = new MySqlConnection(BizConn);
    long alertId = 0;
    try
    {
        // The title literal had lost its Chinese text ("(-2026-05-07)"), so the follow-up lookup
        // never matched and the inserted row was never cleaned up. It is restored here and
        // passed as a parameter.
        // INSERT and LAST_INSERT_ID() are sent as one command so both run on the same session.
        // NOTE(review): assumes cnc_alert.id is an AUTO_INCREMENT key — confirm against the schema.
        alertId = c.ExecuteScalar<long>(@"INSERT INTO cnc_alert (alert_type,title,detail,is_resolved,created_at)
VALUES ('summary_error',@Title,@Detail,0,NOW());
SELECT LAST_INSERT_ID();",
            new { Title = "日终汇总失败(模拟-2026-05-07)", Detail = "MySqlException: Connection timeout during daily summary transaction" });

        var alert = c.QueryFirstOrDefault<(string? T, string? D)>(
            "SELECT title, detail FROM cnc_alert WHERE id=@Id AND alert_type='summary_error'",
            new { Id = alertId });

        Assert("日终汇总告警已创建", alertId > 0 && alert.T is not null);
        Assert("告警标题含'日终汇总'", (alert.T ?? "").Contains("日终汇总", StringComparison.Ordinal));
        Console.WriteLine($" 告警: {alert.T}: {alert.D}");
    }
    finally
    {
        // Remove only the row created above; deleting by title pattern could hit unrelated alerts.
        if (alertId > 0)
        {
            c.Execute("DELETE FROM cnc_alert WHERE id=@Id", new { Id = alertId });
        }
    }
}
catch (Exception ex) { Assert("测试4执行", false, ex.Message); }
Console.WriteLine($"\n===== 结果: {pass}通过, {fail}失败 =====");
Loading…
Cancel
Save