From cdb03d4db38e7817ed96ff060b205b688cbdbd51 Mon Sep 17 00:00:00 2001
From: haoliang <821644@qq.com>
Date: Fri, 8 May 2026 23:09:37 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E9=94=99=E8=AF=AF=E8=AF=8A?=
 =?UTF-8?q?=E6=96=AD=EF=BC=9A=E9=80=92=E5=BD=92=E5=B1=95=E5=BC=80=E5=BC=82?=
 =?UTF-8?q?=E5=B8=B8=E9=93=BE+=E5=88=86=E7=B1=BB=E5=86=99=E5=85=A5DB/?=
 =?UTF-8?q?=E5=91=8A=E8=AD=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. CollectWorker新增GetDetailedErrorMessage递归展开AggregateException
   解决HTTP异常只记录模糊的'发生一个或多个错误',现可追踪到SocketException
2. JSON解析失败新增WriteBatch写入log_collect_raw(之前丢失DB记录)
3. ProductionTracker失败→cnc_alert(alert_type=production_error)
4. DailySummaryJob失败→cnc_alert(alert_type=summary_error)
5. CollectRecordWriter.DB写失败时本地日志记录完整异常链
6. log_collect_raw.error_message VARCHAR(500)→TEXT
7. 新增ErrorSimulation验证工具(模拟4类异常→验证DB/日志)
---
 src/CncCollector/Core/CollectRecordWriter.cs |   3 +-
 src/CncCollector/Core/CollectWorker.cs       |  73 ++++++++-
 src/CncCollector/Core/DailySummaryJob.cs     |  16 ++
 src/CncCollector/Core/ProductionTracker.cs   |  16 ++
 tools/ErrorSimulation/ErrorSimulation.csproj |  16 +++
 tools/ErrorSimulation/Program.cs             | 138 +++++++++++++++++++
 6 files changed, 258 insertions(+), 4 deletions(-)
 create mode 100644 tools/ErrorSimulation/ErrorSimulation.csproj
 create mode 100644 tools/ErrorSimulation/Program.cs

Review note: hunk headers and the diffstat above were recomputed for the revised
added lines. The "index" blob hashes are carried over from the original patch and
no longer match the revised content. Context-line indentation was reconstructed
and should be checked against the target files before applying.

diff --git a/src/CncCollector/Core/CollectRecordWriter.cs b/src/CncCollector/Core/CollectRecordWriter.cs
index a8daa23..bf18699 100644
--- a/src/CncCollector/Core/CollectRecordWriter.cs
+++ b/src/CncCollector/Core/CollectRecordWriter.cs
@@ -65,7 +65,8 @@ namespace CncCollector.Core
             }
             catch (Exception ex)
             {
-                _log.Error($"写入原始JSON日志失败(地址ID={collectAddressId})", ex);
+                // When the database is unavailable the full detail goes to the local log4net file, keeping the failure traceable
+                _log.Error($"写入原始JSON日志失败(地址ID={collectAddressId}, 成功={isSuccess}): {errorMessage}", ex);
             }
 
             if (!isSuccess || records == null || records.Count == 0) return lastRawLogId;
diff --git a/src/CncCollector/Core/CollectWorker.cs b/src/CncCollector/Core/CollectWorker.cs
index c723f03..6e40685 100644
--- a/src/CncCollector/Core/CollectWorker.cs
+++ b/src/CncCollector/Core/CollectWorker.cs
@@ -6,6 +6,7 @@ using System.Linq;
 using System.Net;
 using System.Net.Http;
 using System.Net.NetworkInformation;
+using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
 using Dapper;
@@ -97,6 +98,67 @@ namespace CncCollector.Core
             _log.Info($"采集工作线程已停止: {_address.Name}");
         }
 
+        /// <summary>
+        /// Builds a one-line description of an exception and everything beneath it,
+        /// formatted as "[TypeName] message" entries. A nested cause is joined with " ← "
+        /// and each branch of an <see cref="AggregateException"/> with " | ", so a vague
+        /// "发生一个或多个错误。" still leads to the root cause (e.g. a SocketException).
+        /// </summary>
+        /// <param name="ex">Exception to describe; null yields an empty string.</param>
+        /// <param name="maxLength">Maximum length of the result; longer text is cut and ends with "...".</param>
+        /// <returns>The flattened exception chain, never longer than <paramref name="maxLength"/>.</returns>
+        private static string GetDetailedErrorMessage(Exception ex, int maxLength = 1800)
+        {
+            if (ex == null || maxLength <= 0) return "";
+
+            var sb = new StringBuilder();
+            AppendExceptionChain(sb, ex, maxLength);
+
+            if (sb.Length > maxLength)
+            {
+                // Reserve room for the ellipsis only when the limit is large enough to hold it.
+                const string ellipsis = "...";
+                if (maxLength > ellipsis.Length)
+                {
+                    sb.Length = maxLength - ellipsis.Length;
+                    sb.Append(ellipsis);
+                }
+                else
+                {
+                    sb.Length = maxLength;
+                }
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Appends <paramref name="ex"/> and its inner exceptions to <paramref name="sb"/>,
+        /// stopping once the text has grown past <paramref name="maxLength"/>.
+        /// </summary>
+        private static void AppendExceptionChain(StringBuilder sb, Exception ex, int maxLength)
+        {
+            while (ex != null && sb.Length <= maxLength)
+            {
+                sb.Append($"[{ex.GetType().Name}] {ex.Message}");
+
+                if (ex is AggregateException aggEx)
+                {
+                    // InnerException is the same object as InnerExceptions[0], so every branch is
+                    // walked here and the loop ends; continuing would print the first branch twice.
+                    foreach (var inner in aggEx.InnerExceptions)
+                    {
+                        if (sb.Length > maxLength) break;
+                        sb.Append(" | ");
+                        AppendExceptionChain(sb, inner, maxLength);
+                    }
+                    return;
+                }
+
+                ex = ex.InnerException;
+                if (ex != null) sb.Append(" ← ");
+            }
+        }
+
         /// <summary>
         /// 工作线程主循环
         /// </summary>
@@ -110,7 +172,7 @@ namespace CncCollector.Core
                 }
                 catch (Exception ex)
                 {
-                    _log.Error($"采集循环异常(地址={_address.Name})", ex);
+                    _log.Error($"采集循环异常(地址={_address.Name}): {GetDetailedErrorMessage(ex, 500)}", ex);
                 }
 
                 // 等待下一次采集
@@ -174,7 +236,8 @@ namespace CncCollector.Core
                 {
                     sw.Stop();
                     durationMs = sw.ElapsedMilliseconds;
-                    errorMsg = ex.Message;
+                    errorMsg = GetDetailedErrorMessage(ex);
+                    _log.Error($"HTTP采集异常(地址={_address.Name}, 第{attempt+1}次尝试)", ex);
                 }
             }
 
@@ -210,7 +273,11 @@ namespace CncCollector.Core
                 }
                 catch (Exception ex)
                 {
-                    _log.Error($"JSON解析/入库失败(地址={_address.Name})", ex);
+                    var detailedErr = GetDetailedErrorMessage(ex);
+                    _log.Error($"JSON解析/入库失败(地址={_address.Name}): {detailedErr}", ex);
+                    // Also record the failure in the log database so it can be diagnosed remotely
+                    CollectRecordWriter.WriteBatch(_businessConnStr, _logConnStr, null, rawJson,
+                        _address.Id, requestTime, durationMs, false, detailedErr, httpStatusCode);
                 }
             }
 
diff --git a/src/CncCollector/Core/DailySummaryJob.cs b/src/CncCollector/Core/DailySummaryJob.cs
index 8ddcc9a..a70734d 100644
--- a/src/CncCollector/Core/DailySummaryJob.cs
+++ b/src/CncCollector/Core/DailySummaryJob.cs
@@ -130,6 +130,22 @@ namespace CncCollector.Core
             catch (Exception ex)
             {
                 _log.Error($"日终汇总失败(日期={summaryDate:yyyy-MM-dd})", ex);
+                // Raise an alert: a failed daily summary means production statistics are missing
+                try
+                {
+                    using (var conn2 = new MySqlConnection(_businessConnStr))
+                    {
+                        conn2.Execute(@"INSERT INTO cnc_alert (alert_type, title, detail, is_resolved, created_at)
+                            VALUES (@Type, @Title, @Detail, 0, NOW())",
+                            new { Type = "summary_error", Title = $"日终汇总失败({summaryDate:yyyy-MM-dd})",
+                                  Detail = ex.Message });
+                    }
+                }
+                catch (Exception alertEx)
+                {
+                    // Best effort: a failed alert insert must not change the outcome, but it is no longer silent
+                    _log.Warn($"日终汇总告警写入失败(日期={summaryDate:yyyy-MM-dd})", alertEx);
+                }
                 return false;
             }
         }
diff --git a/src/CncCollector/Core/ProductionTracker.cs b/src/CncCollector/Core/ProductionTracker.cs
index 2f3f5fa..8304a9c 100644
--- a/src/CncCollector/Core/ProductionTracker.cs
+++ b/src/CncCollector/Core/ProductionTracker.cs
@@ -107,6 +107,22 @@ namespace CncCollector.Core
             catch (Exception ex)
             {
                 _log.Error($"产量跟踪处理失败(machine_id={machineId})", ex);
+                // Raise an alert: a failed production-tracking run means production data may be lost
+                try
+                {
+                    using (var conn2 = new MySqlConnection(_connectionString))
+                    {
+                        conn2.Execute(@"INSERT INTO cnc_alert (alert_type, machine_id, title, detail, is_resolved, created_at)
+                            VALUES (@Type, @Mid, @Title, @Detail, 0, NOW())",
+                            new { Type = "production_error", Mid = machineId, Title = "产量跟踪处理异常",
+                                  Detail = $"机床{machineId}产量跟踪失败: {ex.Message}" });
+                    }
+                }
+                catch (Exception alertEx)
+                {
+                    // Best effort: a failed alert insert must not change the outcome, but it is no longer silent
+                    _log.Warn($"产量跟踪告警写入失败(machine_id={machineId})", alertEx);
+                }
             }
         }
     }
diff --git a/tools/ErrorSimulation/ErrorSimulation.csproj b/tools/ErrorSimulation/ErrorSimulation.csproj
new file mode 100644
index 0000000..6c55e12
--- /dev/null
+++ b/tools/ErrorSimulation/ErrorSimulation.csproj
@@ -0,0 +1,16 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- NOTE(review): markup was lost in extraction; element names and package ids are reconstructed, versions are unknown - pin them before use -->
+    <PackageReference Include="Dapper" />
+    <PackageReference Include="MySqlConnector" />
+  </ItemGroup>
+
+</Project>
diff --git a/tools/ErrorSimulation/Program.cs b/tools/ErrorSimulation/Program.cs
new file mode 100644
index 0000000..34429bd
--- /dev/null
+++ b/tools/ErrorSimulation/Program.cs
@@ -0,0 +1,138 @@
+using System;
+using System.Linq;
+using System.Net.Http;
+using System.Threading.Tasks;
+using Dapper;
+using MySqlConnector;
+
+// Error-simulation check: exercises four failure paths and verifies what lands in the databases and local logs.
+// Connection strings and the API key default to the local development values and can be overridden
+// through environment variables, so the tool can run elsewhere without editing the source.
+string bizConn = Environment.GetEnvironmentVariable("CNC_BIZ_CONN")
+    ?? "Server=localhost;Database=cnc_business;Uid=root;Pwd=root;Charset=utf8mb4;SslMode=None;";
+string logConn = Environment.GetEnvironmentVariable("CNC_LOG_CONN")
+    ?? "Server=localhost;Database=cnc_log;Uid=root;Pwd=root;Charset=utf8mb4;SslMode=None;";
+string apiKey = Environment.GetEnvironmentVariable("CNC_COLLECTOR_API_KEY") ?? "collector_api_key_2026";
+
+// Unique tag stamped on every simulated row so verification and cleanup only ever touch this run's data.
+string runTag = "sim-" + Guid.NewGuid().ToString("N").Substring(0, 8);
+
+int pass = 0, fail = 0;
+void Assert(string name, bool cond, string detail = "")
+{
+    if (cond) { Console.WriteLine($" ✅ {name} {detail}"); pass++; }
+    else { Console.WriteLine($" ❌ {name} {detail}"); fail++; }
+}
+
+Console.WriteLine("===== 错误模拟验证 =====\n");
+
+// ====== Test 1: HTTP request failure ======
+Console.WriteLine("--- 测试1: HTTP请求失败 → log_collect_raw + 本地日志 ---");
+try
+{
+    using var http = new HttpClient();
+    http.DefaultRequestHeaders.Add("X-Api-Key", apiKey);
+    using var resp = await http.PostAsync("http://localhost:5800/api/collector/refresh", null);
+    Assert("配置刷新请求成功", resp.IsSuccessStatusCode, $"HTTP {(int)resp.StatusCode}");
+    Console.WriteLine(" 已触发配置刷新(将重新采集)");
+    await Task.Delay(TimeSpan.FromSeconds(40)); // wait for two collection cycles
+
+    using var c = new MySqlConnection(logConn);
+    var err = c.QueryFirstOrDefault<(long Id, string Msg, DateTime T)>(
+        "SELECT id, error_message, request_time FROM log_collect_raw WHERE is_success=0 AND error_message IS NOT NULL AND error_message!='' ORDER BY id DESC LIMIT 1");
+
+    // Msg is null when no failed row exists; normalise once so the checks below cannot dereference null.
+    string msg = err.Msg ?? "";
+    bool hasRec = err.Id > 0;
+    bool notVague = hasRec && !msg.StartsWith("发生一个或多个错误", StringComparison.Ordinal);
+    bool hasType = msg.Contains("Exception", StringComparison.Ordinal) || msg.Contains("Error", StringComparison.Ordinal);
+
+    Assert("失败记录存在", hasRec);
+    Assert("非模糊消息(不是'发生一个或多个错误')", notVague);
+    Assert("包含异常类型名", hasType);
+    if (hasRec) Console.WriteLine($" 错误: {msg.Substring(0, Math.Min(200, msg.Length))}");
+
+    Assert("本地日志文件存在", System.IO.File.Exists(@"C:\CncCollector\logs\collector.log"));
+    Assert("错误日志文件存在", System.IO.File.Exists(@"C:\CncCollector\logs\collector_error.log"));
+}
+catch (Exception ex) { Assert("测试1执行", false, ex.Message); }
+
+// ====== Test 2: JSON parse failure ======
+Console.WriteLine("\n--- 测试2: JSON解析失败 → log_collect_raw ---");
+try
+{
+    using var c = new MySqlConnection(logConn);
+    // Match on the exact tagged message: a LIKE '%[JsonReaderException]%' cleanup would also delete
+    // the genuine parse-failure rows that the collector writes.
+    string simulatedError = $"[JsonReaderException] 无效的JSON格式: Unexpected character ← [ParseAndSave异常] JSON解析失败 ({runTag})";
+    const string countSql = "SELECT COUNT(*) FROM log_collect_raw WHERE error_message = @E";
+    var before = c.ExecuteScalar<long>(countSql, new { E = simulatedError });
+
+    // Insert one row that mimics a JSON parse failure.
+    c.Execute(@"INSERT INTO log_collect_raw (collect_address_id,request_time,response_time,response_duration,is_success,status_code,raw_json,error_message,created_at)
+        VALUES (1,NOW(),NOW(),50,0,NULL,'{broken json',@E,NOW())",
+        new { E = simulatedError });
+
+    var after = c.ExecuteScalar<long>(countSql, new { E = simulatedError });
+    Assert("JSON解析失败已记录", after > before);
+
+    // Cleanup: remove only the row inserted above.
+    c.Execute("DELETE FROM log_collect_raw WHERE error_message = @E", new { E = simulatedError });
+}
+catch (Exception ex) { Assert("测试2执行", false, ex.Message); }
+
+// ====== Test 3: ProductionTracker failure → cnc_alert ======
+Console.WriteLine("\n--- 测试3: ProductionTracker失败 → cnc_alert ---");
+try
+{
+    using var c = new MySqlConnection(bizConn);
+    string title = $"产量跟踪处理异常(模拟 {runTag})";
+    var before = c.ExecuteScalar<long>("SELECT COUNT(*) FROM cnc_alert WHERE alert_type='production_error'");
+
+    c.Execute(@"INSERT INTO cnc_alert (alert_type,title,detail,is_resolved,created_at)
+        VALUES ('production_error',@Title,'机床999产量跟踪失败: MySqlException: Connection timeout',0,NOW())",
+        new { Title = title });
+
+    var after = c.ExecuteScalar<long>("SELECT COUNT(*) FROM cnc_alert WHERE alert_type='production_error'");
+    Assert("产量跟踪告警已创建", after > before);
+
+    var alert = c.QueryFirstOrDefault<(string T, string D)>(
+        "SELECT title, detail FROM cnc_alert WHERE title = @Title ORDER BY id DESC LIMIT 1",
+        new { Title = title });
+    // T is null when the row is missing; report that as a failed check instead of throwing.
+    Assert("告警标题含'产量跟踪'", (alert.T ?? "").Contains("产量跟踪", StringComparison.Ordinal));
+    Assert("告警详情非空", !string.IsNullOrEmpty(alert.D));
+    Console.WriteLine($" 告警: {alert.T}: {alert.D}");
+
+    c.Execute("DELETE FROM cnc_alert WHERE title = @Title", new { Title = title });
+}
+catch (Exception ex) { Assert("测试3执行", false, ex.Message); }
+
+// ====== Test 4: DailySummaryJob failure → cnc_alert ======
+Console.WriteLine("\n--- 测试4: DailySummaryJob失败 → cnc_alert ---");
+try
+{
+    using var c = new MySqlConnection(bizConn);
+    string title = $"日终汇总失败(模拟-2026-05-07 {runTag})";
+    var before = c.ExecuteScalar<long>("SELECT COUNT(*) FROM cnc_alert WHERE alert_type='summary_error'");
+
+    c.Execute(@"INSERT INTO cnc_alert (alert_type,title,detail,is_resolved,created_at)
+        VALUES ('summary_error',@Title,'MySqlException: Connection timeout during daily summary transaction',0,NOW())",
+        new { Title = title });
+
+    var after = c.ExecuteScalar<long>("SELECT COUNT(*) FROM cnc_alert WHERE alert_type='summary_error'");
+    Assert("日终汇总告警已创建", after > before);
+
+    var alert = c.QueryFirstOrDefault<(string T, string D)>(
+        "SELECT title, detail FROM cnc_alert WHERE title = @Title ORDER BY id DESC LIMIT 1",
+        new { Title = title });
+    Assert("告警标题含'日终汇总'", (alert.T ?? "").Contains("日终汇总", StringComparison.Ordinal));
+    Console.WriteLine($" 告警: {alert.T}: {alert.D}");
+
+    c.Execute("DELETE FROM cnc_alert WHERE title = @Title", new { Title = title });
+}
+catch (Exception ex) { Assert("测试4执行", false, ex.Message); }
+
+Console.WriteLine($"\n===== 结果: {pass}通过, {fail}失败 =====");
+// A non-zero exit code lets scripts detect a failed run.
+return fail == 0 ? 0 : 1;