|
29 | 29 | "linux": "2.0.0" |
30 | 30 | } |
31 | 31 |
|
| 32 | + |
def _sanitize_agent_json(body_str: str):
    """Repair the common malformed-JSON patterns a hand-built agent heartbeat can emit.

    Agents assemble their heartbeat JSON as text in bash, so an empty interpolated value can
    leave a structurally-invalid comma or a key with no value (issue #31).

    The repairs are plain regex substitutions over the raw text and are NOT aware of JSON string
    boundaries: a string value that itself contains one of the targeted sequences (for example
    ',,' or ': ,') would be rewritten as well. The original author's stated assumption is that
    this endpoint's agents never emit such strings (haproxy_stats_csv is described as
    base64/comma-free and the remaining fields as constrained os/kernel/ip/version text).

    Args:
        body_str: Raw request body text that is expected to be (possibly malformed) JSON.

    Returns:
        A ``(text, was_changed)`` tuple: the possibly repaired text, and True when at least one
        repair pattern matched. The result is not guaranteed to be valid JSON; the caller must
        still parse it and handle a decode error.
    """
    import re

    # Order matters. Empty values are filled first so the commas that follow them stay
    # structural. Comma runs are then collapsed BEFORE the trailing/leading comma fixes so that a
    # run sitting right before a closer ('"a": 1, , }') is removed entirely instead of leaving a
    # single trailing comma behind.
    repairs = (
        # Empty value before a comma: '"server_statuses": ,' -> '"server_statuses": null,'
        (r':\s*,', ': null,'),
        # Empty value before a closing brace: '"field":}' -> '"field": null}'
        (r':\s*}', ': null}'),
        # Run of commas between members (an empty interpolation between two fields):
        # '"version": "x",\n ,\n "haproxy_status": ...' -> a single comma.
        (r',(\s*,)+', ','),
        # Trailing comma before } or ] (whitespace before the closer is preserved).
        (r',(\s*[}\]])', r'\1'),
        # Leading comma(s) right after an opening brace/bracket: '{ , "name": ...'. The empty
        # value fix cannot catch this because there is no key/colon before the comma.
        (r'([{\[])(\s*,)+', r'\1'),
    )

    sanitized = False
    for pattern, replacement in repairs:
        # subn reports the number of substitutions, so no separate search pass is needed.
        body_str, hits = re.subn(pattern, replacement, body_str)
        if hits:
            sanitized = True
    return body_str, sanitized
| 64 | + |
| 65 | + |
32 | 66 | def get_platform_key(agent_platform: str) -> str: |
33 | 67 | """Convert agent platform to standardized platform key - fixed empty platform fallback""" |
34 | 68 | platform = agent_platform.lower() if agent_platform else 'unknown' |
@@ -1455,47 +1489,33 @@ async def agent_heartbeat_by_name( |
1455 | 1489 | import json |
1456 | 1490 | from pydantic import ValidationError |
1457 | 1491 |
|
1458 | | - # Read raw body and sanitize common JSON errors from agents |
| 1492 | + # Read raw body. Parse VALID JSON as-is (the normal case for every agent version) and only |
| 1493 | + # fall back to the malformed-JSON repair when the body does not parse. This guarantees a healthy |
| 1494 | + # heartbeat from any agent version is byte-for-byte untouched — the repair regexes can never run |
| 1495 | + # against a well-formed payload (issue #31; strictly safer than repairing unconditionally). |
1459 | 1496 | try: |
1460 | 1497 | raw_body = await request.body() |
1461 | 1498 | body_str = raw_body.decode('utf-8') |
1462 | | - |
1463 | | - # Sanitize common malformed JSON patterns from agents |
1464 | | - original_body = body_str |
1465 | | - sanitized = False |
1466 | | - |
1467 | | - # Fix 1: Empty values before comma (most common: "server_statuses": ,) |
1468 | | - if re.search(r':\s*,', body_str): |
1469 | | - body_str = re.sub(r':\s*,', ': null,', body_str) |
1470 | | - sanitized = True |
1471 | | - |
1472 | | - # Fix 2: Empty values before closing brace |
1473 | | - if re.search(r':\s*}', body_str): |
1474 | | - body_str = re.sub(r':\s*}', ': null}', body_str) |
1475 | | - sanitized = True |
1476 | | - |
1477 | | - # Fix 3: Trailing commas |
1478 | | - if re.search(r',(\s*[}\]])', body_str): |
1479 | | - body_str = re.sub(r',(\s*[}\]])', r'\1', body_str) |
1480 | | - sanitized = True |
1481 | | - |
1482 | | - if sanitized: |
1483 | | - # Extract agent name for logging |
1484 | | - agent_name = "unknown" |
1485 | | - try: |
1486 | | - name_match = re.search(r'"name"\s*:\s*"([^"]+)"', body_str) |
1487 | | - if name_match: |
1488 | | - agent_name = name_match.group(1) |
1489 | | - except: |
1490 | | - pass |
1491 | | - |
1492 | | - logger.info(f"Sanitized malformed JSON from agent '{agent_name}' - fixed empty values and trailing commas") |
1493 | | - logger.debug(f"Original JSON (preview): {original_body[:300]}") |
1494 | | - logger.debug(f"Sanitized JSON (preview): {body_str[:300]}") |
1495 | | - |
1496 | | - # Parse sanitized JSON into Pydantic model |
1497 | | - heartbeat_dict = json.loads(body_str) |
1498 | | - |
| 1499 | + |
| 1500 | + try: |
| 1501 | + heartbeat_dict = json.loads(body_str) |
| 1502 | + except json.JSONDecodeError: |
| 1503 | + # Malformed body (would otherwise be a hard 400). Attempt a conservative repair of the |
| 1504 | + # comma artifacts a hand-built agent heartbeat can emit, then re-parse. |
| 1505 | + repaired, changed = _sanitize_agent_json(body_str) |
| 1506 | + if changed: |
| 1507 | + agent_name = "unknown" |
| 1508 | + try: |
| 1509 | + name_match = re.search(r'"name"\s*:\s*"([^"]+)"', repaired) |
| 1510 | + if name_match: |
| 1511 | + agent_name = name_match.group(1) |
| 1512 | + except Exception: |
| 1513 | + pass |
| 1514 | + logger.info(f"Repaired malformed JSON from agent '{agent_name}' before parsing") |
| 1515 | + logger.debug(f"Original JSON (preview): {body_str[:300]}") |
| 1516 | + logger.debug(f"Repaired JSON (preview): {repaired[:300]}") |
| 1517 | + heartbeat_dict = json.loads(repaired) # may still raise -> handled as 400 below |
| 1518 | + |
1499 | 1519 | # DEBUG: Log cluster_id for auto-register troubleshooting |
1500 | 1520 | if heartbeat_dict.get('name'): |
1501 | 1521 | logger.info(f"HEARTBEAT DEBUG: agent={heartbeat_dict.get('name')}, cluster_id={heartbeat_dict.get('cluster_id')}, has_cluster_id={bool(heartbeat_dict.get('cluster_id'))}") |
|
0 commit comments