// -----------------------------------------------------------------------------
// Tool-call markup parsing helpers.
//
// Purpose: strip fenced code blocks from text, locate tool-markup tags
// ('tool_calls', 'invoke', 'parameter' per TOOL_MARKUP_NAMES), and turn
// XML-style key/value markup into plain objects with CDATA and JSON fallbacks.
//
// NOTE(review): this copy of the file looks damaged and should be restored
// from version control before any behavioural change is attempted:
//   * Line breaks are collapsed, so several `//` comments are followed by code
//     on the same physical line.
//   * Several literals look truncated (text between angle brackets appears to
//     have been stripped): CDATA_PATTERN is `/^$/i` with no capture group
//     although extractStandaloneCDATA reads `cdataMatch[1]`; the replacements in
//     unescapeHtml read as no-ops; cdataStartsBeforeFence, updateCDATAStateLine,
//     skipXmlIgnoredSection, findPartialToolMarkupStart, extractStandaloneCDATA
//     and sanitizeLooseCDATA have missing pieces.
//   * module.exports lists parseMarkupToolCalls, normalizeDSMLToolCallMarkup,
//     containsToolMarkupSyntaxOutsideIgnored and
//     containsToolCallWrapperSyntaxOutsideIgnored, none of which is defined in
//     the visible text.
//   * isToolMarkupPipe, isToolMarkupSeparator, matchToolMarkupName,
//     findXmlTagEnd and parseJSONLiteralValue are called but not visible here.
//
// What the visible, intact code does:
//
// stripFencedCodeBlocks(text)
//   Returns '' for non-string or empty input. Splits on '\n' and re-appends
//   '\n' to every line except the last. Lines for which the CDATA checks fire
//   are always kept. Otherwise, a line whose left-trimmed form opens a fence
//   (see parseFenceOpenLine) starts a dropped region that lasts until a
//   matching close line; the fence lines themselves are dropped too. If the
//   input ends inside a fence, only the output collected before that fence is
//   returned ('' when nothing had been collected).
//
// parseFenceOpenLine(trimmed)
//   Returns { ch, count } when the line starts with a run of at least three
//   '`' or '~' characters, otherwise null. Text after the run is not inspected.
//
// isFenceCloseLine(trimmed, fenceChar, fenceLen)
//   True when the line starts with a run of fenceChar at least fenceLen long
//   and nothing but whitespace follows the run.
//
// scanToolMarkupTagAt(text, start)
//   Requires text[start] === '<'. Accepts an optional '/', an optional pipe
//   character, and an optional 'dsml' prefix followed by separators, then a
//   tool-markup name that must end at a tag boundary. Returns
//   { start, end, nameStart, nameEnd, name, closing, selfClosing, dsmlLike,
//   canonical } or null. `canonical` is simply `!dsmlLike`.
//
// findToolMarkupTagOutsideIgnored(text, from)
//   Scans forward from max(0, from || 0), jumping over whatever
//   skipXmlIgnoredSection reports as skippable. Returns the first tag found,
//   or null when the skip helper reports `blocked` or the text is exhausted.
//
// findMatchingToolMarkupClose(text, openTag)
//   Depth-counts later tags that share openTag.name (self-closing tags do not
//   increase depth) and returns the closing tag that brings depth to zero, or
//   null.
//
// hasXmlTagBoundary(text, idx)
//   True at end of text or when text[idx] is space, tab, LF, CR, '>' or '/'.
//
// isSelfClosingXmlTag(startTag)
//   True when the trimmed string ends with '/'.
//
// parseMarkupInput(raw)
//   Trims the input; '' yields {}. Prefers parseMarkupKVObject when it finds
//   any key, then a non-empty plain object from parseToolCallInput, and
//   finally { _raw: extractRawTagValue(s) }.
//
// parseMarkupKVObject(text)
//   Collects every TOOL_CALL_MARKUP_KV_PATTERN match as key -> parsed value,
//   skipping empty keys and null/undefined values; repeated keys are merged by
//   appendMarkupValue.
//
// parseMarkupValue(raw)
//   Standalone CDATA yields the JSON literal if one parses, else the CDATA
//   text. Otherwise the trimmed raw value is used: '' stays ''; a value
//   containing both '<' and '>' is parsed as nested markup (collapsing to a
//   string when the nested result only has `_raw`); then a JSON literal is
//   tried; finally the string itself is returned.
//
// extractRawTagValue(inner)
//   Returns '' for blank input, the CDATA payload when present, otherwise
//   unescapeHtml(inner).
//   NOTE(review): the untrimmed `inner` (not the trimmed string `s`) is passed
//   to unescapeHtml, which calls `.replace` on it directly - presumably callers
//   always pass strings; confirm.
//
// parseTagAttributes(raw)
//   Returns an object of attribute name (lower-cased) -> value for every
//   XML_ATTR_PATTERN match; a later duplicate overwrites an earlier one.
//
// parseToolCallInput(v)
//   null/undefined and '' yield {}. A string is JSON-parsed: a non-array object
//   is returned, anything else (including a parse failure) becomes
//   { _raw: raw }. A non-array object is returned as-is. Any other value is
//   round-tripped through JSON and returned only if that yields a non-array
//   object, else {}.
//
// appendMarkupValue(out, key, value)
//   Sets out[key]; when the key already exists the values are gathered into an
//   array (mutates `out`).
//
// isOnlyRawValue(obj)
//   True for a non-array object whose only own enumerable key is '_raw'.
// -----------------------------------------------------------------------------
'use strict'; const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; const CDATA_PATTERN = /^$/i; const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; const TOOL_MARKUP_NAMES = ['tool_calls', 'invoke', 'parameter']; const { toStringSafe, } = require('./state'); function stripFencedCodeBlocks(text) { const t = typeof text === 'string' ? text : ''; if (!t) { return ''; } const lines = t.split('\n'); const out = []; let inFence = false; let fenceChar = ''; let fenceLen = 0; let inCDATA = false; let beforeFenceIdx = 0; for (let li = 0; li < lines.length; li += 1) { const line = lines[li]; const lineWithNL = li < lines.length - 1 ? line + '\n' : line; // CDATA protection if (inCDATA || cdataStartsBeforeFence(line)) { out.push(lineWithNL); inCDATA = updateCDATAStateLine(inCDATA, line); continue; } const trimmed = line.replace(/^[ \t]+/, ''); if (!inFence) { const fence = parseFenceOpenLine(trimmed); if (fence) { inFence = true; fenceChar = fence.ch; fenceLen = fence.count; beforeFenceIdx = out.length; continue; } out.push(lineWithNL); continue; } if (isFenceCloseLine(trimmed, fenceChar, fenceLen)) { inFence = false; fenceChar = ''; fenceLen = 0; } } if (inFence) { // Unclosed fence: keep content before the fence started. 
// Chunk: end of stripFencedCodeBlocks, fence helpers, CDATA line helpers (damaged), start of scanToolMarkupTagAt.
if (beforeFenceIdx > 0) { return out.slice(0, beforeFenceIdx).join(''); } return ''; } return out.join(''); } function parseFenceOpenLine(trimmed) { if (trimmed.length < 3) return null; const ch = trimmed[0]; if (ch !== '`' && ch !== '~') return null; let count = 0; while (count < trimmed.length && trimmed[count] === ch) count++; if (count < 3) return null; return { ch, count }; } function isFenceCloseLine(trimmed, fenceChar, fenceLen) { if (!fenceChar || !trimmed || trimmed[0] !== fenceChar) return false; let count = 0; while (count < trimmed.length && trimmed[count] === fenceChar) count++; if (count < fenceLen) return false; return trimmed.slice(count).trim() === ''; } function cdataStartsBeforeFence(line) { const cdataIdx = line.toLowerCase().indexOf('= 0 ? line.indexOf('```') : Infinity, line.indexOf('~~~') >= 0 ? line.indexOf('~~~') : Infinity, ); return fenceIdx === Infinity || cdataIdx < fenceIdx; } function updateCDATAStateLine(inCDATA, line) { const lower = line.toLowerCase(); let pos = 0; let state = inCDATA; while (pos < lower.length) { if (state) { const end = lower.indexOf(']]>', pos); if (end < 0) return true; pos = end + ']]>'.length; state = false; continue; } const start = lower.indexOf('', i + ''.length }; } if (lower.startsWith('', i + ''.length }; } return { advanced: false, blocked: false, next: i }; } function scanToolMarkupTagAt(text, start) { const raw = toStringSafe(text); if (!raw || start < 0 || start >= raw.length || raw[start] !== '<') { return null; } const lower = raw.toLowerCase(); let i = start + 1; const closing = raw[i] === '/'; if (closing) { i += 1; } let dsmlLike = false; if (i < raw.length && isToolMarkupPipe(raw[i])) { dsmlLike = true; i += 1; } if (lower.startsWith('dsml', i)) { dsmlLike = true; i += 'dsml'.length; while (i < raw.length && isToolMarkupSeparator(raw[i])) { i += 1; } } const { name, len } = matchToolMarkupName(lower, i); if (!name) { return null; } const nameEnd = i + len; if (!hasXmlTagBoundary(raw, nameEnd)) 
// Chunk: end of scanToolMarkupTagAt, tag search / close matching, partial-tag detection (damaged), start of parseMarkupInput.
{ return null; } const end = findXmlTagEnd(raw, nameEnd); if (end < 0) { return null; } return { start, end, nameStart: i, nameEnd, name, closing, selfClosing: raw.slice(start, end + 1).trim().endsWith('/>'), dsmlLike, canonical: !dsmlLike, }; } function findToolMarkupTagOutsideIgnored(text, from) { const raw = toStringSafe(text); const lower = raw.toLowerCase(); for (let i = Math.max(0, from || 0); i < raw.length;) { const skipped = skipXmlIgnoredSection(lower, i); if (skipped.blocked) { return null; } if (skipped.advanced) { i = skipped.next; continue; } const tag = scanToolMarkupTagAt(raw, i); if (tag) { return tag; } i += 1; } return null; } function findMatchingToolMarkupClose(text, openTag) { const raw = toStringSafe(text); if (!raw || !openTag || !openTag.name || openTag.closing) { return null; } let depth = 1; for (let pos = openTag.end + 1; pos < raw.length;) { const tag = findToolMarkupTagOutsideIgnored(raw, pos); if (!tag) { return null; } if (tag.name !== openTag.name) { pos = tag.end + 1; continue; } if (tag.closing) { depth -= 1; if (depth === 0) { return tag; } } else if (!tag.selfClosing) { depth += 1; } pos = tag.end + 1; } return null; } function findPartialToolMarkupStart(text) { const raw = toStringSafe(text); const lastLT = raw.lastIndexOf('<'); if (lastLT < 0) { return -1; } const tail = raw.slice(lastLT); if (tail.includes('>')) { return -1; } const lowerTail = tail.toLowerCase(); const candidates = [ '') { return i; } } return -1; } function hasXmlTagBoundary(text, idx) { if (idx >= text.length) { return true; } return [' ', '\t', '\n', '\r', '>', '/'].includes(text[idx]); } function isSelfClosingXmlTag(startTag) { return toStringSafe(startTag).trim().endsWith('/'); } function parseMarkupInput(raw) { const s = toStringSafe(raw).trim(); if (!s) { return {}; } // Prioritize XML-style KV tags (e.g., val) const kv = parseMarkupKVObject(s); if (Object.keys(kv).length > 0) { return kv; } // Fallback to JSON parsing const parsed = 
// Chunk: end of parseMarkupInput, parseMarkupKVObject, parseMarkupValue, start of extractRawTagValue.
parseToolCallInput(s); if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { if (Object.keys(parsed).length > 0) { return parsed; } } return { _raw: extractRawTagValue(s) }; } function parseMarkupKVObject(text) { const raw = toStringSafe(text).trim(); if (!raw) { return {}; } const out = {}; for (const m of raw.matchAll(TOOL_CALL_MARKUP_KV_PATTERN)) { const key = toStringSafe(m[1]).trim(); if (!key) { continue; } const value = parseMarkupValue(m[2]); if (value === undefined || value === null) { continue; } appendMarkupValue(out, key, value); } return out; } function parseMarkupValue(raw) { const cdata = extractStandaloneCDATA(raw); if (cdata.ok) { const literal = parseJSONLiteralValue(cdata.value); return literal.ok ? literal.value : cdata.value; } const s = toStringSafe(extractRawTagValue(raw)).trim(); if (!s) { return ''; } if (s.includes('<') && s.includes('>')) { const nested = parseMarkupInput(s); if (nested && typeof nested === 'object' && !Array.isArray(nested)) { if (isOnlyRawValue(nested)) { return toStringSafe(nested._raw); } return nested; } } const literal = parseJSONLiteralValue(s); if (literal.ok) { return literal.value; } return s; } function extractRawTagValue(inner) { const s = toStringSafe(inner).trim(); if (!s) { return ''; } // 1. Check for CDATA const cdata = extractStandaloneCDATA(s); if (cdata.ok) { return cdata.value; } // 2. 
// Chunk: end of extractRawTagValue, unescapeHtml (damaged), extractStandaloneCDATA / sanitizeLooseCDATA (damaged).
Fallback to unescaping standard HTML entities // Note: we avoid broad tag stripping here to preserve user content (like < symbols in code) return unescapeHtml(inner); } function unescapeHtml(safe) { if (!safe) return ''; return safe.replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'") .replace(/'/g, "'"); } function extractStandaloneCDATA(inner) { const s = toStringSafe(inner).trim(); const cdataMatch = s.match(CDATA_PATTERN); if (cdataMatch && cdataMatch[1] !== undefined) { return { ok: true, value: cdataMatch[1] }; } if (s.toLowerCase().startsWith(''; let out = ''; let pos = 0; let changed = false; while (pos < raw.length) { const startRel = lower.indexOf(openMarker, pos); if (startRel < 0) { out += raw.slice(pos); break; } const start = startRel; const contentStart = start + openMarker.length; out += raw.slice(pos, start); const endRel = lower.indexOf(closeMarker, contentStart); if (endRel >= 0) { const end = endRel + closeMarker.length; out += raw.slice(start, end); pos = end; continue; } changed = true; out += raw.slice(contentStart); pos = raw.length; } return changed ? 
// Chunk: end of sanitizeLooseCDATA, parseTagAttributes, parseToolCallInput, appendMarkupValue, isOnlyRawValue, exports.
out : raw; } function parseTagAttributes(raw) { const source = toStringSafe(raw); const out = {}; if (!source) { return out; } for (const match of source.matchAll(XML_ATTR_PATTERN)) { const key = toStringSafe(match[1]).trim().toLowerCase(); if (!key) { continue; } out[key] = match[3] || match[4] || ''; } return out; } function parseToolCallInput(v) { if (v == null) { return {}; } if (typeof v === 'string') { const raw = toStringSafe(v); if (!raw) { return {}; } try { const parsed = JSON.parse(raw); if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { return parsed; } return { _raw: raw }; } catch (_err) { return { _raw: raw }; } } if (typeof v === 'object' && !Array.isArray(v)) { return v; } try { const parsed = JSON.parse(JSON.stringify(v)); if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { return parsed; } } catch (_err) { return {}; } return {}; } function appendMarkupValue(out, key, value) { if (Object.prototype.hasOwnProperty.call(out, key)) { const current = out[key]; if (Array.isArray(current)) { current.push(value); return; } out[key] = [current, value]; return; } out[key] = value; } function isOnlyRawValue(obj) { if (!obj || typeof obj !== 'object' || Array.isArray(obj)) { return false; } const keys = Object.keys(obj); return keys.length === 1 && keys[0] === '_raw'; } module.exports = { stripFencedCodeBlocks, parseMarkupToolCalls, normalizeDSMLToolCallMarkup, containsToolMarkupSyntaxOutsideIgnored, containsToolCallWrapperSyntaxOutsideIgnored, findToolMarkupTagOutsideIgnored, findMatchingToolMarkupClose, findPartialToolMarkupStart, sanitizeLooseCDATA, };