mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-22 00:47:45 +08:00
feat: add support for CJK angle bracket and trailing attribute separator drift in DSML tool parsing
This commit is contained in:
@@ -65,6 +65,7 @@ func normalizeToolMarkupTagTailForXML(tail string) string {
|
||||
}
|
||||
var b strings.Builder
|
||||
b.Grow(len(tail))
|
||||
quote := rune(0)
|
||||
for i := 0; i < len(tail); {
|
||||
r, size := utf8.DecodeRuneInString(tail[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
@@ -72,9 +73,38 @@ func normalizeToolMarkupTagTailForXML(tail string) string {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
switch normalizeFullwidthASCII(r) {
|
||||
case '>', '/', '=', '"', '\'':
|
||||
b.WriteRune(normalizeFullwidthASCII(r))
|
||||
ch := normalizeFullwidthASCII(r)
|
||||
if quote != 0 {
|
||||
b.WriteRune(ch)
|
||||
if ch == quote {
|
||||
quote = 0
|
||||
}
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
switch ch {
|
||||
case '"', '\'':
|
||||
quote = ch
|
||||
b.WriteRune(ch)
|
||||
case '|':
|
||||
j := i + size
|
||||
for j < len(tail) {
|
||||
next, nextSize := utf8.DecodeRuneInString(tail[j:])
|
||||
if nextSize <= 0 {
|
||||
break
|
||||
}
|
||||
if next == ' ' || next == '\t' || next == '\r' || next == '\n' {
|
||||
j += nextSize
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
next, _ := normalizedASCIIAt(tail, j)
|
||||
if next != '>' {
|
||||
b.WriteRune(ch)
|
||||
}
|
||||
case '>', '/', '=':
|
||||
b.WriteRune(ch)
|
||||
default:
|
||||
b.WriteString(tail[i : i+size])
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)>`)
|
||||
|
||||
// cdataPattern matches a standalone CDATA section.
|
||||
var cdataPattern = regexp.MustCompile(`(?is)^<!\[CDATA\[(.*?)]](?:>|>)$`)
|
||||
var cdataPattern = regexp.MustCompile(`(?is)^(?:<|〈)!\[CDATA\[(.*?)]](?:>|>|〉)$`)
|
||||
|
||||
func parseMarkupKVObject(text string) map[string]any {
|
||||
matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1)
|
||||
|
||||
@@ -305,19 +305,23 @@ func indexToolCDATAClose(text string, from int) int {
|
||||
}
|
||||
asciiIdx := strings.Index(text[from:], "]]>")
|
||||
fullIdx := strings.Index(text[from:], "]]>")
|
||||
if asciiIdx < 0 && fullIdx < 0 {
|
||||
cjkIdx := strings.Index(text[from:], "]]〉")
|
||||
if asciiIdx < 0 && fullIdx < 0 && cjkIdx < 0 {
|
||||
return -1
|
||||
}
|
||||
if asciiIdx < 0 {
|
||||
return from + fullIdx
|
||||
best := -1
|
||||
for _, idx := range []int{asciiIdx, fullIdx, cjkIdx} {
|
||||
if idx >= 0 && (best < 0 || idx < best) {
|
||||
best = idx
|
||||
}
|
||||
}
|
||||
if fullIdx < 0 || asciiIdx < fullIdx {
|
||||
return from + asciiIdx
|
||||
}
|
||||
return from + fullIdx
|
||||
return from + best
|
||||
}
|
||||
|
||||
func toolCDATACloseLenAt(text string, idx int) int {
|
||||
if strings.HasPrefix(text[idx:], "]]〉") {
|
||||
return len("]]〉")
|
||||
}
|
||||
if strings.HasPrefix(text[idx:], "]]>") {
|
||||
return len("]]>")
|
||||
}
|
||||
|
||||
@@ -134,12 +134,17 @@ func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag
|
||||
}
|
||||
|
||||
func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
if start < 0 || start >= len(text) || text[start] != '<' {
|
||||
next, ok := consumeToolMarkupLessThan(text, start)
|
||||
if !ok {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
i := start + 1
|
||||
for i < len(text) && text[i] == '<' {
|
||||
i++
|
||||
i := next
|
||||
for {
|
||||
next, ok := consumeToolMarkupLessThan(text, i)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
i = next
|
||||
}
|
||||
closing := false
|
||||
if i < len(text) && text[i] == '/' {
|
||||
@@ -459,6 +464,14 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) {
|
||||
return idx, false
|
||||
}
|
||||
|
||||
func consumeToolMarkupLessThan(text string, idx int) (int, bool) {
|
||||
ch, size := normalizedASCIIAt(text, idx)
|
||||
if size <= 0 || ch != '<' {
|
||||
return idx, false
|
||||
}
|
||||
return idx + size, true
|
||||
}
|
||||
|
||||
func hasToolMarkupBoundary(text string, idx int) bool {
|
||||
if idx >= len(text) {
|
||||
return true
|
||||
@@ -488,6 +501,12 @@ func normalizedASCIIAt(text string, idx int) (byte, int) {
|
||||
}
|
||||
|
||||
func normalizeFullwidthASCII(r rune) rune {
|
||||
switch r {
|
||||
case '〈':
|
||||
return '<'
|
||||
case '〉':
|
||||
return '>'
|
||||
}
|
||||
if r >= '!' && r <= '~' {
|
||||
return r - 0xFEE0
|
||||
}
|
||||
|
||||
@@ -132,6 +132,37 @@ func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseToolCallsSupportsCJKAngleDSMDrift feeds ParseToolCalls a DSM-style
// tool_calls payload containing three Bash invocations. In the fixture the
// CDATA delimiters and the closing tags are written with CJK angle brackets
// (〈 and 〉), and each parameter open tag carries a trailing `|` right before
// its `>`. The test expects all three calls to be recovered with their CDATA
// payloads as plain parameter values.
//
// NOTE(review): the fixture below is a raw string, so every character in it is
// significant. This view may have normalized fullwidth characters — confirm the
// exact bytes against the repository before editing it.
func TestParseToolCallsSupportsCJKAngleDSMDrift(t *testing.T) {
|
||||
text := `<DSM|tool_calls>
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
〈/DSM|tool_calls〉`
|
||||
|
||||
// Parse with "Bash" as the only name in the second argument (presumably the
// list of permitted tool names — verify against ParseToolCalls).
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
// All three invoke elements must be recognised despite the drifted brackets.
if len(calls) != 3 {
|
||||
t.Fatalf("expected three CJK-angle DSM drift calls, got %#v", calls)
|
||||
}
|
||||
// First call: the "command" value must be the CDATA content without its wrapper.
if calls[0].Name != "Bash" || calls[0].Input["command"] != "git log --oneline origin/dev..dev" {
|
||||
t.Fatalf("unexpected first CJK-angle DSM drift call: %#v", calls[0])
|
||||
}
|
||||
// Second call: spot-check the "description" parameter instead of "command".
if calls[1].Name != "Bash" || calls[1].Input["description"] != "Show commits on origin/dev not on local dev" {
|
||||
t.Fatalf("unexpected second CJK-angle DSM drift call: %#v", calls[1])
|
||||
}
|
||||
// Third call: the "command" value again, for the last invoke element.
if calls[2].Name != "Bash" || calls[2].Input["command"] != "git status -b --short" {
|
||||
t.Fatalf("unexpected third CJK-angle DSM drift call: %#v", calls[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsIgnoresBareHyphenatedToolCallsLookalike(t *testing.T) {
|
||||
text := `<tool-calls><invoke name="Bash"><parameter name="command">pwd</parameter></invoke></tool-calls>`
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
|
||||
Reference in New Issue
Block a user