mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-21 16:37:47 +08:00
refactor: generalize DSML tag parsing to tolerate model noise; split tiktoken by build tags
Replace hardcoded DSML typo variant lists in Go/Node tool call parsers with generalized prefix consumption that tolerates repeated leading <, repeated DSML prefix noise, and trailing pipe terminators. Split tiktoken-dependent token counting into a build-tagged file for non-cgo platform compatibility. Add /data directory to Dockerfile for bind-mount permissions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -44,6 +44,9 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
}
|
||||
b.WriteString(tag.Name)
|
||||
b.WriteString(text[tag.NameEnd : tag.End+1])
|
||||
if text[tag.End] != '>' {
|
||||
b.WriteByte('>')
|
||||
}
|
||||
i = tag.End + 1
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -128,34 +128,39 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
i := start + 1
|
||||
for i < len(text) && text[i] == '<' {
|
||||
i++
|
||||
}
|
||||
closing := false
|
||||
if i < len(text) && text[i] == '/' {
|
||||
closing = true
|
||||
i++
|
||||
}
|
||||
dsmlLike := false
|
||||
if next, ok := consumeToolMarkupPipe(text, i); ok {
|
||||
dsmlLike = true
|
||||
i = next
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], "dsml") {
|
||||
dsmlLike = true
|
||||
i += len("dsml")
|
||||
for next, ok := consumeToolMarkupSeparator(text, i); ok; next, ok = consumeToolMarkupSeparator(text, i) {
|
||||
i = next
|
||||
}
|
||||
}
|
||||
i, dsmlLike := consumeToolMarkupNamePrefix(lower, text, i)
|
||||
name, nameLen := matchToolMarkupName(lower, i)
|
||||
if nameLen == 0 {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
nameEnd := i + nameLen
|
||||
nameEndBeforePipes := nameEnd
|
||||
for next, ok := consumeToolMarkupPipe(text, nameEnd); ok; next, ok = consumeToolMarkupPipe(text, nameEnd) {
|
||||
nameEnd = next
|
||||
}
|
||||
hasTrailingPipe := nameEnd > nameEndBeforePipes
|
||||
if !hasToolMarkupBoundary(text, nameEnd) {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
end := findXMLTagEnd(text, nameEnd)
|
||||
if end < 0 {
|
||||
return ToolMarkupTag{}, false
|
||||
if !hasTrailingPipe {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
end = nameEnd - 1
|
||||
}
|
||||
if hasTrailingPipe {
|
||||
if nextLT := strings.IndexByte(text[nameEnd:], '<'); nextLT >= 0 && end >= nameEnd+nextLT {
|
||||
end = nameEnd - 1
|
||||
}
|
||||
}
|
||||
trimmed := strings.TrimSpace(text[start : end+1])
|
||||
return ToolMarkupTag{
|
||||
@@ -171,6 +176,74 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
}, true
|
||||
}
|
||||
|
||||
func IsPartialToolMarkupTagPrefix(text string) bool {
|
||||
if text == "" || text[0] != '<' || strings.Contains(text, ">") {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
i := 1
|
||||
for i < len(text) && text[i] == '<' {
|
||||
i++
|
||||
}
|
||||
if i >= len(text) {
|
||||
return true
|
||||
}
|
||||
if text[i] == '/' {
|
||||
i++
|
||||
}
|
||||
for i <= len(text) {
|
||||
if i == len(text) {
|
||||
return true
|
||||
}
|
||||
if hasToolMarkupNamePrefix(lower[i:]) {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix("dsml", lower[i:]) {
|
||||
return true
|
||||
}
|
||||
next, ok := consumeToolMarkupNamePrefixOnce(lower, text, i)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
i = next
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) {
|
||||
dsmlLike := false
|
||||
for {
|
||||
next, ok := consumeToolMarkupNamePrefixOnce(lower, text, idx)
|
||||
if !ok {
|
||||
return idx, dsmlLike
|
||||
}
|
||||
idx = next
|
||||
dsmlLike = true
|
||||
}
|
||||
}
|
||||
|
||||
func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) {
|
||||
if next, ok := consumeToolMarkupPipe(text, idx); ok {
|
||||
return next, true
|
||||
}
|
||||
if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') {
|
||||
return idx + 1, true
|
||||
}
|
||||
if strings.HasPrefix(lower[idx:], "dsml") {
|
||||
return idx + len("dsml"), true
|
||||
}
|
||||
return idx, false
|
||||
}
|
||||
|
||||
func hasToolMarkupNamePrefix(lowerTail string) bool {
|
||||
for _, name := range toolMarkupNames {
|
||||
if strings.HasPrefix(lowerTail, name) || strings.HasPrefix(name, lowerTail) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func matchToolMarkupName(lower string, start int) (string, int) {
|
||||
for _, name := range toolMarkupNames {
|
||||
if strings.HasPrefix(lower[start:], name) {
|
||||
@@ -193,19 +266,6 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) {
|
||||
return idx, false
|
||||
}
|
||||
|
||||
func consumeToolMarkupSeparator(text string, idx int) (int, bool) {
|
||||
if idx >= len(text) {
|
||||
return idx, false
|
||||
}
|
||||
if text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n' {
|
||||
return idx + 1, true
|
||||
}
|
||||
if next, ok := consumeToolMarkupPipe(text, idx); ok {
|
||||
return next, true
|
||||
}
|
||||
return idx, false
|
||||
}
|
||||
|
||||
func hasToolMarkupBoundary(text string, idx int) bool {
|
||||
if idx >= len(text) {
|
||||
return true
|
||||
|
||||
@@ -41,6 +41,52 @@ func TestParseToolCallsSupportsDSMLShell(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsToleratesDSMLTrailingPipeTagTerminator(t *testing.T) {
|
||||
text := strings.Join([]string{
|
||||
`<|DSML|tool_calls| `,
|
||||
` <|DSML|invoke name="terminal">`,
|
||||
` <|DSML|parameter name="command"><![CDATA[find "/home" -type d]]></|DSML|parameter>`,
|
||||
` <|DSML|parameter name="timeout"><![CDATA[10]]></|DSML|parameter>`,
|
||||
` </|DSML|invoke>`,
|
||||
`</|DSML|tool_calls>`,
|
||||
}, "\n")
|
||||
calls := ParseToolCalls(text, []string{"terminal"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one trailing-pipe DSML call, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "terminal" {
|
||||
t.Fatalf("expected terminal tool, got %#v", calls[0])
|
||||
}
|
||||
if calls[0].Input["command"] != `find "/home" -type d` {
|
||||
t.Fatalf("expected command argument, got %#v", calls[0].Input)
|
||||
}
|
||||
if calls[0].Input["timeout"] != float64(10) {
|
||||
t.Fatalf("expected numeric timeout, got %#v", calls[0].Input)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsToleratesExtraLeadingLessThanBeforeDSML(t *testing.T) {
|
||||
text := `<<|DSML|tool_calls><<|DSML|invoke name="Bash"><<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one extra-leading-less-than DSML call, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "Bash" || calls[0].Input["command"] != "pwd" {
|
||||
t.Fatalf("unexpected extra-leading-less-than DSML parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsToleratesRepeatedDSMLPrefixNoise(t *testing.T) {
|
||||
text := `<<DSML|DSML|tool_calls><<DSML|DSML|invoke name="Bash"><<DSML|DSML|parameter name="command"><![CDATA[git status]]></DSML|DSML|parameter></DSML|DSML|invoke></DSML|DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one repeated-prefix DSML call, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "Bash" || calls[0].Input["command"] != "git status" {
|
||||
t.Fatalf("unexpected repeated-prefix DSML parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T) {
|
||||
content := `<tool_calls><invoke name="demo"><parameter name="value">x</parameter></invoke></tool_calls>`
|
||||
text := `<|DSML|tool_calls><|DSML|invoke name="Write"><|DSML|parameter name="file_path">notes.md</|DSML|parameter><|DSML|parameter name="content"><![CDATA[` + content + `]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
|
||||
|
||||
Reference in New Issue
Block a user