refactor: unify empty-output retry logic into shared completionruntime package and normalize protocol adapter boundary.

This commit is contained in:
CJACK
2026-05-10 00:10:53 +08:00
parent 067cf465bb
commit 7c66742a19
32 changed files with 930 additions and 371 deletions

View File

@@ -1,6 +1,9 @@
package toolcall
import "strings"
import (
"strings"
"unicode/utf8"
)
func normalizeDSMLToolCallMarkup(text string) (string, bool) {
if text == "" {
@@ -42,8 +45,9 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
b.WriteByte('/')
}
b.WriteString(tag.Name)
b.WriteString(text[tag.NameEnd : tag.End+1])
if text[tag.End] != '>' {
tail := normalizeToolMarkupTagTailForXML(text[tag.NameEnd : tag.End+1])
b.WriteString(tail)
if !strings.HasSuffix(tail, ">") {
b.WriteByte('>')
}
i = tag.End + 1
@@ -54,3 +58,27 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
}
return b.String()
}
// normalizeToolMarkupTagTailForXML returns a copy of tail in which every rune
// whose normalizeFullwidthASCII result is one of '>', '/', '=', '"' or '\''
// is replaced by that normalized rune. All other runes are copied through
// unchanged, and bytes that do not decode as valid UTF-8 are preserved as-is.
// An empty tail yields an empty string.
func normalizeToolMarkupTagTailForXML(tail string) string {
	if len(tail) == 0 {
		return ""
	}
	var out strings.Builder
	out.Grow(len(tail))
	pos := 0
	for pos < len(tail) {
		r, width := utf8.DecodeRuneInString(tail[pos:])
		if width == 1 && r == utf8.RuneError {
			// Invalid UTF-8 byte: keep the raw byte rather than emitting U+FFFD.
			out.WriteByte(tail[pos])
			pos++
			continue
		}
		folded := normalizeFullwidthASCII(r)
		if folded == '>' || folded == '/' || folded == '=' || folded == '"' || folded == '\'' {
			// Only the characters that carry XML tag syntax are rewritten.
			out.WriteRune(folded)
		} else {
			out.WriteString(tail[pos : pos+width])
		}
		pos += width
	}
	return out.String()
}

View File

@@ -10,7 +10,7 @@ import (
var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)>`)
// cdataPattern matches a standalone CDATA section.
var cdataPattern = regexp.MustCompile(`(?is)^<!\[CDATA\[(.*?)]]>$`)
// cdataPattern matches a standalone CDATA section whose terminator is "]]"
// followed by either an ASCII '>' or the fullwidth '>' (U+FF1E). The fullwidth
// form is written as an escape so it survives tools that drop non-ASCII text;
// an empty alternative here would accept a bare "]]" as a terminator.
var cdataPattern = regexp.MustCompile(`(?is)^<!\[CDATA\[(.*?)]](?:>|\x{FF1E})$`)
func parseMarkupKVObject(text string) map[string]any {
matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1)

View File

@@ -6,6 +6,7 @@ import (
"html"
"regexp"
"strings"
"unicode/utf8"
)
var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`)
@@ -214,7 +215,7 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked
if end < 0 {
return 0, false, true
}
return end + len("]]>"), true, false
return end + toolCDATACloseLenAt(text, end), true, false
case strings.HasPrefix(text[i:], "<!--"):
end := strings.Index(text[i+len("<!--"):], "-->")
if end < 0 {
@@ -227,15 +228,26 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked
}
// hasASCIIPrefixFoldAt reports whether matchASCIIPrefixFoldAt finds prefix in
// text beginning at byte offset start; the matched length is discarded.
// NOTE(review): this span is rendered from a diff hunk without +/- markers.
// The length check and the byte-indexed comparison loop that appear below
// look like the removed pre-change body shown next to its replacement —
// confirm against the repository before relying on any single line.
func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool {
if start < 0 || len(text)-start < len(prefix) {
return false
_, ok := matchASCIIPrefixFoldAt(text, start, prefix)
return ok
}
// matchASCIIPrefixFoldAt compares prefix, byte by byte and ignoring ASCII
// case, against the characters of text starting at byte offset start, where
// each text character is obtained through normalizedASCIIAt. On success it
// returns the number of bytes of text consumed and true; otherwise 0, false.
func matchASCIIPrefixFoldAt(text string, start int, prefix string) (int, bool) {
// && binds tighter than ||: an out-of-range start is only rejected when
// prefix is non-empty, so an empty prefix at end of text matches with length 0.
if start < 0 || start >= len(text) && prefix != "" {
return 0, false
}
idx := start
for j := 0; j < len(prefix); j++ {
// NOTE(review): the next two lines appear to be pre-change diff residue.
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
return false
if idx >= len(text) {
return 0, false
}
ch, size := normalizedASCIIAt(text, idx)
if size <= 0 || asciiLower(ch) != asciiLower(prefix[j]) {
return 0, false
}
// Advance by the decoded width, which may exceed one byte per prefix byte.
idx += size
}
return true
return idx - start, true
}
func asciiLower(b byte) byte {
@@ -266,15 +278,14 @@ func findToolCDATAEnd(text string, from int) int {
if from < 0 || from >= len(text) {
return -1
}
const closeMarker = "]]>"
firstNonFenceEnd := -1
for searchFrom := from; searchFrom < len(text); {
rel := strings.Index(text[searchFrom:], closeMarker)
if rel < 0 {
end := indexToolCDATAClose(text, searchFrom)
if end < 0 {
break
}
end := searchFrom + rel
searchFrom = end + len(closeMarker)
closeLen := toolCDATACloseLenAt(text, end)
searchFrom = end + closeLen
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
continue
}
@@ -288,6 +299,31 @@ func findToolCDATAEnd(text string, from int) int {
return firstNonFenceEnd
}
// indexToolCDATAClose returns the byte index in text of the first CDATA
// terminator at or after from, or -1 when none exists. A terminator is "]]"
// followed by either an ASCII '>' or the fullwidth '>' (U+FF1E); whichever
// form occurs first wins. A negative from is treated as 0, and a from at or
// beyond the end of text yields -1.
func indexToolCDATAClose(text string, from int) int {
	if from < 0 {
		from = 0
	}
	if from >= len(text) {
		// Nothing left to search; also avoids slicing past the end.
		return -1
	}
	rest := text[from:]
	asciiIdx := strings.Index(rest, "]]>")
	// The fullwidth '>' is spelled as an escape so the literal cannot be
	// silently reduced to a bare "]]" by tools that drop non-ASCII text.
	fullIdx := strings.Index(rest, "]]\uFF1E")
	switch {
	case asciiIdx < 0 && fullIdx < 0:
		return -1
	case asciiIdx < 0:
		return from + fullIdx
	case fullIdx < 0 || asciiIdx < fullIdx:
		return from + asciiIdx
	default:
		return from + fullIdx
	}
}
// toolCDATACloseLenAt returns the byte length of the CDATA terminator located
// at byte offset idx in text: the length of "]]" plus fullwidth '>' (U+FF1E)
// when that sequence starts at idx, and the length of the ASCII "]]>"
// otherwise. An idx outside text falls back to the ASCII length.
func toolCDATACloseLenAt(text string, idx int) int {
	// Escaped so the literal stays distinct from the ASCII "]]" prefix even
	// if non-ASCII characters are stripped from the source.
	const fullwidthClose = "]]\uFF1E"
	if idx >= 0 && idx <= len(text) && strings.HasPrefix(text[idx:], fullwidthClose) {
		return len(fullwidthClose)
	}
	return len("]]>")
}
func cdataEndLooksStructural(text string, after int) bool {
for after < len(text) {
switch {
@@ -327,22 +363,29 @@ func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
}
// findXMLTagEnd scans text from byte offset from (clamped to 0 via maxInt)
// for the first '>' that is not inside a quoted region and returns the byte
// index of that character's final byte, or -1 if none is found. Each rune is
// passed through normalizeFullwidthASCII before being compared, and a region
// opened by '"' or '\'' ends at the next occurrence of the same quote.
// NOTE(review): this span is rendered from a diff hunk without +/- markers.
// The byte-typed quote, the i++ loop header, `ch := text[i]` and the bare
// `return i` appear to be the removed pre-change lines — confirm against the
// repository.
func findXMLTagEnd(text string, from int) int {
quote := byte(0)
for i := maxInt(from, 0); i < len(text); i++ {
ch := text[i]
quote := rune(0)
for i := maxInt(from, 0); i < len(text); {
r, size := utf8.DecodeRuneInString(text[i:])
if r == utf8.RuneError && size == 0 {
break
}
ch := normalizeFullwidthASCII(r)
if quote != 0 {
// Inside quotes: only the matching quote rune ends the region.
if ch == quote {
quote = 0
}
i += size
continue
}
if ch == '"' || ch == '\'' {
quote = ch
i += size
continue
}
if ch == '>' {
return i
// Index of the last byte of the (possibly multi-byte) closing rune.
return i + size - 1
}
i += size
}
return -1
}
@@ -355,7 +398,8 @@ func hasXMLTagBoundary(text string, idx int) bool {
case ' ', '\t', '\n', '\r', '>', '/':
return true
default:
return false
r, _ := utf8.DecodeRuneInString(text[idx:])
return normalizeFullwidthASCII(r) == '>'
}
}

View File

@@ -1,6 +1,9 @@
package toolcall
import "strings"
import (
"strings"
"unicode/utf8"
)
type toolMarkupNameAlias struct {
raw string
@@ -184,7 +187,7 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
}
func IsPartialToolMarkupTagPrefix(text string) bool {
if text == "" || text[0] != '<' || strings.Contains(text, ">") {
if text == "" || text[0] != '<' || strings.Contains(text, ">") || strings.Contains(text, "") {
return false
}
i := 1
@@ -236,9 +239,10 @@ func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
return idx + 1, true
}
if hasASCIIPrefixFoldAt(text, idx, "dsml") {
next := idx + len("dsml")
if next < len(text) && (text[next] == '-' || text[next] == '_') {
next++
dsmlLen, _ := matchASCIIPrefixFoldAt(text, idx, "dsml")
next := idx + dsmlLen
if sep, size := normalizedASCIIAt(text, next); sep == '-' || sep == '_' {
next += size
}
return next, true
}
@@ -249,12 +253,17 @@ func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
}
func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
if idx < 0 || idx >= len(text) || !isToolMarkupPrefixSegmentByte(text[idx]) {
nextSegment, ok := consumeToolMarkupPrefixSegment(text, idx)
if !ok {
return idx, false
}
j := idx + 1
for j < len(text) && isToolMarkupPrefixSegmentByte(text[j]) {
j++
j := nextSegment
for {
nextSegment, ok = consumeToolMarkupPrefixSegment(text, j)
if !ok {
break
}
j = nextSegment
}
k := j
for k < len(text) && (text[k] == ' ' || text[k] == '\t' || text[k] == '\r' || text[k] == '\n') {
@@ -262,8 +271,8 @@ func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
}
next, ok := consumeToolMarkupPipe(text, k)
if !ok {
if k < len(text) && (text[k] == '_' || text[k] == '-') {
next = k + 1
if sep, size := normalizedASCIIAt(text, k); sep == '_' || sep == '-' {
next = k + size
ok = true
}
}
@@ -279,21 +288,32 @@ func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
return next, true
}
func isToolMarkupPrefixSegmentByte(b byte) bool {
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9')
// consumeToolMarkupPrefixSegment tries to consume one prefix-segment character
// at byte offset idx. The character is read through normalizedASCIIAt and is
// accepted when it is an ASCII letter or digit; the function then returns the
// offset just past it and true. Otherwise it returns idx unchanged and false.
func consumeToolMarkupPrefixSegment(text string, idx int) (int, bool) {
	ch, size := normalizedASCIIAt(text, idx)
	if size <= 0 {
		return idx, false
	}
	switch {
	case 'a' <= ch && ch <= 'z',
		'A' <= ch && ch <= 'Z',
		'0' <= ch && ch <= '9':
		return idx + size, true
	}
	return idx, false
}
// hasASCIIPartialPrefixFoldAt reports whether the remainder of text from byte
// offset start is a proper, non-empty leading part of prefix, compared
// ignoring ASCII case with each text character read through
// normalizedASCIIAt. A complete match of prefix is not reported as partial.
// NOTE(review): this span is rendered from a diff hunk without +/- markers.
// The `remain` computation, the `j < remain` loop and the bare `return true`
// appear to be the removed pre-change lines — confirm against the repository.
func hasASCIIPartialPrefixFoldAt(text string, start int, prefix string) bool {
remain := len(text) - start
if remain <= 0 || remain > len(prefix) {
if start < 0 || start >= len(text) {
return false
}
for j := 0; j < remain; j++ {
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
idx := start
matched := 0
for matched < len(prefix) && idx < len(text) {
ch, size := normalizedASCIIAt(text, idx)
if size <= 0 || asciiLower(ch) != asciiLower(prefix[matched]) {
return false
}
idx += size
matched++
}
return true
// True only when something matched, prefix was not fully consumed, and
// the text ended exactly where matching stopped.
return matched > 0 && matched < len(prefix) && idx == len(text)
}
func hasToolMarkupNamePrefix(text string, start int) bool {
@@ -313,8 +333,8 @@ func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
if name.dsmlOnly && !dsmlLike {
continue
}
if hasASCIIPrefixFoldAt(text, start, name.raw) {
return name.canonical, len(name.raw)
if nameLen, ok := matchASCIIPrefixFoldAt(text, start, name.raw); ok {
return name.canonical, nameLen
}
}
return "", 0
@@ -341,6 +361,29 @@ func hasToolMarkupBoundary(text string, idx int) bool {
case ' ', '\t', '\n', '\r', '>', '/':
return true
default:
return false
r, _ := utf8.DecodeRuneInString(text[idx:])
return normalizeFullwidthASCII(r) == '>'
}
}
// normalizedASCIIAt decodes the rune starting at byte offset idx in text,
// passes it through normalizeFullwidthASCII, and returns the result as a byte
// together with the number of bytes the original rune occupied. It returns
// (0, 0) when idx is out of range, when nothing can be decoded, or when the
// normalized rune is above 0x7f.
func normalizedASCIIAt(text string, idx int) (byte, int) {
	if idx < 0 || idx >= len(text) {
		return 0, 0
	}
	r, width := utf8.DecodeRuneInString(text[idx:])
	if width == 0 && r == utf8.RuneError {
		return 0, 0
	}
	if folded := normalizeFullwidthASCII(r); folded <= 0x7f {
		// width is the encoded size in text, not the size of the ASCII byte.
		return byte(folded), width
	}
	return 0, 0
}
// normalizeFullwidthASCII maps a fullwidth ASCII variant (U+FF01 '！' through
// U+FF5E '～') to its ordinary ASCII counterpart ('!' through '~') by
// subtracting the fixed offset 0xFEE0. Every other rune is returned unchanged.
// The bounds are written as escapes so they cannot be lost by tools that
// strip non-ASCII characters from the source.
func normalizeFullwidthASCII(r rune) rune {
	if r >= '\uFF01' && r <= '\uFF5E' {
		return r - 0xFEE0
	}
	return r
}

View File

@@ -111,6 +111,27 @@ func TestParseToolCallsSupportsArbitraryPrefixedToolMarkup(t *testing.T) {
}
}
// TestParseToolCallsSupportsFullwidthDSMLShell passes ParseToolCalls a
// tool_calls document holding two Read invocations and expects exactly two
// calls named "Read" whose file_path inputs equal the two CDATA payloads.
// NOTE(review): as shown, the fixture has no '>' after the parameter name
// attribute or after "]]"; presumably fullwidth characters (e.g. U+FF1E) were
// lost when this text was extracted — confirm against the repository before
// editing the fixture.
func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
text := `<tool_calls>
<invoke name="Read">
<parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]</parameter>
</invoke>
<invoke name="Read">
<parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]</parameter>
</invoke>
</tool_calls>`
calls := ParseToolCalls(text, []string{"Read"})
if len(calls) != 2 {
t.Fatalf("expected two fullwidth DSML calls, got %#v", calls)
}
if calls[0].Name != "Read" || calls[0].Input["file_path"] != "/Users/aq/Desktop/myproject/Personal_Blog/README.md" {
t.Fatalf("unexpected first fullwidth DSML call: %#v", calls[0])
}
if calls[1].Name != "Read" || calls[1].Input["file_path"] != "/Users/aq/Desktop/myproject/Personal_Blog/index.html" {
t.Fatalf("unexpected second fullwidth DSML call: %#v", calls[1])
}
}
func TestParseToolCallsIgnoresBareHyphenatedToolCallsLookalike(t *testing.T) {
text := `<tool-calls><invoke name="Bash"><parameter name="command">pwd</parameter></invoke></tool-calls>`
calls := ParseToolCalls(text, []string{"Bash"})