mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-17 14:45:11 +08:00
refactor: unify empty-output retry logic into shared completionruntime package and normalize protocol adapter boundary.
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
package toolcall
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func normalizeDSMLToolCallMarkup(text string) (string, bool) {
|
||||
if text == "" {
|
||||
@@ -42,8 +45,9 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
b.WriteString(tag.Name)
|
||||
b.WriteString(text[tag.NameEnd : tag.End+1])
|
||||
if text[tag.End] != '>' {
|
||||
tail := normalizeToolMarkupTagTailForXML(text[tag.NameEnd : tag.End+1])
|
||||
b.WriteString(tail)
|
||||
if !strings.HasSuffix(tail, ">") {
|
||||
b.WriteByte('>')
|
||||
}
|
||||
i = tag.End + 1
|
||||
@@ -54,3 +58,27 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func normalizeToolMarkupTagTailForXML(tail string) string {
|
||||
if tail == "" {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
b.Grow(len(tail))
|
||||
for i := 0; i < len(tail); {
|
||||
r, size := utf8.DecodeRuneInString(tail[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
b.WriteByte(tail[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
switch normalizeFullwidthASCII(r) {
|
||||
case '>', '/', '=', '"', '\'':
|
||||
b.WriteRune(normalizeFullwidthASCII(r))
|
||||
default:
|
||||
b.WriteString(tail[i : i+size])
|
||||
}
|
||||
i += size
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)>`)
|
||||
|
||||
// cdataPattern matches a standalone CDATA section and captures its payload.
// The closing marker may end in either an ASCII '>' or the fullwidth
// greater-than sign (U+FF1E). The fullwidth form is written as an escape so
// the pattern survives tooling that width-normalizes source text.
var cdataPattern = regexp.MustCompile(`(?is)^<!\[CDATA\[(.*?)]](?:>|\x{FF1E})$`)
|
||||
|
||||
func parseMarkupKVObject(text string) map[string]any {
|
||||
matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`)
|
||||
@@ -214,7 +215,7 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
return end + len("]]>"), true, false
|
||||
return end + toolCDATACloseLenAt(text, end), true, false
|
||||
case strings.HasPrefix(text[i:], "<!--"):
|
||||
end := strings.Index(text[i+len("<!--"):], "-->")
|
||||
if end < 0 {
|
||||
@@ -227,15 +228,26 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked
|
||||
}
|
||||
|
||||
func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool {
|
||||
if start < 0 || len(text)-start < len(prefix) {
|
||||
return false
|
||||
_, ok := matchASCIIPrefixFoldAt(text, start, prefix)
|
||||
return ok
|
||||
}
|
||||
|
||||
func matchASCIIPrefixFoldAt(text string, start int, prefix string) (int, bool) {
|
||||
if start < 0 || start >= len(text) && prefix != "" {
|
||||
return 0, false
|
||||
}
|
||||
idx := start
|
||||
for j := 0; j < len(prefix); j++ {
|
||||
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
|
||||
return false
|
||||
if idx >= len(text) {
|
||||
return 0, false
|
||||
}
|
||||
ch, size := normalizedASCIIAt(text, idx)
|
||||
if size <= 0 || asciiLower(ch) != asciiLower(prefix[j]) {
|
||||
return 0, false
|
||||
}
|
||||
idx += size
|
||||
}
|
||||
return true
|
||||
return idx - start, true
|
||||
}
|
||||
|
||||
func asciiLower(b byte) byte {
|
||||
@@ -266,15 +278,14 @@ func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
}
|
||||
const closeMarker = "]]>"
|
||||
firstNonFenceEnd := -1
|
||||
for searchFrom := from; searchFrom < len(text); {
|
||||
rel := strings.Index(text[searchFrom:], closeMarker)
|
||||
if rel < 0 {
|
||||
end := indexToolCDATAClose(text, searchFrom)
|
||||
if end < 0 {
|
||||
break
|
||||
}
|
||||
end := searchFrom + rel
|
||||
searchFrom = end + len(closeMarker)
|
||||
closeLen := toolCDATACloseLenAt(text, end)
|
||||
searchFrom = end + closeLen
|
||||
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
||||
continue
|
||||
}
|
||||
@@ -288,6 +299,31 @@ func findToolCDATAEnd(text string, from int) int {
|
||||
return firstNonFenceEnd
|
||||
}
|
||||
|
||||
// indexToolCDATAClose returns the byte offset in text of the first CDATA
// close marker at or after from, or -1 when there is none.
//
// Two spellings of the marker are recognized: the ASCII "]]>" and the variant
// whose final character is the fullwidth greater-than sign (U+FF1E). When
// both occur, the earlier one wins. A negative from is treated as 0; a from
// beyond the end of text yields -1.
func indexToolCDATAClose(text string, from int) int {
	if from < 0 {
		from = 0
	}
	if from > len(text) {
		return -1
	}
	rest := text[from:]
	asciiIdx := strings.Index(rest, "]]>")
	// Escaped so the fullwidth form cannot be silently folded to ASCII.
	fullIdx := strings.Index(rest, "]]\uFF1E")
	switch {
	case asciiIdx < 0 && fullIdx < 0:
		return -1
	case asciiIdx < 0:
		return from + fullIdx
	case fullIdx < 0 || asciiIdx < fullIdx:
		return from + asciiIdx
	default:
		return from + fullIdx
	}
}
|
||||
|
||||
// toolCDATACloseLenAt returns the byte length of the CDATA close marker that
// starts at byte offset idx of text.
//
// The marker ending in the fullwidth greater-than sign (U+FF1E) is five bytes
// long in UTF-8; in every other case the length of the ASCII marker "]]>"
// (three bytes) is returned, including when idx is out of range.
func toolCDATACloseLenAt(text string, idx int) int {
	const fullwidthClose = "]]\uFF1E"
	if idx >= 0 && idx <= len(text) && strings.HasPrefix(text[idx:], fullwidthClose) {
		return len(fullwidthClose)
	}
	return len("]]>")
}
|
||||
|
||||
func cdataEndLooksStructural(text string, after int) bool {
|
||||
for after < len(text) {
|
||||
switch {
|
||||
@@ -327,22 +363,29 @@ func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
||||
}
|
||||
|
||||
func findXMLTagEnd(text string, from int) int {
|
||||
quote := byte(0)
|
||||
for i := maxInt(from, 0); i < len(text); i++ {
|
||||
ch := text[i]
|
||||
quote := rune(0)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
r, size := utf8.DecodeRuneInString(text[i:])
|
||||
if r == utf8.RuneError && size == 0 {
|
||||
break
|
||||
}
|
||||
ch := normalizeFullwidthASCII(r)
|
||||
if quote != 0 {
|
||||
if ch == quote {
|
||||
quote = 0
|
||||
}
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
if ch == '"' || ch == '\'' {
|
||||
quote = ch
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
if ch == '>' {
|
||||
return i
|
||||
return i + size - 1
|
||||
}
|
||||
i += size
|
||||
}
|
||||
return -1
|
||||
}
|
||||
@@ -355,7 +398,8 @@ func hasXMLTagBoundary(text string, idx int) bool {
|
||||
case ' ', '\t', '\n', '\r', '>', '/':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
r, _ := utf8.DecodeRuneInString(text[idx:])
|
||||
return normalizeFullwidthASCII(r) == '>'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package toolcall
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type toolMarkupNameAlias struct {
|
||||
raw string
|
||||
@@ -184,7 +187,7 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
}
|
||||
|
||||
func IsPartialToolMarkupTagPrefix(text string) bool {
|
||||
if text == "" || text[0] != '<' || strings.Contains(text, ">") {
|
||||
if text == "" || text[0] != '<' || strings.Contains(text, ">") || strings.Contains(text, ">") {
|
||||
return false
|
||||
}
|
||||
i := 1
|
||||
@@ -236,9 +239,10 @@ func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
|
||||
return idx + 1, true
|
||||
}
|
||||
if hasASCIIPrefixFoldAt(text, idx, "dsml") {
|
||||
next := idx + len("dsml")
|
||||
if next < len(text) && (text[next] == '-' || text[next] == '_') {
|
||||
next++
|
||||
dsmlLen, _ := matchASCIIPrefixFoldAt(text, idx, "dsml")
|
||||
next := idx + dsmlLen
|
||||
if sep, size := normalizedASCIIAt(text, next); sep == '-' || sep == '_' {
|
||||
next += size
|
||||
}
|
||||
return next, true
|
||||
}
|
||||
@@ -249,12 +253,17 @@ func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
|
||||
}
|
||||
|
||||
func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
|
||||
if idx < 0 || idx >= len(text) || !isToolMarkupPrefixSegmentByte(text[idx]) {
|
||||
nextSegment, ok := consumeToolMarkupPrefixSegment(text, idx)
|
||||
if !ok {
|
||||
return idx, false
|
||||
}
|
||||
j := idx + 1
|
||||
for j < len(text) && isToolMarkupPrefixSegmentByte(text[j]) {
|
||||
j++
|
||||
j := nextSegment
|
||||
for {
|
||||
nextSegment, ok = consumeToolMarkupPrefixSegment(text, j)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
j = nextSegment
|
||||
}
|
||||
k := j
|
||||
for k < len(text) && (text[k] == ' ' || text[k] == '\t' || text[k] == '\r' || text[k] == '\n') {
|
||||
@@ -262,8 +271,8 @@ func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
|
||||
}
|
||||
next, ok := consumeToolMarkupPipe(text, k)
|
||||
if !ok {
|
||||
if k < len(text) && (text[k] == '_' || text[k] == '-') {
|
||||
next = k + 1
|
||||
if sep, size := normalizedASCIIAt(text, k); sep == '_' || sep == '-' {
|
||||
next = k + size
|
||||
ok = true
|
||||
}
|
||||
}
|
||||
@@ -279,21 +288,32 @@ func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
|
||||
return next, true
|
||||
}
|
||||
|
||||
func isToolMarkupPrefixSegmentByte(b byte) bool {
|
||||
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9')
|
||||
func consumeToolMarkupPrefixSegment(text string, idx int) (int, bool) {
|
||||
ch, size := normalizedASCIIAt(text, idx)
|
||||
if size <= 0 {
|
||||
return idx, false
|
||||
}
|
||||
if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') {
|
||||
return idx + size, true
|
||||
}
|
||||
return idx, false
|
||||
}
|
||||
|
||||
func hasASCIIPartialPrefixFoldAt(text string, start int, prefix string) bool {
|
||||
remain := len(text) - start
|
||||
if remain <= 0 || remain > len(prefix) {
|
||||
if start < 0 || start >= len(text) {
|
||||
return false
|
||||
}
|
||||
for j := 0; j < remain; j++ {
|
||||
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
|
||||
idx := start
|
||||
matched := 0
|
||||
for matched < len(prefix) && idx < len(text) {
|
||||
ch, size := normalizedASCIIAt(text, idx)
|
||||
if size <= 0 || asciiLower(ch) != asciiLower(prefix[matched]) {
|
||||
return false
|
||||
}
|
||||
idx += size
|
||||
matched++
|
||||
}
|
||||
return true
|
||||
return matched > 0 && matched < len(prefix) && idx == len(text)
|
||||
}
|
||||
|
||||
func hasToolMarkupNamePrefix(text string, start int) bool {
|
||||
@@ -313,8 +333,8 @@ func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
|
||||
if name.dsmlOnly && !dsmlLike {
|
||||
continue
|
||||
}
|
||||
if hasASCIIPrefixFoldAt(text, start, name.raw) {
|
||||
return name.canonical, len(name.raw)
|
||||
if nameLen, ok := matchASCIIPrefixFoldAt(text, start, name.raw); ok {
|
||||
return name.canonical, nameLen
|
||||
}
|
||||
}
|
||||
return "", 0
|
||||
@@ -341,6 +361,29 @@ func hasToolMarkupBoundary(text string, idx int) bool {
|
||||
case ' ', '\t', '\n', '\r', '>', '/':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
r, _ := utf8.DecodeRuneInString(text[idx:])
|
||||
return normalizeFullwidthASCII(r) == '>'
|
||||
}
|
||||
}
|
||||
|
||||
// normalizedASCIIAt decodes the rune that starts at byte offset idx of text,
// folds it through normalizeFullwidthASCII, and returns it as an ASCII byte
// together with the number of bytes the rune occupies in text.
//
// It returns (0, 0) when idx is out of range or when the normalized rune is
// not ASCII. Callers treat a size of 0 as "no usable character here". The
// returned size is the width in the ORIGINAL text, so it is 3 for a fullwidth
// form and 1 for plain ASCII.
func normalizedASCIIAt(text string, idx int) (byte, int) {
	if idx < 0 || idx >= len(text) {
		return 0, 0
	}
	r, size := utf8.DecodeRuneInString(text[idx:])
	if r == utf8.RuneError && size == 0 {
		return 0, 0
	}
	normalized := normalizeFullwidthASCII(r)
	if normalized < 0 || normalized > 0x7f {
		return 0, 0
	}
	return byte(normalized), size
}

// normalizeFullwidthASCII maps a rune from the fullwidth ASCII variants
// (U+FF01 through U+FF5E) to its ASCII counterpart (U+0021 through U+007E)
// by subtracting the fixed offset 0xFEE0. Every other rune, including plain
// ASCII, is returned unchanged.
//
// The bounds are written as escapes on purpose: spelling them with the
// literal characters is fragile, because any width-normalizing step turns
// them into '!' and '~' and makes the function shift ordinary ASCII instead.
func normalizeFullwidthASCII(r rune) rune {
	if r >= '\uFF01' && r <= '\uFF5E' {
		return r - 0xFEE0
	}
	return r
}
|
||||
|
||||
@@ -111,6 +111,27 @@ func TestParseToolCallsSupportsArbitraryPrefixedToolMarkup(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
|
||||
text := `<dSML|tool_calls>
|
||||
<dSML|invoke name="Read">
|
||||
<dSML|parameter name="file_path"><![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]></dSML|parameter>
|
||||
</dSML|invoke>
|
||||
<dSML|invoke name="Read">
|
||||
<dSML|parameter name="file_path"><![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]></dSML|parameter>
|
||||
</dSML|invoke>
|
||||
</dSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Read"})
|
||||
if len(calls) != 2 {
|
||||
t.Fatalf("expected two fullwidth DSML calls, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "Read" || calls[0].Input["file_path"] != "/Users/aq/Desktop/myproject/Personal_Blog/README.md" {
|
||||
t.Fatalf("unexpected first fullwidth DSML call: %#v", calls[0])
|
||||
}
|
||||
if calls[1].Name != "Read" || calls[1].Input["file_path"] != "/Users/aq/Desktop/myproject/Personal_Blog/index.html" {
|
||||
t.Fatalf("unexpected second fullwidth DSML call: %#v", calls[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsIgnoresBareHyphenatedToolCallsLookalike(t *testing.T) {
|
||||
text := `<tool-calls><invoke name="Bash"><parameter name="command">pwd</parameter></invoke></tool-calls>`
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
|
||||
Reference in New Issue
Block a user