mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-09 18:57:43 +08:00
Merge pull request #460 from waiwaic/main
fix(toolcall): eliminate strings.ToLower panics from Unicode case folding
This commit is contained in:
@@ -292,7 +292,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
|
||||
if !containsStr(prompt, "Search the web") {
|
||||
t.Fatalf("expected description in prompt")
|
||||
}
|
||||
if !containsStr(prompt, "<|DSML|tool_calls>") {
|
||||
if !containsStr(prompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected DSML tool_calls format in prompt")
|
||||
}
|
||||
if !containsStr(prompt, "TOOL CALL FORMAT") {
|
||||
|
||||
@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
|
||||
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
|
||||
|
||||
@@ -11,46 +11,45 @@ import "strings"
|
||||
func BuildToolCallInstructions(toolNames []string) string {
|
||||
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
|
||||
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
RULES:
|
||||
1) Use the <|DSML|tool_calls> wrapper format.
|
||||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||||
1) Use the <|DSML|tool_calls> wrapper format.
|
||||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||||
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
|
||||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
|
||||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
|
||||
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
|
||||
7) Numbers, booleans, and null stay plain text.
|
||||
8) Use only the parameter names in the tool schema. Do not invent fields.
|
||||
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||||
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||||
|
||||
PARAMETER SHAPES:
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
|
||||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
|
||||
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
|
||||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
|
||||
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
|
||||
|
||||
【WRONG — Do NOT do these】:
|
||||
|
||||
Wrong 1 — mixed text after XML:
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
|
||||
Wrong 2 — Markdown code fences:
|
||||
` + "```xml" + `
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls>
|
||||
` + "```" + `
|
||||
Wrong 3 — missing opening wrapper:
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
` + buildCorrectToolExamples(toolNames)
|
||||
}
|
||||
|
||||
|
||||
@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
|
||||
|
||||
func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"read_file"})
|
||||
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
|
||||
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
|
||||
t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {
|
||||
|
||||
@@ -145,7 +145,6 @@ func SanitizeLooseCDATA(text string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
lower := strings.ToLower(text)
|
||||
const openMarker = "<![cdata["
|
||||
const closeMarker = "]]>"
|
||||
|
||||
@@ -154,17 +153,16 @@ func SanitizeLooseCDATA(text string) string {
|
||||
changed := false
|
||||
pos := 0
|
||||
for pos < len(text) {
|
||||
startRel := strings.Index(lower[pos:], openMarker)
|
||||
if startRel < 0 {
|
||||
start := indexASCIIFold(text, pos, openMarker)
|
||||
if start < 0 {
|
||||
b.WriteString(text[pos:])
|
||||
break
|
||||
}
|
||||
start := pos + startRel
|
||||
contentStart := start + len(openMarker)
|
||||
b.WriteString(text[pos:start])
|
||||
|
||||
if endRel := strings.Index(lower[contentStart:], closeMarker); endRel >= 0 {
|
||||
end := contentStart + endRel + len(closeMarker)
|
||||
if endRel := indexASCIIFold(text, contentStart, closeMarker); endRel >= 0 {
|
||||
end := endRel + len(closeMarker)
|
||||
b.WriteString(text[start:end])
|
||||
pos = end
|
||||
continue
|
||||
|
||||
@@ -212,17 +212,16 @@ func firstFenceMarkerIndex(line string) int {
|
||||
}
|
||||
|
||||
func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
|
||||
lower := strings.ToLower(line)
|
||||
pos := 0
|
||||
state := inCDATA
|
||||
fenceMarker := cdataFenceMarker
|
||||
lineForFence := line
|
||||
if !state {
|
||||
start := strings.Index(lower[pos:], "<![cdata[")
|
||||
start := indexASCIIFold(line, pos, "<![cdata[")
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos += start + len("<![cdata[")
|
||||
pos = start + len("<![cdata[")
|
||||
state = true
|
||||
lineForFence = line[pos:]
|
||||
}
|
||||
@@ -239,24 +238,23 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
|
||||
fenceMarker = ""
|
||||
}
|
||||
|
||||
for pos < len(lower) {
|
||||
end := strings.Index(lower[pos:], "]]>")
|
||||
if end < 0 {
|
||||
for pos < len(line) {
|
||||
endPos := indexASCIIFold(line, pos, "]]>")
|
||||
if endPos < 0 {
|
||||
return true, fenceMarker
|
||||
}
|
||||
endPos := pos + end
|
||||
pos = endPos + len("]]>")
|
||||
if fenceMarker != "" {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
|
||||
if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" {
|
||||
state = false
|
||||
for pos < len(lower) {
|
||||
start := strings.Index(lower[pos:], "<![cdata[")
|
||||
for pos < len(line) {
|
||||
start := indexASCIIFold(line, pos, "<![cdata[")
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos += start + len("<![cdata[")
|
||||
pos = start + len("<![cdata[")
|
||||
state = true
|
||||
trimmedTail := strings.TrimLeft(line[pos:], " \t")
|
||||
if marker, ok := parseFenceOpen(trimmedTail); ok {
|
||||
|
||||
@@ -141,7 +141,6 @@ func findXMLElementBlocks(text, tag string) []xmlElementBlock {
|
||||
}
|
||||
|
||||
func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart int, attrs string, ok bool) {
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
@@ -152,7 +151,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], target) && hasXMLTagBoundary(text, i+len(target)) {
|
||||
if hasASCIIPrefixFoldAt(text, i, target) && hasXMLTagBoundary(text, i+len(target)) {
|
||||
end := findXMLTagEnd(text, i+len(target))
|
||||
if end < 0 {
|
||||
return -1, -1, "", false
|
||||
@@ -165,7 +164,6 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
}
|
||||
|
||||
func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeEnd int, ok bool) {
|
||||
lower := strings.ToLower(text)
|
||||
openTarget := "<" + strings.ToLower(tag)
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
@@ -178,7 +176,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
|
||||
if hasASCIIPrefixFoldAt(text, i, closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
|
||||
end := findXMLTagEnd(text, i+len(closeTarget))
|
||||
if end < 0 {
|
||||
return -1, -1, false
|
||||
@@ -190,7 +188,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
i = end + 1
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
|
||||
if hasASCIIPrefixFoldAt(text, i, openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
|
||||
end := findXMLTagEnd(text, i+len(openTarget))
|
||||
if end < 0 {
|
||||
return -1, -1, false
|
||||
@@ -247,6 +245,23 @@ func asciiLower(b byte) byte {
|
||||
return b
|
||||
}
|
||||
|
||||
// indexASCIIFold returns the absolute byte position in s where substr (ASCII-only) is
|
||||
// found case-insensitively, scanning forward from start. Returns -1 if not found.
|
||||
// Unlike strings.Index on a lowercased copy, this does not allocate or risk byte-length
|
||||
// mismatch when non-ASCII runes change width under case folding.
|
||||
func indexASCIIFold(s string, start int, substr string) int {
|
||||
if start < 0 || len(s)-start < len(substr) {
|
||||
return -1
|
||||
}
|
||||
end := len(s) - len(substr) + 1
|
||||
for i := start; i < end; i++ {
|
||||
if hasASCIIPrefixFoldAt(s, i, substr) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
|
||||
@@ -134,7 +134,6 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
if start < 0 || start >= len(text) || text[start] != '<' {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
i := start + 1
|
||||
for i < len(text) && text[i] == '<' {
|
||||
i++
|
||||
@@ -144,8 +143,8 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
closing = true
|
||||
i++
|
||||
}
|
||||
i, dsmlLike := consumeToolMarkupNamePrefix(lower, text, i)
|
||||
name, nameLen := matchToolMarkupName(lower, i, dsmlLike)
|
||||
i, dsmlLike := consumeToolMarkupNamePrefix(text, i)
|
||||
name, nameLen := matchToolMarkupName(text, i, dsmlLike)
|
||||
if nameLen == 0 {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
@@ -188,7 +187,6 @@ func IsPartialToolMarkupTagPrefix(text string) bool {
|
||||
if text == "" || text[0] != '<' || strings.Contains(text, ">") {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
i := 1
|
||||
for i < len(text) && text[i] == '<' {
|
||||
i++
|
||||
@@ -203,13 +201,13 @@ func IsPartialToolMarkupTagPrefix(text string) bool {
|
||||
if i == len(text) {
|
||||
return true
|
||||
}
|
||||
if hasToolMarkupNamePrefix(lower[i:]) {
|
||||
if hasToolMarkupNamePrefix(text, i) {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix("dsml", lower[i:]) {
|
||||
if hasDSMLPrefix(text, i) {
|
||||
return true
|
||||
}
|
||||
next, ok := consumeToolMarkupNamePrefixOnce(lower, text, i)
|
||||
next, ok := consumeToolMarkupNamePrefixOnce(text, i)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
@@ -218,10 +216,10 @@ func IsPartialToolMarkupTagPrefix(text string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) {
|
||||
func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) {
|
||||
dsmlLike := false
|
||||
for {
|
||||
next, ok := consumeToolMarkupNamePrefixOnce(lower, text, idx)
|
||||
next, ok := consumeToolMarkupNamePrefixOnce(text, idx)
|
||||
if !ok {
|
||||
return idx, dsmlLike
|
||||
}
|
||||
@@ -230,14 +228,14 @@ func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) {
|
||||
}
|
||||
}
|
||||
|
||||
func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) {
|
||||
func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
|
||||
if next, ok := consumeToolMarkupPipe(text, idx); ok {
|
||||
return next, true
|
||||
}
|
||||
if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') {
|
||||
return idx + 1, true
|
||||
}
|
||||
if strings.HasPrefix(lower[idx:], "dsml") {
|
||||
if hasASCIIPrefixFoldAt(text, idx, "dsml") {
|
||||
next := idx + len("dsml")
|
||||
if next < len(text) && (text[next] == '-' || text[next] == '_') {
|
||||
next++
|
||||
@@ -247,21 +245,49 @@ func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) {
|
||||
return idx, false
|
||||
}
|
||||
|
||||
func hasToolMarkupNamePrefix(lowerTail string) bool {
|
||||
// hasDSMLPrefix checks if "dsml" starts with text[start:] (case-insensitive).
|
||||
func hasDSMLPrefix(text string, start int) bool {
|
||||
const dsml = "dsml"
|
||||
remain := len(text) - start
|
||||
if remain <= 0 || remain > len(dsml) {
|
||||
return false
|
||||
}
|
||||
for j := 0; j < remain; j++ {
|
||||
if asciiLower(text[start+j]) != dsml[j] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func hasToolMarkupNamePrefix(text string, start int) bool {
|
||||
for _, name := range toolMarkupNames {
|
||||
if strings.HasPrefix(lowerTail, name.raw) || strings.HasPrefix(name.raw, lowerTail) {
|
||||
if hasASCIIPrefixFoldAt(text, start, name.raw) {
|
||||
return true
|
||||
}
|
||||
tailLen := len(text) - start
|
||||
if tailLen > 0 && tailLen <= len(name.raw) {
|
||||
match := true
|
||||
for j := 0; j < tailLen; j++ {
|
||||
if asciiLower(text[start+j]) != asciiLower(name.raw[j]) {
|
||||
match = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if match {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func matchToolMarkupName(lower string, start int, dsmlLike bool) (string, int) {
|
||||
func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
|
||||
for _, name := range toolMarkupNames {
|
||||
if name.dsmlOnly && !dsmlLike {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[start:], name.raw) {
|
||||
if hasASCIIPrefixFoldAt(text, start, name.raw) {
|
||||
return name.canonical, len(name.raw)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user