refactor: remove the configurable toolcall policy and pin it to feature matching with high-confidence early emit

2026-05-04 08:25:26 +08:00 · 2026-03-30 01:56:25 +08:00
parent 822b14ed6b
commit a7c9dfd7c0
20 changed files with 38 additions and 179 deletions
--- a/API.en.md
+++ b/API.en.md
@@ -630,10 +630,11 @@ Reads runtime settings and status, including:
 - `success`
 - `admin` (`has_password_hash`, `jwt_expire_hours`, `jwt_valid_after_unix`, `default_password_warning`)
 - `runtime` (`account_max_inflight`, `account_max_queue`, `global_max_inflight`, `token_refresh_interval_hours`)
- `toolcall` / `responses` / `embeddings`
+- `responses` / `embeddings`
 - `auto_delete` (`sessions`)
 - `claude_mapping` / `model_aliases`
 - `env_backed`, `needs_vercel_sync`
+- `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings

 ### `PUT /admin/settings`

@@ -641,12 +642,12 @@ Hot-updates runtime settings. Supported fields:

 - `admin.jwt_expire_hours`
 - `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
- `toolcall.mode` / `toolcall.early_emit_confidence`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
 - `auto_delete.sessions`
 - `claude_mapping`
 - `model_aliases`
+- `toolcall` policy is fixed and is no longer writable through settings

 ### `POST /admin/settings/password`

@@ -669,7 +670,7 @@ Imports full config with:

 The request can send config directly, or wrapped as `{"config": {...}, "mode":"merge"}`.
 Query params `?mode=merge` / `?mode=replace` are also supported.
-Import accepts `keys`, `accounts`, `claude_mapping` / `claude_model_mapping`, `model_aliases`, `admin`, `runtime`, `toolcall`, `responses`, `embeddings`, and `auto_delete`.
+Import accepts `keys`, `accounts`, `claude_mapping` / `claude_model_mapping`, `model_aliases`, `admin`, `runtime`, `responses`, `embeddings`, and `auto_delete`; legacy `toolcall` fields are ignored.

 ### `GET /admin/config/export`

--- a/API.md
+++ b/API.md
@@ -639,10 +639,11 @@ data: {"type":"message_stop"}
 - `success`
 - `admin`（`has_password_hash`、`jwt_expire_hours`、`jwt_valid_after_unix`、`default_password_warning`）
 - `runtime`（`account_max_inflight`、`account_max_queue`、`global_max_inflight`、`token_refresh_interval_hours`）
- `toolcall` / `responses` / `embeddings`
+- `responses` / `embeddings`
 - `auto_delete`（`sessions`）
 - `claude_mapping` / `model_aliases`
 - `env_backed`、`needs_vercel_sync`
+- `toolcall` 策略已固定为 `feature_match + high`，不再通过 settings 返回或修改

 ### `PUT /admin/settings`

@@ -650,12 +651,12 @@ data: {"type":"message_stop"}

 - `admin.jwt_expire_hours`
 - `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
- `toolcall.mode` / `toolcall.early_emit_confidence`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
 - `auto_delete.sessions`
 - `claude_mapping`
 - `model_aliases`
+- `toolcall` 策略已固定，不再作为可写入字段

 ### `POST /admin/settings/password`

@@ -678,7 +679,7 @@ data: {"type":"message_stop"}

 请求可直接传配置对象，或使用 `{"config": {...}, "mode":"merge"}` 包裹格式。
 也支持在查询参数里传 `?mode=merge` / `?mode=replace`。
-导入时会接受 `keys`、`accounts`、`claude_mapping` / `claude_model_mapping`、`model_aliases`、`admin`、`runtime`、`toolcall`、`responses`、`embeddings`、`auto_delete` 等字段。
+导入时会接受 `keys`、`accounts`、`claude_mapping` / `claude_model_mapping`、`model_aliases`、`admin`、`runtime`、`responses`、`embeddings`、`auto_delete` 等字段；`toolcall` 相关字段会被忽略。

 ### `GET /admin/config/export`

--- a/README.MD
+++ b/README.MD
@@ -270,10 +270,6 @@ cp opencode.json.example opencode.json
  "compat": {
    "wide_input_strict_output": true
  },
-  "toolcall": {
-    "mode": "feature_match",
-    "early_emit_confidence": "high"
-  },
  "responses": {
    "store_ttl_seconds": 900
  },
@@ -304,7 +300,7 @@ cp opencode.json.example opencode.json
 - `token`：配置文件中即使填写也会在加载时被清空（不会从 `config.json` 读取 token）；实际 token 仅在运行时内存中维护并自动刷新
 - `model_aliases`：常见模型名（如 GPT/Codex/Claude）到 DeepSeek 模型的映射
 - `compat.wide_input_strict_output`：建议保持 `true`（当前实现默认宽进严出）
- `toolcall`：固定采用特征匹配 + 高置信早发策略
+- `toolcall`：策略已固定为特征匹配 + 高置信早发，不再作为可配置项
 - `responses.store_ttl_seconds`：`/v1/responses/{id}` 的内存缓存 TTL
 - `embeddings.provider`：embedding 提供方（当前内置 `deterministic/mock/builtin`）
 - `claude_mapping`：字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型（兼容读取 `claude_model_mapping`）
--- a/README.en.md
+++ b/README.en.md
@@ -270,10 +270,6 @@ cp opencode.json.example opencode.json
  "compat": {
    "wide_input_strict_output": true
  },
-  "toolcall": {
-    "mode": "feature_match",
-    "early_emit_confidence": "high"
-  },
  "responses": {
    "store_ttl_seconds": 900
  },
@@ -304,7 +300,7 @@ cp opencode.json.example opencode.json
 - `token`: Even if set in `config.json`, it is cleared during load (DS2API does not read persisted tokens from config); runtime tokens are maintained/refreshed in memory only
 - `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models
 - `compat.wide_input_strict_output`: Keep `true` (current default policy)
- `toolcall`: Fixed to feature matching + high-confidence early emit
+- `toolcall`: Fixed to feature matching + high-confidence early emit; it is no longer configurable
 - `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}`
 - `embeddings.provider`: Embeddings provider (`deterministic/mock/builtin` built-in)
 - `claude_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models (still compatible with `claude_model_mapping`)
--- a/internal/adapter/openai/handler_toolcall_policy.go
+++ b/internal/adapter/openai/handler_toolcall_policy.go
@@ -1,19 +1,9 @@
 package openai

-import "strings"
-
 func (h *Handler) toolcallFeatureMatchEnabled() bool {
-	if h == nil || h.Store == nil {
-		return true
-	}
-	mode := strings.TrimSpace(strings.ToLower(h.Store.ToolcallMode()))
-	return mode == "" || mode == "feature_match"
+	return true
 }

 func (h *Handler) toolcallEarlyEmitHighConfidence() bool {
-	if h == nil || h.Store == nil {
-		return true
-	}
-	level := strings.TrimSpace(strings.ToLower(h.Store.ToolcallEarlyEmitConfidence()))
-	return level == "" || level == "high"
+	return true
 }
--- a/internal/adapter/openai/vercel_stream.go
+++ b/internal/adapter/openai/vercel_stream.go
@@ -93,18 +93,16 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 	}
 	leased = true
 	writeJSON(w, http.StatusOK, map[string]any{
-		"session_id":               sessionID,
-		"lease_id":                 leaseID,
-		"model":                    stdReq.ResponseModel,
-		"final_prompt":             stdReq.FinalPrompt,
-		"thinking_enabled":         stdReq.Thinking,
-		"search_enabled":           stdReq.Search,
-		"tool_names":               stdReq.ToolNames,
-		"toolcall_feature_match":   h.toolcallFeatureMatchEnabled(),
-		"toolcall_early_emit_high": h.toolcallEarlyEmitHighConfidence(),
-		"deepseek_token":           a.DeepSeekToken,
-		"pow_header":               powHeader,
-		"payload":                  payload,
+		"session_id":       sessionID,
+		"lease_id":         leaseID,
+		"model":            stdReq.ResponseModel,
+		"final_prompt":     stdReq.FinalPrompt,
+		"thinking_enabled": stdReq.Thinking,
+		"search_enabled":   stdReq.Search,
+		"tool_names":       stdReq.ToolNames,
+		"deepseek_token":   a.DeepSeekToken,
+		"pow_header":       powHeader,
+		"payload":          payload,
 	})
 }

--- a/internal/admin/handler_config_import.go
+++ b/internal/admin/handler_config_import.go
@@ -120,12 +120,6 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) {
 					next.ModelAliases[k] = v
 				}
 			}
-			if strings.TrimSpace(incoming.Toolcall.Mode) != "" {
-				next.Toolcall.Mode = incoming.Toolcall.Mode
-			}
-			if strings.TrimSpace(incoming.Toolcall.EarlyEmitConfidence) != "" {
-				next.Toolcall.EarlyEmitConfidence = incoming.Toolcall.EarlyEmitConfidence
-			}
 			if incoming.Responses.StoreTTLSeconds > 0 {
 				next.Responses.StoreTTLSeconds = incoming.Responses.StoreTTLSeconds
 			}
--- a/internal/admin/handler_settings_parse.go
+++ b/internal/admin/handler_settings_parse.go
@@ -21,11 +21,10 @@ func boolFrom(v any) bool {
 	}
 }

-func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ToolcallConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
+func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
 	var (
 		adminCfg      *config.AdminConfig
 		runtimeCfg    *config.RuntimeConfig
-		toolcallCfg   *config.ToolcallConfig
 		respCfg       *config.ResponsesConfig
 		embCfg        *config.EmbeddingsConfig
 		autoDeleteCfg *config.AutoDeleteConfig
@@ -38,7 +37,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["jwt_expire_hours"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 720 {
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
 			}
 			cfg.JWTExpireHours = n
 		}
@@ -50,66 +49,43 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["account_max_inflight"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 256 {
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
 			}
 			cfg.AccountMaxInflight = n
 		}
 		if v, exists := raw["account_max_queue"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 200000 {
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
 			}
 			cfg.AccountMaxQueue = n
 		}
 		if v, exists := raw["global_max_inflight"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 200000 {
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
 			}
 			cfg.GlobalMaxInflight = n
 		}
 		if v, exists := raw["token_refresh_interval_hours"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 720 {
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
 			}
 			cfg.TokenRefreshIntervalHours = n
 		}
 		if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight {
-			return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
+			return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
 		}
 		runtimeCfg = cfg
 	}

-	if raw, ok := req["toolcall"].(map[string]any); ok {
-		cfg := &config.ToolcallConfig{}
-		if v, exists := raw["mode"]; exists {
-			mode := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
-			switch mode {
-			case "feature_match", "off":
-				cfg.Mode = mode
-			default:
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("toolcall.mode must be feature_match or off")
-			}
-		}
-		if v, exists := raw["early_emit_confidence"]; exists {
-			level := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
-			switch level {
-			case "high", "low", "off":
-				cfg.EarlyEmitConfidence = level
-			default:
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("toolcall.early_emit_confidence must be high, low or off")
-			}
-		}
-		toolcallCfg = cfg
-	}
-
 	if raw, ok := req["responses"].(map[string]any); ok {
 		cfg := &config.ResponsesConfig{}
 		if v, exists := raw["store_ttl_seconds"]; exists {
 			n := intFrom(v)
 			if n < 30 || n > 86400 {
-				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
 			}
 			cfg.StoreTTLSeconds = n
 		}
@@ -157,5 +133,5 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		autoDeleteCfg = cfg
 	}

-	return adminCfg, runtimeCfg, toolcallCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
+	return adminCfg, runtimeCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
 }
--- a/internal/admin/handler_settings_read.go
+++ b/internal/admin/handler_settings_read.go
@@ -26,7 +26,6 @@ func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) {
 			"global_max_inflight":          h.Store.RuntimeGlobalMaxInflight(recommended),
 			"token_refresh_interval_hours": h.Store.RuntimeTokenRefreshIntervalHours(),
 		},
-		"toolcall":          snap.Toolcall,
 		"responses":         snap.Responses,
 		"embeddings":        snap.Embeddings,
 		"auto_delete":       snap.AutoDelete,
--- a/internal/admin/handler_settings_write.go
+++ b/internal/admin/handler_settings_write.go
@@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	adminCfg, runtimeCfg, toolcallCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
+	adminCfg, runtimeCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
 	if err != nil {
 		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
 		return
@@ -49,14 +49,6 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 				c.Runtime.TokenRefreshIntervalHours = runtimeCfg.TokenRefreshIntervalHours
 			}
 		}
-		if toolcallCfg != nil {
-			if strings.TrimSpace(toolcallCfg.Mode) != "" {
-				c.Toolcall.Mode = strings.TrimSpace(toolcallCfg.Mode)
-			}
-			if strings.TrimSpace(toolcallCfg.EarlyEmitConfidence) != "" {
-				c.Toolcall.EarlyEmitConfidence = strings.TrimSpace(toolcallCfg.EarlyEmitConfidence)
-			}
-		}
 		if responsesCfg != nil && responsesCfg.StoreTTLSeconds > 0 {
 			c.Responses.StoreTTLSeconds = responsesCfg.StoreTTLSeconds
 		}
--- a/internal/admin/settings_validation.go
+++ b/internal/admin/settings_validation.go
@@ -12,8 +12,6 @@ func normalizeSettingsConfig(c *config.Config) {
 		return
 	}
 	c.Admin.PasswordHash = strings.TrimSpace(c.Admin.PasswordHash)
-	c.Toolcall.Mode = strings.ToLower(strings.TrimSpace(c.Toolcall.Mode))
-	c.Toolcall.EarlyEmitConfidence = strings.ToLower(strings.TrimSpace(c.Toolcall.EarlyEmitConfidence))
 	c.Embeddings.Provider = strings.TrimSpace(c.Embeddings.Provider)
 }

@@ -27,20 +25,6 @@ func validateSettingsConfig(c config.Config) error {
 	if c.Responses.StoreTTLSeconds != 0 && (c.Responses.StoreTTLSeconds < 30 || c.Responses.StoreTTLSeconds > 86400) {
 		return fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
 	}
-	if mode := strings.TrimSpace(c.Toolcall.Mode); mode != "" {
-		switch mode {
-		case "feature_match", "off":
-		default:
-			return fmt.Errorf("toolcall.mode must be feature_match or off")
-		}
-	}
-	if level := strings.TrimSpace(c.Toolcall.EarlyEmitConfidence); level != "" {
-		switch level {
-		case "high", "low", "off":
-		default:
-			return fmt.Errorf("toolcall.early_emit_confidence must be high, low or off")
-		}
-	}
 	if c.Embeddings.Provider != "" && strings.TrimSpace(c.Embeddings.Provider) == "" {
 		return fmt.Errorf("embeddings.provider cannot be empty")
 	}
--- a/internal/config/codec.go
+++ b/internal/config/codec.go
@@ -38,9 +38,6 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if c.Compat.WideInputStrictOutput != nil {
 		m["compat"] = c.Compat
 	}
-	if strings.TrimSpace(c.Toolcall.Mode) != "" || strings.TrimSpace(c.Toolcall.EarlyEmitConfidence) != "" {
-		m["toolcall"] = c.Toolcall
-	}
 	if c.Responses.StoreTTLSeconds > 0 {
 		m["responses"] = c.Responses
 	}
@@ -98,9 +95,7 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 				return fmt.Errorf("invalid field %q: %w", k, err)
 			}
 		case "toolcall":
-			if err := json.Unmarshal(v, &c.Toolcall); err != nil {
-				return fmt.Errorf("invalid field %q: %w", k, err)
-			}
+			// Legacy field ignored. Toolcall policy is fixed and no longer configurable.
 		case "responses":
 			if err := json.Unmarshal(v, &c.Responses); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
@@ -143,7 +138,6 @@ func (c Config) Clone() Config {
 		Compat: CompatConfig{
 			WideInputStrictOutput: cloneBoolPtr(c.Compat.WideInputStrictOutput),
 		},
-		Toolcall:         c.Toolcall,
 		Responses:        c.Responses,
 		Embeddings:       c.Embeddings,
 		AutoDelete:       c.AutoDelete,
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -9,7 +9,6 @@ type Config struct {
 	Admin            AdminConfig       `json:"admin,omitempty"`
 	Runtime          RuntimeConfig     `json:"runtime,omitempty"`
 	Compat           CompatConfig      `json:"compat,omitempty"`
-	Toolcall         ToolcallConfig    `json:"toolcall,omitempty"`
 	Responses        ResponsesConfig   `json:"responses,omitempty"`
 	Embeddings       EmbeddingsConfig  `json:"embeddings,omitempty"`
 	AutoDelete       AutoDeleteConfig  `json:"auto_delete"`
@@ -68,11 +67,6 @@ type RuntimeConfig struct {
 	TokenRefreshIntervalHours int `json:"token_refresh_interval_hours,omitempty"`
 }

-type ToolcallConfig struct {
-	Mode                string `json:"mode,omitempty"`
-	EarlyEmitConfidence string `json:"early_emit_confidence,omitempty"`
-}
-
 type ResponsesConfig struct {
 	StoreTTLSeconds int `json:"store_ttl_seconds,omitempty"`
 }
--- a/internal/config/store_accessors.go
+++ b/internal/config/store_accessors.go
@@ -43,23 +43,11 @@ func (s *Store) CompatWideInputStrictOutput() bool {
 }

 func (s *Store) ToolcallMode() string {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	mode := strings.TrimSpace(strings.ToLower(s.cfg.Toolcall.Mode))
-	if mode == "" {
-		return "feature_match"
-	}
-	return mode
+	return "feature_match"
 }

 func (s *Store) ToolcallEarlyEmitConfidence() string {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	level := strings.TrimSpace(strings.ToLower(s.cfg.Toolcall.EarlyEmitConfidence))
-	if level == "" {
-		return "high"
-	}
-	return level
+	return "high"
 }

 func (s *Store) ResponsesStoreTTLSeconds() int {
--- a/internal/js/chat-stream/toolcall_policy.js
+++ b/internal/js/chat-stream/toolcall_policy.js
@@ -12,12 +12,10 @@ function resolveToolcallPolicy(prepBody, payloadTools) {
  if (toolNames.length === 0 && Array.isArray(payloadTools) && payloadTools.length > 0) {
    toolNames = ['__any_tool__'];
  }
-  const featureMatchEnabled = boolDefaultTrue(prepBody && prepBody.toolcall_feature_match);
-  const emitEarlyToolDeltas = featureMatchEnabled && boolDefaultTrue(prepBody && prepBody.toolcall_early_emit_high);
  return {
    toolNames,
    toolSieveEnabled: toolNames.length > 0,
-    emitEarlyToolDeltas,
+    emitEarlyToolDeltas: true,
  };
 }

--- a/tests/node/chat-stream.test.js
+++ b/tests/node/chat-stream.test.js
@@ -34,7 +34,7 @@ test('resolveToolcallPolicy defaults to feature-match + early emit when prepare
  assert.equal(policy.emitEarlyToolDeltas, true);
 });

-test('resolveToolcallPolicy respects prepare flags and prepared tool names', () => {
+test('resolveToolcallPolicy ignores prepare flags and keeps early emit enabled', () => {
  const policy = resolveToolcallPolicy(
    {
      tool_names: [' prepped_tool ', '', null],
@@ -45,7 +45,7 @@ test('resolveToolcallPolicy respects prepare flags and prepared tool names', ()
  );
  assert.deepEqual(policy.toolNames, ['prepped_tool']);
  assert.equal(policy.toolSieveEnabled, true);
-  assert.equal(policy.emitEarlyToolDeltas, false);
+  assert.equal(policy.emitEarlyToolDeltas, true);
 });

 test('normalizePreparedToolNames filters empty values', () => {
--- a/webui/src/features/settings/BehaviorSection.jsx
+++ b/webui/src/features/settings/BehaviorSection.jsx
@@ -3,35 +3,6 @@ export default function BehaviorSection({ t, form, setForm }) {
        <div className="bg-card border border-border rounded-xl p-5 space-y-4">
            <h3 className="font-semibold">{t('settings.behaviorTitle')}</h3>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
-                <label className="text-sm space-y-2">
-                    <span className="text-muted-foreground">{t('settings.toolcallMode')}</span>
-                    <select
-                        value={form.toolcall.mode}
-                        onChange={(e) => setForm((prev) => ({
-                            ...prev,
-                            toolcall: { ...prev.toolcall, mode: e.target.value },
-                        }))}
-                        className="w-full bg-background border border-border rounded-lg px-3 py-2"
-                    >
-                        <option value="feature_match">feature_match</option>
-                        <option value="off">off</option>
-                    </select>
-                </label>
-                <label className="text-sm space-y-2">
-                    <span className="text-muted-foreground">{t('settings.earlyEmitConfidence')}</span>
-                    <select
-                        value={form.toolcall.early_emit_confidence}
-                        onChange={(e) => setForm((prev) => ({
-                            ...prev,
-                            toolcall: { ...prev.toolcall, early_emit_confidence: e.target.value },
-                        }))}
-                        className="w-full bg-background border border-border rounded-lg px-3 py-2"
-                    >
-                        <option value="high">high</option>
-                        <option value="low">low</option>
-                        <option value="off">off</option>
-                    </select>
-                </label>
                <label className="text-sm space-y-2">
                    <span className="text-muted-foreground">{t('settings.responsesTTL')}</span>
                    <input
--- a/webui/src/features/settings/useSettingsForm.js
+++ b/webui/src/features/settings/useSettingsForm.js
@@ -13,7 +13,6 @@ const MAX_AUTO_FETCH_FAILURES = 3
 const DEFAULT_FORM = {
    admin: { jwt_expire_hours: 24 },
    runtime: { account_max_inflight: 2, account_max_queue: 10, global_max_inflight: 10, token_refresh_interval_hours: 6 },
-    toolcall: { mode: 'feature_match', early_emit_confidence: 'high' },
    responses: { store_ttl_seconds: 900 },
    embeddings: { provider: '' },
    auto_delete: { sessions: false },
@@ -47,10 +46,6 @@ function fromServerForm(data) {
            global_max_inflight: Number(data.runtime?.global_max_inflight || 10),
            token_refresh_interval_hours: Number(data.runtime?.token_refresh_interval_hours || 6),
        },
-        toolcall: {
-            mode: data.toolcall?.mode || 'feature_match',
-            early_emit_confidence: data.toolcall?.early_emit_confidence || 'high',
-        },
        responses: {
            store_ttl_seconds: Number(data.responses?.store_ttl_seconds || 900),
        },
@@ -74,10 +69,6 @@ function toServerPayload(form) {
            global_max_inflight: Number(form.runtime.global_max_inflight),
            token_refresh_interval_hours: Number(form.runtime.token_refresh_interval_hours),
        },
-        toolcall: {
-            mode: String(form.toolcall.mode || '').trim(),
-            early_emit_confidence: String(form.toolcall.early_emit_confidence || '').trim(),
-        },
        responses: { store_ttl_seconds: Number(form.responses.store_ttl_seconds) },
        embeddings: { provider: String(form.embeddings.provider || '').trim() },
        auto_delete: { sessions: Boolean(form.auto_delete?.sessions) },
--- a/webui/src/locales/en.json
+++ b/webui/src/locales/en.json
@@ -228,8 +228,6 @@
        "globalMaxInflight": "Global max inflight",
        "tokenRefreshIntervalHours": "Managed token refresh interval (hours)",
        "behaviorTitle": "Behavior",
-        "toolcallMode": "Toolcall mode",
-        "earlyEmitConfidence": "Early emit confidence",
        "responsesTTL": "Responses store TTL (seconds)",
        "embeddingsProvider": "Embeddings provider",
        "modelTitle": "Model mapping",
--- a/webui/src/locales/zh.json
+++ b/webui/src/locales/zh.json
@@ -228,8 +228,6 @@
        "globalMaxInflight": "全局并发上限",
        "tokenRefreshIntervalHours": "托管账号 Token 刷新间隔（小时）",
        "behaviorTitle": "行为设置",
-        "toolcallMode": "Toolcall 模式",
-        "earlyEmitConfidence": "早发置信度",
        "responsesTTL": "Responses 缓存 TTL（秒）",
        "embeddingsProvider": "Embeddings Provider",
        "modelTitle": "模型映射",