diff --git a/AGENTS.md b/AGENTS.md index 231b9295c4..ad0f0cd6d3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -44,5 +44,4 @@ IMPORTANT: Prefer retrieval-led reasoning over pre-training-led reasoning. Alway - `docs/environment-variables.md` — Env var rules, DI helpers, loading order - `docs/agents-and-tools.md` — Agent system, shell shims, tool definitions - `docs/patterns/handle-steps-generators.md` — handleSteps generator patterns and spawn_agents tool calls -- `docs/evalbuff/interpreting-task-prompts.md` - `docs/patterns/discover-before-implement.md` diff --git a/cli/src/__tests__/path-completion.test.ts b/cli/src/__tests__/path-completion.test.ts index 80ecc482fd..8c09dde41a 100644 --- a/cli/src/__tests__/path-completion.test.ts +++ b/cli/src/__tests__/path-completion.test.ts @@ -168,8 +168,6 @@ describe('getPathCompletion', () => { // Create a test directory structure we can control // Note: This test is tricky because we can't easily create dirs in home // So we'll test with the actual home directory if it has subdirs - const homeDir = os.homedir() - // Try completing from home directory with tilde const result = getPathCompletion('~/') diff --git a/cli/src/__tests__/unit/copy-button.test.ts b/cli/src/__tests__/unit/copy-button.test.ts index 585fd8c1ae..11e7cb57a3 100644 --- a/cli/src/__tests__/unit/copy-button.test.ts +++ b/cli/src/__tests__/unit/copy-button.test.ts @@ -138,18 +138,14 @@ describe('CopyButton - copied state reset timing', () => { }) test('multiple rapid clicks only create one active timer', () => { - let isCopied = false let currentTimerId: number | null = null const handleCopy = () => { if (currentTimerId !== null) { clearTimeout(currentTimerId) } - const newState = copyButtonHandlers.handleCopy() - isCopied = newState.isCopied - currentTimerId = setTimeout(() => { - isCopied = false - }, COPIED_RESET_DELAY_MS) as unknown as number + copyButtonHandlers.handleCopy() + currentTimerId = setTimeout(() => {}, COPIED_RESET_DELAY_MS) as unknown as number } handleCopy() diff --git a/docs/evalbuff/interpreting-task-prompts.md b/docs/evalbuff/interpreting-task-prompts.md deleted file mode 100644 index 0ccaf4e9ef..0000000000 --- a/docs/evalbuff/interpreting-task-prompts.md +++ /dev/null @@ -1,63 +0,0 @@ -# Interpreting Task Prompts (Especially Eval-Generated Ones) - -When working with task prompts, especially those auto-generated from commit history for evaluation purposes, the prompt text may not accurately describe the actual work needed. - -## The Problem - -Evalbuff generates task prompts by analyzing commits. Sometimes the prompt will say "create documentation about X" when the actual ground truth is "fix test scripts in package.json and CI workflow files." This happens when: - -1. The commit message is misleading (e.g., "Simplify AGENTS.md" when it actually removes test scripts) -2. The prompt generator focuses on visible file additions rather than the semantic meaning of the change -3. The task is stated in terms of what a developer might ASK for, not what they actually need - -## Solution: Always Check Ground Truth First - -Before implementing ANY task: - -1. **Check if there's a ground truth diff available** - look for references to expected changes, test files, or "what should have been done" -2. **Examine file paths and extensions in the ground truth**: - - `.json` files (especially `package.json`) → likely config/dependency changes - - `.yml`/`.yaml` files in `.github/workflows/` → CI/CD configuration changes - - `.md` files → documentation (but could also be removing or editing existing docs) - - `.ts`/`.js` files → code changes -3. **Read the actual diff content, not just the prompt** - the diff shows EXACTLY what changed -4. **Distinguish between creation vs. modification**: - - Does the ground truth show `new file mode` or additions to existing files? - - Is this refactoring, removal, or net-new functionality? - -## Example: The AGENTS.md Confusion - -Prompt said: -> "Can you create an AGENTS.md file at the root that provides an overview..." - -Ground truth showed: -```diff ---- a/.agents/package.json -+++ b/.agents/package.json -- "test:e2e": "bun test e2e" ---- a/.github/workflows/nightly-e2e.yml -+++ b/.github/workflows/nightly-e2e.yml -- run: cd .agents && bun run test:e2e -+ run: cd agents && bun run test:e2e -``` - -The actual task was about: -- Removing a test script from package.json -- Fixing directory references in a CI workflow -- NOT about creating documentation - -The agent should have recognized the ground truth shows `.json` and `.yml` config files, not `.md` documentation files. - -## When In Doubt - -If the prompt seems to conflict with file paths/types in the ground truth: -1. Trust the ground truth diff over the prompt text -2. Read the actual file contents being changed -3. Understand the PURPOSE of the change (fixing tests, updating config, refactoring) before implementing -4. Ask clarifying questions if the task is genuinely ambiguous - -## Red Flags - -- Prompt says "create docs" but ground truth shows only config file changes → likely NOT a docs task -- Prompt says "add feature X" but ground truth removes code → likely a cleanup/refactor task -- Prompt uses vague language ("simplify", "improve") → read the diff to understand the specific technical change \ No newline at end of file