Skip to content

Commit

Permalink
Merge pull request #23 from ShiboSoftwareDev/main
Browse files Browse the repository at this point in the history
create a log directory of all failed attempts
  • Loading branch information
ShiboSoftwareDev authored Feb 2, 2025
2 parents 7b32819 + f0c7d34 commit 0ce7481
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,4 @@ dist

# Evalite
evalite-report.jsonl
benchmarks-evalite/attempt-logs/*
52 changes: 51 additions & 1 deletion benchmarks-evalite/benchmark-error-correction.eval.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import fs, { readdirSync } from "node:fs"
import fs, { readdirSync, rmSync } from "node:fs"
import path from "node:path"
import toml from "toml"
import { anthropic } from "../lib/code-runner/anthropic"
Expand All @@ -8,6 +8,44 @@ import { evalite } from "evalite"
import { CircuitScorer } from "./scorers/circuit-scorer"
import { askAboutOutput } from "tests/fixtures/ask-about-output"

/**
 * Resets the `attempt-logs` directory that sits next to this module.
 *
 * Any existing directory (and everything inside it) is deleted, then an
 * empty directory is created in its place.
 */
const cleanupLogDirectory = () => {
  const logsDir = path.join(__dirname, "./attempt-logs")
  // `force: true` makes rmSync ignore a missing path, so no separate
  // existence check is needed (and no check-then-act race is introduced).
  rmSync(logsDir, { recursive: true, force: true })
  fs.mkdirSync(logsDir, { recursive: true })
}

/**
 * Writes a Markdown log of one attempt into the `attempt-logs` directory
 * that sits next to this module, creating the directory if necessary.
 *
 * @param fileName - Name of the log file to create inside `attempt-logs`.
 * @param prompt - Prompt text, written under the "Prompt" heading.
 * @param code - Code for the attempt, written in a `tsx` fenced block.
 * @param error - Error text for the attempt, written in a plain fenced block.
 */
const saveAttemptLog = (
  fileName: string,
  prompt: string,
  code: string,
  error: string,
) => {
  const logsDir = path.join(__dirname, "./attempt-logs")
  // With `recursive: true`, mkdirSync does not throw when the directory
  // already exists, so a prior existsSync check is unnecessary.
  fs.mkdirSync(logsDir, { recursive: true })

  const content = `# Attempt Log
## Prompt
${prompt}
## Error
\`\`\`
${error}
\`\`\`
## Code
\`\`\`tsx
${code}
\`\`\`
`

  // An existing file with the same name is overwritten.
  fs.writeFileSync(path.join(logsDir, fileName), content, "utf-8")
}

const savePrompt = (prompt: string, fileName: string) => {
const promptsDir = path.join(__dirname, "./prompts")

Expand All @@ -33,6 +71,7 @@ interface Problem {
}

let systemPrompt = ""
let promptNumber = 0

const loadProblems = (filePath: string): Problem[] => {
const tomlContent = fs.readFileSync(filePath, "utf-8")
Expand Down Expand Up @@ -107,10 +146,12 @@ const runAI = async ({
const errorCorrection = async ({
attempts = 0,
prompt,
promptNumber,
previousAttempts = [],
}: {
attempts?: number
prompt: string
promptNumber: number
previousAttempts?: AttemptHistory[]
}): Promise<{
code: string
Expand All @@ -134,6 +175,12 @@ const errorCorrection = async ({
const error = evaluation.error || ""
attempts++
previousAttempts.push({ code, error })
saveAttemptLog(
`prompt-${promptNumber}-attempt-${attempts}.md`,
prompt,
code,
error,
)

if (attempts > 3) {
return {
Expand All @@ -145,12 +192,14 @@ const errorCorrection = async ({
return await errorCorrection({
attempts,
prompt,
promptNumber,
previousAttempts,
})
}

evalite("Reasoning Electronics Engineer", {
data: async () => {
cleanupLogDirectory()
const problems = loadProblems(path.join(__dirname, "./problems-1.toml"))
systemPrompt = await createPrompt()

Expand All @@ -169,6 +218,7 @@ evalite("Reasoning Electronics Engineer", {
task: async (input) => {
const { code, codeBlock, error } = await errorCorrection({
prompt: input.prompt,
promptNumber: ++promptNumber,
})

const output: {
Expand Down

0 comments on commit 0ce7481

Please sign in to comment.