-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from ShiboSoftwareDev/main
new benchmarking tool: evalite
- Loading branch information
Showing
8 changed files
with
265 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -175,3 +175,6 @@ dist | |
.DS_Store | ||
.vscode | ||
.aider* | ||
|
||
# Evalite | ||
evalite-report.jsonl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import fs from 'fs'; | ||
import path from 'path'; | ||
import toml from 'toml'; | ||
import { anthropic } from '../lib/code-runner/anthropic'; | ||
import { safeEvaluateCode } from '../lib/code-runner/safe-evaluate-code'; | ||
import { askAboutOutput } from '../tests/fixtures/ask-about-output'; | ||
import { createCircuitBoard1Template } from '../prompt-templates/create-circuit-board1'; | ||
import { evalite } from "evalite"; | ||
import { Levenshtein } from "autoevals"; | ||
|
||
/**
 * One benchmark problem: a design request for the model plus the
 * true/false statements used to grade the circuit it produces.
 */
interface Problem {
  /** Natural-language design request sent to the model. */
  prompt: string;
  /** Statements about the generated circuit, each with its expected verdict. */
  questions: { text: string; answer: boolean }[];
}
|
||
/**
 * Reads a TOML file and returns its `problems` entries as {@link Problem} objects.
 *
 * Only `prompt` and, for each question, `text` and `answer` are copied; any
 * other keys present in the file are dropped.
 *
 * @param filePath - Path of the TOML file to read (read synchronously as UTF-8).
 * @returns The problems in file order.
 * @throws Error when the file lacks a top-level `problems` array, or when an
 *   entry does not have a string `prompt`, a `questions` array, and questions
 *   with a string `text` and boolean `answer`. File-system and TOML syntax
 *   errors propagate unchanged.
 */
const loadProblems = (filePath: string): Problem[] => {
  const tomlContent = fs.readFileSync(filePath, 'utf-8');
  // The parser's result is untyped, so treat it as unknown and narrow it
  // explicitly instead of trusting the file's shape.
  const parsedToml: unknown = toml.parse(tomlContent);

  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null;

  if (!isRecord(parsedToml) || !Array.isArray(parsedToml.problems)) {
    throw new Error(`${filePath}: expected a top-level "problems" array`);
  }

  return parsedToml.problems.map((problem: unknown, problemIndex: number) => {
    if (
      !isRecord(problem) ||
      typeof problem.prompt !== 'string' ||
      !Array.isArray(problem.questions)
    ) {
      throw new Error(
        `${filePath}: problems[${problemIndex}] must have a string "prompt" and a "questions" array`
      );
    }

    return {
      prompt: problem.prompt,
      questions: problem.questions.map((q: unknown, questionIndex: number) => {
        if (!isRecord(q) || typeof q.text !== 'string' || typeof q.answer !== 'boolean') {
          throw new Error(
            `${filePath}: problems[${problemIndex}].questions[${questionIndex}] must have a string "text" and a boolean "answer"`
          );
        }
        return { text: q.text, answer: q.answer };
      }),
    };
  });
};
|
||
/**
 * Sends one benchmark prompt to the Anthropic Messages API and returns the
 * text of the reply.
 *
 * The user message is the circuit-board prompt template (rendered with empty
 * current code and no available imports), a blank line, then `prompt`.
 *
 * @param prompt - Problem statement appended after the template.
 * @returns The text of the first content block of the response, or `''` when
 *   the response has no first block or that block is not a text block.
 */
const runAI = async (prompt: string): Promise<string> => {
  const template = createCircuitBoard1Template({
    currentCode: "",
    availableImports: {},
  });
  const fullPrompt = template + "\n\n" + prompt;

  const completion = await anthropic.messages.create({
    model: 'claude-3-5-haiku-20241022',
    max_tokens: 1024,
    system: "You are an expert in electronic circuit design and tscircuit.",
    messages: [
      {
        role: 'user',
        content: fullPrompt,
      },
    ],
  });

  // Narrow on the block's `type` tag rather than casting the response to
  // `any`; only text blocks carry a `text` field, every other case yields ''.
  const firstBlock = completion.content[0];
  return firstBlock?.type === 'text' ? firstBlock.text : '';
};
|
||
// Register one evalite suite per problem found in problems.toml, which is
// resolved relative to this file's directory.
const problems = loadProblems(path.join(__dirname, './problems.toml'));

problems.forEach((problem, index) => {
  // Suites are numbered from 1 in file order.
  evalite(`problem: ${index + 1}`, {
    /**
     * Asks the model for a circuit once per problem, then produces one test
     * case per question about that circuit.
     */
    data: async () => {
      const aiResponse = await runAI(problem.prompt);

      // Take the body of the first ```tsx fenced block; no fence means no code.
      const codeMatch = aiResponse.match(/```tsx\s*([\s\S]*?)\s*```/);
      const code = codeMatch?.[1]?.trim() ?? '';

      const evaluation = safeEvaluateCode(code, {
        outputType: 'board',
        preSuppliedImports: {},
      });
      // Code that failed evaluation is passed on as null so the task can
      // skip it without asking any question about it.
      const usableCode = evaluation.success ? code : null;

      return problem.questions.map((question) => ({
        input: { code: usableCode, question: question.text },
        expected: question.answer.toString(),
      }));
    },

    /**
     * Answers a single question about the generated code. Returns "" when
     * there is no usable code; otherwise the stringified result of
     * askAboutOutput.
     */
    task: async (input) => {
      if (!input.code) {
        return "";
      }
      const answer = await askAboutOutput(input.code, input.question);
      return answer.toString();
    },

    // NOTE(review): expected values are the strings "true"/"false"; an
    // edit-distance scorer presumably gives partial credit to a wrong answer.
    // Confirm whether an exact-match scorer is what is intended here.
    scorers: [Levenshtein],
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
[[problems]] | ||
prompt = """ | ||
Create a 555 timer with a blinking LED. | ||
""" | ||
questions = [ | ||
{ text = "The circuit includes a 555 timer IC", answer = true }, | ||
{ text = "The LED blinks at a frequency of 1 Hz", answer = true }, | ||
{ text = "The circuit uses a resistor and capacitor to set the timing", answer = true }, | ||
{ text = "The LED is connected directly to the 555 timer output", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Design a simple LED driver circuit using a transistor and a current-limiting resistor. | ||
""" | ||
questions = [ | ||
{ text = "The circuit includes a transistor", answer = true }, | ||
{ text = "There is a current-limiting resistor for the LED", answer = true }, | ||
{ text = "The LED brightness can be controlled by the transistor's base current", answer = true }, | ||
{ text = "The circuit requires an operational amplifier", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Create a basic RC low-pass filter circuit with a cutoff frequency of 1 kHz. | ||
""" | ||
questions = [ | ||
{ text = "The circuit includes a resistor and a capacitor", answer = true }, | ||
{ text = "The cutoff frequency is 1 kHz", answer = true }, | ||
{ text = "The circuit attenuates high-frequency signals", answer = true }, | ||
{ text = "The circuit requires an inductor", answer = false } | ||
] | ||
|
||
# NOTE(review): this entry repeats word for word the earlier "simple LED driver
# circuit" problem in this file (same prompt, same four questions), so that
# problem is run twice. Confirm whether the duplication is intentional.
[[problems]] | ||
prompt = """ | ||
Design a simple LED driver circuit using a transistor and a current-limiting resistor. | ||
""" | ||
questions = [ | ||
{ text = "The circuit includes a transistor", answer = true }, | ||
{ text = "There is a current-limiting resistor for the LED", answer = true }, | ||
{ text = "The LED brightness can be controlled by the transistor's base current", answer = true }, | ||
{ text = "The circuit requires an operational amplifier", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Create a basic astable multivibrator circuit using two transistors to generate a square wave output. | ||
""" | ||
questions = [ | ||
{ text = "The circuit uses two transistors", answer = true }, | ||
{ text = "The output is a square wave", answer = true }, | ||
{ text = "The circuit includes capacitors for timing", answer = true }, | ||
{ text = "The circuit requires an external clock signal", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Design a voltage divider circuit to convert a 12V input to a 5V output. | ||
""" | ||
questions = [ | ||
{ text = "The circuit uses two resistors", answer = true }, | ||
{ text = "The output voltage is 5V", answer = true }, | ||
{ text = "The input voltage is 12V", answer = true }, | ||
{ text = "The circuit requires an operational amplifier", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Create a simple audio amplifier circuit using a single transistor. | ||
""" | ||
questions = [ | ||
{ text = "The circuit includes a transistor", answer = true }, | ||
{ text = "There is a coupling capacitor at the input", answer = true }, | ||
{ text = "The circuit can amplify small audio signals", answer = true }, | ||
{ text = "The circuit requires multiple power supply voltages", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Design a basic full-wave bridge rectifier circuit to convert AC to DC. | ||
""" | ||
questions = [ | ||
{ text = "The circuit uses four diodes", answer = true }, | ||
{ text = "The output is pulsating DC", answer = true }, | ||
{ text = "A capacitor can be added for smoothing", answer = true }, | ||
{ text = "The circuit requires a transformer", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Create a simple light-sensitive circuit using a photoresistor (LDR) to control an LED. | ||
""" | ||
questions = [ | ||
{ text = "The circuit includes a photoresistor", answer = true }, | ||
{ text = "The LED brightness changes with ambient light", answer = true }, | ||
{ text = "The circuit uses a transistor for switching", answer = true }, | ||
{ text = "The circuit requires an operational amplifier", answer = false } | ||
] | ||
|
||
[[problems]] | ||
prompt = """ | ||
Design a basic Wien bridge oscillator circuit to generate a sine wave output. | ||
""" | ||
questions = [ | ||
{ text = "The circuit uses an operational amplifier", answer = true }, | ||
{ text = "The output is a sine wave", answer = true }, | ||
{ text = "The circuit includes resistors and capacitors for frequency determination", answer = true }, | ||
{ text = "The circuit requires an external clock signal", answer = false } | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
import Anthropic from "@anthropic-ai/sdk" | ||
import dotenv from "dotenv" | ||
// Runs before the client is constructed, presumably so that values from a
// local .env file are present in process.env when the key is read below.
dotenv.config()

/**
 * Shared Anthropic SDK client, constructed with the API key taken from the
 * ANTHROPIC_API_KEY environment variable.
 */
export const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters