Skip to content

Commit

Permalink
Merge pull request #9 from ShiboSoftwareDev/main
Browse files Browse the repository at this point in the history
new benchmarking tool: evalite
  • Loading branch information
ShiboSoftwareDev authored Dec 7, 2024
2 parents bf36355 + 855b15a commit 3c44e2f
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,6 @@ dist
.DS_Store
.vscode
.aider*

# Evalite
evalite-report.jsonl
72 changes: 72 additions & 0 deletions benchmarks-evalite/benchmark.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import fs from 'fs';
import path from 'path';
import toml from 'toml';
import { anthropic } from '../lib/code-runner/anthropic';
import { safeEvaluateCode } from '../lib/code-runner/safe-evaluate-code';
import { askAboutOutput } from '../tests/fixtures/ask-about-output';
import { createCircuitBoard1Template } from '../prompt-templates/create-circuit-board1';
import { evalite } from "evalite";
import { Levenshtein } from "autoevals";

/** One benchmark entry as loaded from `problems.toml`. */
interface Problem {
  /** Natural-language request that is appended to the circuit-board prompt template. */
  prompt: string;
  /** Statements about the generated circuit, each paired with its expected truth value. */
  questions: Array<{ text: string; answer: boolean }>;
}

/**
 * Reads a TOML benchmark file and returns its `[[problems]]` entries.
 *
 * Only the `prompt` field and each question's `text` / `answer` fields are
 * copied into the result; any other keys in the file are dropped.
 *
 * @param filePath Path of the TOML file to read (read synchronously as UTF-8).
 * @returns One `Problem` per `[[problems]]` table, in file order.
 * @throws Error when the file has no top-level `problems` array, or when an
 *   entry lacks a string `prompt`, a `questions` array, or a question lacks a
 *   string `text` / boolean `answer`. File-system and TOML syntax errors from
 *   `fs.readFileSync` / `toml.parse` propagate unchanged.
 */
const loadProblems = (filePath: string): Problem[] => {
  const tomlContent = fs.readFileSync(filePath, 'utf-8');
  // Treat the parse result as untrusted: its shape depends entirely on the file.
  const parsedToml: unknown = toml.parse(tomlContent);

  const rawProblems =
    typeof parsedToml === 'object' && parsedToml !== null
      ? (parsedToml as Record<string, unknown>).problems
      : undefined;
  if (!Array.isArray(rawProblems)) {
    throw new Error(`${filePath}: expected a top-level [[problems]] array of tables`);
  }

  return rawProblems.map((problem: unknown, problemIndex: number) => {
    const where = `${filePath}: problems[${problemIndex}]`;
    const { prompt, questions } = (problem ?? {}) as Record<string, unknown>;

    if (typeof prompt !== 'string') {
      throw new Error(`${where}.prompt must be a string`);
    }
    if (!Array.isArray(questions)) {
      throw new Error(`${where}.questions must be an array`);
    }

    return {
      prompt,
      questions: questions.map((question: unknown, questionIndex: number) => {
        const { text, answer } = (question ?? {}) as Record<string, unknown>;
        // Fail here, with context, rather than later when the benchmark
        // calls `answer.toString()` on a missing value.
        if (typeof text !== 'string' || typeof answer !== 'boolean') {
          throw new Error(
            `${where}.questions[${questionIndex}] must have a string "text" and a boolean "answer"`,
          );
        }
        return { text, answer };
      }),
    };
  });
};

/**
 * Asks the Anthropic model to produce a circuit for the given request.
 *
 * The user message is the circuit-board prompt template (rendered with no
 * current code and no available imports) followed by a blank line and
 * `prompt`.
 *
 * @param prompt The benchmark problem text.
 * @returns The `text` of the first content block in the reply, or an empty
 *   string when that block is absent or has no text.
 */
const runAI = async (prompt: string): Promise<string> => {
  const boardTemplate = createCircuitBoard1Template({
    currentCode: "",
    availableImports: {},
  });
  const userContent = boardTemplate + "\n\n" + prompt;

  const response = await anthropic.messages.create({
    model: 'claude-3-5-haiku-20241022',
    max_tokens: 1024,
    system: "You are an expert in electronic circuit design and tscircuit.",
    messages: [{ role: 'user', content: userContent }],
  });

  // The cast sidesteps the SDK's content-block union; only the first block is read.
  const firstBlockText = (response as any).content[0]?.text;
  return firstBlockText || '';
};

// Register one evalite suite per problem in the sibling problems.toml,
// named "problem: 1", "problem: 2", ... in file order.
const problems = loadProblems(path.join(__dirname, './problems.toml'));

for (const [index, problem] of problems.entries()) {
  evalite(`problem: ${index + 1}`, {
    // Generates the circuit once per problem, then fans out into one data
    // row per question about that circuit.
    data: async () => {
      const reply = await runAI(problem.prompt);
      // Only the first ```tsx fenced block of the reply is considered.
      const fenced = /```tsx\s*([\s\S]*?)\s*```/.exec(reply);
      const code = fenced ? fenced[1].trim() : '';

      const evaluation = safeEvaluateCode(code, {
        outputType: 'board',
        preSuppliedImports: {},
      });
      // Code that fails evaluation is passed on as null so the task can skip it.
      const runnableCode = evaluation.success ? code : null;

      return problem.questions.map((question) => ({
        input: { code: runnableCode, question: question.text },
        expected: question.answer.toString(),
      }));
    },
    // Yields "" when there is no runnable code; otherwise the stringified
    // answer returned by askAboutOutput.
    task: async (input) => {
      if (input.code) {
        const answer = await askAboutOutput(input.code, input.question);
        return answer.toString();
      }
      return "";
    },
    // NOTE(review): Levenshtein is an edit-distance scorer, so a wrong
    // "true"/"false" answer may still receive partial credit — confirm this
    // is intended or consider an exact-match scorer.
    scorers: [Levenshtein],
  });
}
109 changes: 109 additions & 0 deletions benchmarks-evalite/problems.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
[[problems]]
prompt = """
Create a 555 timer with a blinking LED.
"""
questions = [
{ text = "The circuit includes a 555 timer IC", answer = true },
{ text = "The LED blinks at a frequency of 1 Hz", answer = true },
{ text = "The circuit uses a resistor and capacitor to set the timing", answer = true },
{ text = "The LED is connected directly to the 555 timer output", answer = false }
]

[[problems]]
prompt = """
Design a simple LED driver circuit using a transistor and a current-limiting resistor.
"""
questions = [
{ text = "The circuit includes a transistor", answer = true },
{ text = "There is a current-limiting resistor for the LED", answer = true },
{ text = "The LED brightness can be controlled by the transistor's base current", answer = true },
{ text = "The circuit requires an operational amplifier", answer = false }
]

[[problems]]
prompt = """
Create a basic RC low-pass filter circuit with a cutoff frequency of 1 kHz.
"""
questions = [
{ text = "The circuit includes a resistor and a capacitor", answer = true },
{ text = "The cutoff frequency is 1 kHz", answer = true },
{ text = "The circuit attenuates high-frequency signals", answer = true },
{ text = "The circuit requires an inductor", answer = false }
]

# NOTE(review): this entry has the same prompt and questions as the earlier
# "LED driver circuit using a transistor" problem in this file, so that
# problem is evaluated twice. Confirm whether the duplicate is intentional.
[[problems]]
prompt = """
Design a simple LED driver circuit using a transistor and a current-limiting resistor.
"""
questions = [
{ text = "The circuit includes a transistor", answer = true },
{ text = "There is a current-limiting resistor for the LED", answer = true },
{ text = "The LED brightness can be controlled by the transistor's base current", answer = true },
{ text = "The circuit requires an operational amplifier", answer = false }
]

[[problems]]
prompt = """
Create a basic astable multivibrator circuit using two transistors to generate a square wave output.
"""
questions = [
{ text = "The circuit uses two transistors", answer = true },
{ text = "The output is a square wave", answer = true },
{ text = "The circuit includes capacitors for timing", answer = true },
{ text = "The circuit requires an external clock signal", answer = false }
]

[[problems]]
prompt = """
Design a voltage divider circuit to convert a 12V input to a 5V output.
"""
questions = [
{ text = "The circuit uses two resistors", answer = true },
{ text = "The output voltage is 5V", answer = true },
{ text = "The input voltage is 12V", answer = true },
{ text = "The circuit requires an operational amplifier", answer = false }
]

[[problems]]
prompt = """
Create a simple audio amplifier circuit using a single transistor.
"""
questions = [
{ text = "The circuit includes a transistor", answer = true },
{ text = "There is a coupling capacitor at the input", answer = true },
{ text = "The circuit can amplify small audio signals", answer = true },
{ text = "The circuit requires multiple power supply voltages", answer = false }
]

[[problems]]
prompt = """
Design a basic full-wave bridge rectifier circuit to convert AC to DC.
"""
questions = [
{ text = "The circuit uses four diodes", answer = true },
{ text = "The output is pulsating DC", answer = true },
{ text = "A capacitor can be added for smoothing", answer = true },
{ text = "The circuit requires a transformer", answer = false }
]

[[problems]]
prompt = """
Create a simple light-sensitive circuit using a photoresistor (LDR) to control an LED.
"""
questions = [
{ text = "The circuit includes a photoresistor", answer = true },
{ text = "The LED brightness changes with ambient light", answer = true },
{ text = "The circuit uses a transistor for switching", answer = true },
{ text = "The circuit requires an operational amplifier", answer = false }
]

[[problems]]
prompt = """
Design a basic Wien bridge oscillator circuit to generate a sine wave output.
"""
questions = [
{ text = "The circuit uses an operational amplifier", answer = true },
{ text = "The output is a sine wave", answer = true },
{ text = "The circuit includes resistors and capacitors for frequency determination", answer = true },
{ text = "The circuit requires an external clock signal", answer = false }
]
55 changes: 55 additions & 0 deletions benchmarks/problems.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,58 @@ questions = [
{ text = "The circuit includes capacitors for timing", answer = true },
{ text = "The circuit requires an external clock signal", answer = false }
]

[[problems]]
prompt = """
Design a voltage divider circuit to convert a 12V input to a 5V output.
"""
questions = [
{ text = "The circuit uses two resistors", answer = true },
{ text = "The output voltage is 5V", answer = true },
{ text = "The input voltage is 12V", answer = true },
{ text = "The circuit requires an operational amplifier", answer = false }
]

[[problems]]
prompt = """
Create a simple audio amplifier circuit using a single transistor.
"""
questions = [
{ text = "The circuit includes a transistor", answer = true },
{ text = "There is a coupling capacitor at the input", answer = true },
{ text = "The circuit can amplify small audio signals", answer = true },
{ text = "The circuit requires multiple power supply voltages", answer = false }
]

[[problems]]
prompt = """
Design a basic full-wave bridge rectifier circuit to convert AC to DC.
"""
questions = [
{ text = "The circuit uses four diodes", answer = true },
{ text = "The output is pulsating DC", answer = true },
{ text = "A capacitor can be added for smoothing", answer = true },
{ text = "The circuit requires a transformer", answer = false }
]

[[problems]]
prompt = """
Create a simple light-sensitive circuit using a photoresistor (LDR) to control an LED.
"""
questions = [
{ text = "The circuit includes a photoresistor", answer = true },
{ text = "The LED brightness changes with ambient light", answer = true },
{ text = "The circuit uses a transistor for switching", answer = true },
{ text = "The circuit requires an operational amplifier", answer = false }
]

[[problems]]
prompt = """
Design a basic Wien bridge oscillator circuit to generate a sine wave output.
"""
questions = [
{ text = "The circuit uses an operational amplifier", answer = true },
{ text = "The output is a sine wave", answer = true },
{ text = "The circuit includes resistors and capacitors for frequency determination", answer = true },
{ text = "The circuit requires an external clock signal", answer = false }
]
Binary file modified bun.lockb
Binary file not shown.
4 changes: 3 additions & 1 deletion lib/code-runner/anthropic.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Anthropic from "@anthropic-ai/sdk"
import dotenv from "dotenv"
dotenv.config()

export const anthropic = new Anthropic()
export const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })
13 changes: 10 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
"author": "Severin Ibarluzea",
"scripts": {
"build": "tsup-node lib/index.ts lib/code-runner/index.ts lib/code-runner-utils/index.ts --format esm --dts",
"test": "bun test --timeout 60000"
"test": "bun test --timeout 60000",
"eval": "evalite",
"eval:watch": "evalite watch"
},
"exports": {
"./code-runner": "./dist/code-runner/index.js",
Expand All @@ -20,15 +22,19 @@
"devDependencies": {
"@anthropic-ai/sdk": "^0.31.0",
"@biomejs/biome": "^1.9.3",
"@tscircuit/core": "^0.0.153",
"@tscircuit/core": "^0.0.218",
"@types/babel__standalone": "^7.1.7",
"@types/bun": "latest",
"@types/debug": "^4.1.12",
"@types/react": "^18.3.11",
"@typescript/ata": "^0.9.7",
"@typescript/vfs": "^1.6.0",
"autoevals": "^0.0.108",
"evalite": "^0.3.0",
"react": "^18.3.1",
"tsup": "^8.3.0"
"tsup": "^8.3.0",
"vite": "^6.0.3",
"vitest": "^2.1.8"
},
"peerDependencies": {
"typescript": "^5.0.0"
Expand All @@ -37,6 +43,7 @@
"@babel/standalone": "^7.25.7",
"@tscircuit/featured-snippets": "^0.0.1",
"debug": "^4.3.7",
"dotenv": "^16.4.7",
"extract-codefence": "^0.0.4",
"toml": "^3.0.0"
}
Expand Down
27 changes: 13 additions & 14 deletions prompt-templates/create-circuit-board1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ pinrow10
tssop20_p0.5mm
sot23
### Notes
- Any component may have a pcbX and/or a pcbY representing the center of the
Expand Down Expand Up @@ -80,10 +81,9 @@ Examples:
<trace from=".U1 .D3" to=".U1 .GND" />
<trace from=".U1 .D2" to="net.VCC" />
${
!availableImports
? ""
: `### Importing Components
${!availableImports
? ""
: `### Importing Components
You can import a variety of components from the tscircuit registry. tscircuit
registry components are always prefixed with \`@tsci/\`. Make sure to include
Expand All @@ -93,22 +93,21 @@ If you are not told explicitly that an import exists, do not import it.
#### Available Imports
${
!availableImports
? "There are no available imports."
: Object.entries(availableImports)
.map(([name, description]) =>
`
${!availableImports
? "There are no available imports."
: Object.entries(availableImports)
.map(([name, description]) =>
`
##### \`${name}\`
${description}
`.trim(),
)
.join("\n")
}
)
.join("\n")
}
`
}
}
### Quirks
Expand Down

0 comments on commit 3c44e2f

Please sign in to comment.