Skip to content

Commit

Permalink
copy files + repeat eval
Browse files Browse the repository at this point in the history
  • Loading branch information
laanak08 authored and zakiali committed Feb 28, 2025
1 parent 0229df4 commit a6b6d3b
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 23 deletions.
10 changes: 9 additions & 1 deletion crates/goose-bench/src/eval_suites/core/example.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use std::fs;

pub struct ExampleEval {}

Expand All @@ -12,8 +14,14 @@ impl ExampleEval {

#[async_trait]
impl Evaluation for ExampleEval {
async fn run(&self, mut agent: Box<dyn BenchAgent>) -> anyhow::Result<Vec<EvaluationMetric>> {
async fn run(
&self,
mut agent: Box<dyn BenchAgent>,
work_dir: &mut WorkDir,
) -> anyhow::Result<Vec<EvaluationMetric>> {
println!("ExampleEval - run");
let f = work_dir.fs_get(String::from("./arbitrary_dir/arbitrary_file.txt"))?;
let _contents = fs::read_to_string(f)?;
let metrics = Vec::new();
let _ = agent.prompt("What can you do?".to_string()).await;
Ok(metrics)
Expand Down
7 changes: 6 additions & 1 deletion crates/goose-bench/src/eval_suites/evaluation.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::work_dir::WorkDir;
use anyhow::Result;
use async_trait::async_trait;
use goose::message::Message;
Expand All @@ -20,6 +21,10 @@ pub trait BenchAgent: Send + Sync {

#[async_trait]
pub trait Evaluation: Send + Sync {
async fn run(&self, agent: Box<dyn BenchAgent>) -> Result<Vec<EvaluationMetric>>;
async fn run(
&self,
agent: Box<dyn BenchAgent>,
run_loc: &mut WorkDir,
) -> Result<Vec<EvaluationMetric>>;
fn name(&self) -> &str;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;

pub struct FlappyBird {}
Expand All @@ -12,7 +13,11 @@ impl FlappyBird {

#[async_trait]
impl Evaluation for FlappyBird {
async fn run(&self, mut agent: Box<dyn BenchAgent>) -> anyhow::Result<Vec<EvaluationMetric>> {
async fn run(
&self,
mut agent: Box<dyn BenchAgent>,
_: &mut WorkDir,
) -> anyhow::Result<Vec<EvaluationMetric>> {
println!("FlappyBird - run");
let metrics = Vec::new();
let _ = agent.prompt("What can you do?".to_string()).await;
Expand Down
92 changes: 86 additions & 6 deletions crates/goose-bench/src/work_dir.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,113 @@
use std::fs;
use std::io;
use std::path::Path;
use std::path::PathBuf;

/// A working directory for benchmark runs, together with the chain of
/// directories that have been entered through it.
#[derive(Debug)]
pub struct WorkDir {
    /// Directory this `WorkDir` was created for.
    pub path: PathBuf,
    /// Directories visited so far. The first entry is the directory the
    /// `WorkDir` was created with; `move_to` appends each directory it enters,
    /// and `fs_get` resolves missing artifacts against the first entry.
    traversal: Vec<PathBuf>,
}

impl Default for WorkDir {
fn default() -> Self {
let path = PathBuf::from(".").canonicalize().unwrap();
WorkDir {
path: PathBuf::from("."),
path: path.clone(),
traversal: vec![path.clone()],
}
}
}
impl WorkDir {
pub fn new(path: &str) -> Self {
let path = PathBuf::from(path);
WorkDir {
path: PathBuf::from(path),
path: path.clone(),
traversal: vec![path.clone()],
}
}

pub fn work_from(path: String) -> anyhow::Result<WorkDir> {
let _ = fs::create_dir_all(&path)?;
let _ = std::env::set_current_dir(&path)?;
Ok(WorkDir::new(path.as_str()))
pub fn at(path: String, include_dirs: Vec<PathBuf>) -> anyhow::Result<WorkDir> {
fs::create_dir_all(&path)?;

let dirs = include_dirs
.iter()
.map(|d| d.canonicalize().unwrap())
.collect::<Vec<_>>();

let p = PathBuf::from(&path).canonicalize()?;
let _: Vec<_> = dirs
.iter()
.map(|d| WorkDir::deep_copy(d.as_path(), p.as_path()))
.collect();

std::env::set_current_dir(&path)?;

Ok(WorkDir::new(p.to_string_lossy().to_string().as_str()))
}
/// Creates the directory `path` (including missing parents), makes it the
/// process's current directory, and records it in the traversal history.
///
/// Returns `self` so calls can be chained.
///
/// # Errors
///
/// Returns an error if the directory cannot be created or entered. In that
/// case the traversal history is left unchanged.
pub fn move_to(&mut self, path: String) -> anyhow::Result<&mut Self> {
    fs::create_dir_all(&path)?;
    std::env::set_current_dir(&path)?;
    // Record the directory only once we are actually inside it, so a failed
    // move never leaves a phantom entry in the history.
    self.traversal.push(PathBuf::from(path));
    Ok(self)
}

/// Makes the artifact at `path` available relative to the current directory
/// and returns `path` as a `PathBuf`.
///
/// If `path` already exists relative to the current directory nothing is
/// copied. Otherwise the artifact is looked up under the first directory in
/// the traversal history: when it is a directory there, that directory is
/// copied into the current directory; otherwise the directory containing it
/// is copied.
///
/// # Errors
///
/// Returns an error if the source location under the root or the current
/// directory cannot be canonicalized, or if the copy fails.
pub fn fs_get(&mut self, path: String) -> anyhow::Result<PathBuf> {
    let requested = Path::new(&path);
    if !requested.exists() {
        let root = &self.traversal[0];
        let at_root = root.join(requested);

        // `requested` is known to be missing here, so asking whether *it* is a
        // directory would always say no; the copy under the root is what
        // tells us whether a directory or a file was asked for.
        let artifact_at_root = if at_root.is_dir() {
            at_root.canonicalize()?
        } else {
            root.join(requested.parent().unwrap_or(Path::new("")))
                .canonicalize()?
        };

        let here = PathBuf::from(".").canonicalize()?;

        WorkDir::deep_copy(artifact_at_root.as_path(), here.as_path())?;
    }

    Ok(PathBuf::from(path))
}

/// Recursively copies the directory `src` into `dst`, so that the copy ends
/// up at `dst/<name of src>`.
///
/// Sub-directories are recreated (including empty ones) and every other
/// entry is copied with `fs::copy`.
///
/// # Errors
///
/// Returns `InvalidInput` if `src` has no final path component (for example
/// a path ending in `..`), and propagates any I/O error from reading the
/// source or writing the destination. If `src` cannot be read as a
/// directory, the error is returned before anything is created under `dst`.
fn deep_copy(src: &Path, dst: &Path) -> io::Result<()> {
    // Explicit work list of (source directory, destination parent) pairs; this
    // walks the tree without recursion.
    let mut pending: Vec<(PathBuf, PathBuf)> = vec![(src.to_path_buf(), dst.to_path_buf())];

    while let Some((from, into)) = pending.pop() {
        let name = from.file_name().ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidInput,
                "Source path must have a file name",
            )
        })?;
        let target = into.join(name);

        // Open the source first: if it is missing or not a directory we fail
        // here, before leaving a stray directory behind in the destination.
        let entries = fs::read_dir(&from)?;
        fs::create_dir_all(&target)?;

        for entry in entries {
            let entry = entry?;
            if entry.file_type()?.is_dir() {
                pending.push((entry.path(), target.clone()));
            } else {
                fs::copy(entry.path(), target.join(entry.file_name()))?;
            }
        }
    }

    Ok(())
}
}

impl Drop for WorkDir {
    /// Forgets the most recently entered directory and moves the process's
    /// current directory one level up.
    fn drop(&mut self) {
        self.traversal.pop();
        // Never panic in `drop`: if this runs while another panic is already
        // unwinding, a second panic aborts the process. Report and carry on.
        if let Err(err) = std::env::set_current_dir("..") {
            eprintln!("WorkDir: failed to return to parent directory: {}", err);
        }
    }
}
31 changes: 20 additions & 11 deletions crates/goose-cli/src/commands/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use goose::config::Config;
use goose::message::Message;
use goose_bench::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, EvaluationSuiteFactory};
use goose_bench::work_dir::WorkDir;
use std::path::PathBuf;

#[async_trait]
impl BenchAgent for Session {
Expand All @@ -16,29 +17,34 @@ impl BenchAgent for Session {
}
}

async fn run_eval(evaluation: Box<dyn Evaluation>) -> anyhow::Result<Vec<EvaluationMetric>> {
if let Ok(_) = WorkDir::work_from(format!("./{}", &evaluation.name())) {
#[allow(clippy::redundant_pattern_matching)]
async fn run_eval(
evaluation: Box<dyn Evaluation>,
work_dir: &mut WorkDir,
) -> anyhow::Result<Vec<EvaluationMetric>> {
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &evaluation.name())) {
let session = build_session(None, false, Vec::new(), Vec::new()).await;
let report = evaluation.run(Box::new(session)).await;
report
evaluation.run(Box::new(session), work_dir).await
} else {
Ok(vec![])
}
}

async fn run_suite(suite: &str) -> anyhow::Result<()> {
if let Ok(_) = WorkDir::work_from(format!("./{}", &suite)) {
#[allow(clippy::redundant_pattern_matching)]
async fn run_suite(suite: &str, work_dir: &mut WorkDir) -> anyhow::Result<()> {
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &suite)) {
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
for eval in evals {
run_eval(eval).await?;
run_eval(eval, work_dir).await?;
}
}
}

Ok(())
}

pub async fn run_benchmark(suites: Vec<String>) -> anyhow::Result<()> {
#[allow(clippy::redundant_pattern_matching)]
pub async fn run_benchmark(suites: Vec<String>, include_dirs: Vec<PathBuf>) -> anyhow::Result<()> {
let suites = EvaluationSuiteFactory::available_evaluations()
.into_iter()
.filter(|&s| suites.contains(&s.to_string()))
Expand All @@ -51,10 +57,13 @@ pub async fn run_benchmark(suites: Vec<String>) -> anyhow::Result<()> {

let current_time = Local::now().format("%H:%M:%S").to_string();
let current_date = Local::now().format("%Y-%m-%d").to_string();
if let Ok(_) = WorkDir::work_from(format!("./benchmark-{}", &provider_name)) {
if let Ok(_) = WorkDir::work_from(format!("./{}-{}", &current_date, current_time)) {
if let Ok(mut work_dir) = WorkDir::at(
format!("./benchmark-{}", &provider_name),
include_dirs.clone(),
) {
if let Ok(work_dir) = work_dir.move_to(format!("./{}-{}", &current_date, current_time)) {
for suite in suites {
run_suite(suite).await?;
run_suite(suite, work_dir).await?;
}
}
}
Expand Down
33 changes: 30 additions & 3 deletions crates/goose-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,24 @@ enum Command {
value_delimiter = ','
)]
suites: Vec<String>,

#[arg(
short = 'i',
long = "include-dir",
value_name = "DIR_NAME",
action = clap::ArgAction::Append,
long_help = "Make one or more dirs available to all bench suites. Specify either a single dir-name, a comma-separated list of dir-names, or use this multiple instances of this flag to specify multiple dirs.",
value_delimiter = ','
)]
include_dirs: Vec<PathBuf>,

#[arg(
long = "repeat",
value_name = "QUANTITY",
long_help = "Number of times to repeat the benchmark run.",
default_value = "1"
)]
repeat: usize,
},
}

Expand Down Expand Up @@ -306,9 +324,18 @@ async fn main() -> Result<()> {
}
Some(Command::Bench {
suites,
}) => {
let suites = if suites.is_empty() { vec!["core".to_string()] } else { suites };
run_benchmark(suites).await;
include_dirs,
repeat,
}) => {
let suites = if suites.is_empty() {
vec!["core".to_string()]
} else {
suites
};

for _ in 0..repeat {
let _ = run_benchmark(suites.clone(), include_dirs.clone()).await;
}
return Ok(());
}
None => {
Expand Down

0 comments on commit a6b6d3b

Please sign in to comment.