Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow globs to be passed into yek #111

Merged
merged 6 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions src/parallel.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{config::YekConfig, priority::get_file_priority, Result};
use content_inspector::{inspect, ContentType};
use glob::glob;
use ignore::gitignore::GitignoreBuilder;
use path_slash::PathBufExt;
use rayon::prelude::*;
Expand Down Expand Up @@ -81,12 +82,42 @@ pub fn process_files_parallel(
config: &YekConfig,
boost_map: &HashMap<String, i32>,
) -> Result<Vec<ProcessedFile>> {
// If it's a file, process it directly
if base_path.is_file() {
return process_single_file(base_path, config, boost_map);
// Expand globs into a list of paths
let mut expanded_paths = Vec::new();
let path_str = base_path.to_string_lossy();
for entry in glob(&path_str)? {
match entry {
Ok(path) => expanded_paths.push(path),
Err(e) => debug!("Glob entry error: {:?}", e),
}
}

// If it's a single file (no glob expansion or single file result), process it directly
if expanded_paths.len() == 1 && expanded_paths[0].is_file() {
return process_single_file(&expanded_paths[0], config, boost_map);
}

// Iterate over expanded paths, handling files and directories
let mut all_processed_files = Vec::new();
for path in expanded_paths {
if path.is_file() {
all_processed_files.extend(process_single_file(&path, config, boost_map)?);
} else if path.is_dir() {
// For directories, use the original recursive logic
all_processed_files.extend(process_files_parallel_internal(&path, config, boost_map)?);
}
}

// Otherwise, it's a directory, so walk it
Ok(all_processed_files)
}

/// Internal function to handle directory recursion (separated for clarity)
fn process_files_parallel_internal(
base_path: &Path,
config: &YekConfig,
boost_map: &HashMap<String, i32>,
) -> Result<Vec<ProcessedFile>> {
// It's a directory, so walk it
let mut walk_builder = ignore::WalkBuilder::new(base_path);

// Standard filters + no follow symlinks
Expand Down
10 changes: 5 additions & 5 deletions tests/config_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,11 @@ fn test_extend_config_with_defaults() {

assert_eq!(cfg.input_paths, input_paths);
assert_eq!(cfg.output_dir, Some(output_dir));
assert_eq!(cfg.version, false);
assert!(!cfg.version);
assert_eq!(cfg.max_size, "10MB".to_string());
assert_eq!(cfg.tokens, String::new());
assert_eq!(cfg.json, false);
assert_eq!(cfg.debug, false);
assert!(!cfg.json);
assert!(!cfg.debug);
assert_eq!(cfg.output_template, DEFAULT_OUTPUT_TEMPLATE.to_string());
assert_eq!(cfg.ignore_patterns, Vec::<String>::new());
assert_eq!(cfg.unignore_patterns, Vec::<String>::new());
Expand All @@ -355,8 +355,8 @@ fn test_extend_config_with_defaults() {
.collect::<Vec<_>>()
);
assert_eq!(cfg.git_boost_max, Some(100));
assert_eq!(cfg.stream, false);
assert_eq!(cfg.token_mode, false);
assert!(!cfg.stream);
assert!(!cfg.token_mode);
assert_eq!(cfg.output_file_full_path, None);
assert_eq!(cfg.max_git_depth, 100);
}
Expand Down
76 changes: 75 additions & 1 deletion tests/e2e_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ mod e2e_tests {
assert!(output.status.success());

// Ensure output dir is printed in stdout
let stdout = String::from_utf8_lossy(&output.stdout);
let stdout = String::from_utf8(output.stdout)?;
assert!(
stdout.contains(&output_dir.display().to_string()),
"Expected output directory `{}` to be printed in stdout, but it was {}",
Expand Down Expand Up @@ -215,6 +215,80 @@ mod e2e_tests {
Ok(())
}

/// End-to-end check that a glob pattern given as a CLI argument is expanded.
///
/// Writes a single `test.txt` into a fresh temporary directory, runs the `yek`
/// binary from that directory with the literal argument `*.txt`, and expects a
/// successful exit with the file's content present on stdout.
#[test]
fn test_glob_pattern() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;

    // Run the binary once and check exit status and stdout of that same run,
    // so the content assertion can never be made against a failed invocation.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .arg("*.txt")
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {:?}; stderr: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    assert!(stdout.contains("Test content"));
    Ok(())
}

/// End-to-end check that `*.txt` selects only the matching top-level files.
///
/// Layout: `test.txt` and `test2.txt` in the temp dir root, plus `dir/test3`.
/// Running `yek *.txt` from the root must succeed, print the content of both
/// `.txt` files, and must not print the content of `dir/test3`.
#[test]
fn test_mix_of_files_and_dirs() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;
    fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
    let dir = temp_dir.path().join("dir");
    fs::create_dir(&dir)?;
    fs::write(dir.join("test3"), "Test content 3")?;

    // Run the binary once and check exit status and stdout of that same run.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .arg("*.txt")
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {:?}; stderr: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    assert!(stdout.contains("Test content"));
    assert!(stdout.contains("Test content 2"));
    // "Test content" is a prefix of "Test content 2", so the first assertion
    // alone cannot prove test.txt was included; require one occurrence per file.
    assert!(
        stdout.matches("Test content").count() >= 2,
        "expected content of both test.txt and test2.txt in stdout, got: {}",
        stdout
    );
    assert!(!stdout.contains("Test content 3"));
    Ok(())
}

/// End-to-end check that a glob argument and a plain file argument can be mixed.
///
/// Layout: `test.txt`, `test2.txt` and `code.rs` in the temp dir root, plus
/// `dir/test4`. Running `yek *.txt code.rs` from the root must succeed, print
/// the content of the two `.txt` files and of `code.rs`, and must not print the
/// content of `dir/test4`.
#[test]
fn test_mix_of_files_and_dirs_with_glob_pattern() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;
    fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
    fs::write(temp_dir.path().join("code.rs"), "use std::fs;")?;
    let dir = temp_dir.path().join("dir");
    fs::create_dir(&dir)?;
    fs::write(dir.join("test4"), "Test content 4")?;

    // Run the binary once and check exit status and stdout of that same run.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .args(["*.txt", "code.rs"])
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {:?}; stderr: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    assert!(stdout.contains("Test content"));
    assert!(stdout.contains("Test content 2"));
    // "Test content" is a prefix of "Test content 2", so the first assertion
    // alone cannot prove test.txt was included; require one occurrence per file.
    assert!(
        stdout.matches("Test content").count() >= 2,
        "expected content of both test.txt and test2.txt in stdout, got: {}",
        stdout
    );
    assert!(!stdout.contains("Test content 4"));
    assert!(stdout.contains("use std::fs;"));
    Ok(())
}

#[test]
fn test_config_file() -> Result<(), Box<dyn std::error::Error>> {
let temp_dir = tempdir()?;
Expand Down
109 changes: 106 additions & 3 deletions tests/parallel_test.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use anyhow::Result;
use normalize_path::NormalizePath;
use std::collections::HashMap;
use std::fs;
use std::fs::{self, File};
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::path::{Path, PathBuf};
use tempfile::tempdir;
use yek::config::YekConfig;
use yek::parallel::process_files_parallel;
Expand Down Expand Up @@ -62,7 +64,6 @@ fn test_process_files_parallel_empty() {

#[test]
fn test_process_files_parallel_with_files() {
use std::fs;
let temp_dir = tempdir().expect("failed to create temp dir");
let file_names = vec!["a.txt", "b.txt", "c.txt"];
for &file in &file_names {
Expand Down Expand Up @@ -135,3 +136,105 @@ fn test_process_files_parallel_walk_error() {
let processed_files = result.unwrap();
assert_eq!(processed_files.len(), 0); // No files processed due to walk error
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Creates (or truncates) the file at `path` and writes `content` plus a
    /// trailing newline into it.
    fn write_line(path: &Path, content: &str) -> Result<()> {
        let mut handle = File::create(path)?;
        writeln!(handle, "{}", content)?;
        Ok(())
    }

    /// Builds the glob path `<dir>/<pattern>` that is handed to
    /// `process_files_parallel`, going through the same lossy string
    /// conversion the pattern would get when spelled out as text.
    fn glob_under(dir: &Path, pattern: &str) -> PathBuf {
        PathBuf::from(dir.join(pattern).to_string_lossy().to_string())
    }

    /// A `*.txt` pattern that matches exactly one file yields exactly that file.
    #[test]
    fn test_glob_pattern_single_file() -> Result<()> {
        let workspace = tempdir()?;
        write_line(&workspace.path().join("test.txt"), "Test content")?;

        let settings = YekConfig::default();
        let boosts = HashMap::new();
        let matched =
            process_files_parallel(&glob_under(workspace.path(), "*.txt"), &settings, &boosts)?;

        assert_eq!(matched.len(), 1);
        assert_eq!(matched[0].rel_path, "test.txt");
        Ok(())
    }

    /// A `*.txt` pattern picks up every `.txt` file and leaves `other.md` out.
    #[test]
    fn test_glob_pattern_multiple_files() -> Result<()> {
        let workspace = tempdir()?;

        // Two files that match the pattern and one that does not
        for name in ["test1.txt", "test2.txt", "other.md"] {
            write_line(
                &workspace.path().join(name),
                &format!("Test content for {}", name),
            )?;
        }

        let settings = YekConfig::default();
        let boosts = HashMap::new();
        let matched =
            process_files_parallel(&glob_under(workspace.path(), "*.txt"), &settings, &boosts)?;

        // Only the .txt files may be returned
        assert_eq!(matched.len(), 2);
        assert!(matched.iter().any(|f| f.rel_path == "test1.txt"));
        assert!(matched.iter().any(|f| f.rel_path == "test2.txt"));
        Ok(())
    }

    /// A `**/*.txt` pattern matches `.txt` files in the root and in a nested
    /// directory, and ignores the `.md` file.
    #[test]
    fn test_glob_pattern_nested_directories() -> Result<()> {
        let workspace = tempdir()?;

        // One sub-directory below the temp root
        let sub_dir = workspace.path().join("nested");
        fs::create_dir(&sub_dir)?;

        // Files in both the root and the nested directory
        write_line(&workspace.path().join("root.txt"), "Root content")?;
        write_line(&sub_dir.join("nested.txt"), "Nested content")?;
        write_line(&workspace.path().join("other.md"), "Other content")?;

        let settings = YekConfig::default();
        let boosts = HashMap::new();
        let matched = process_files_parallel(
            &glob_under(workspace.path(), "**/*.txt"),
            &settings,
            &boosts,
        )?;

        // Both .txt files are expected, regardless of depth
        assert_eq!(matched.len(), 2);
        assert!(matched.iter().any(|f| f.rel_path == "root.txt"));
        assert!(matched.iter().any(|f| f.rel_path == "nested.txt"));
        Ok(())
    }

    /// A pattern that matches nothing produces an empty result, not an error.
    #[test]
    fn test_glob_pattern_no_matches() -> Result<()> {
        let workspace = tempdir()?;

        let settings = YekConfig::default();
        let boosts = HashMap::new();
        let matched =
            process_files_parallel(&glob_under(workspace.path(), "*.txt"), &settings, &boosts)?;

        assert!(matched.is_empty());
        Ok(())
    }
}