diff --git a/CHANGELOG.md b/CHANGELOG.md index e252fbd..bd62ba6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.21.0] - 2025-02-23 +[0.21.0]: https://github.com/bodo-run/yek/compare/v0.20.0...v0.21.0 +### Bug Fixes + +- Glob pattern handling in e2e tests + +### Documentation + +- Update README with glob pattern and file selection support + +### Features + +- Handle glob patterns in input paths + +### Testing + +- Add comprehensive tests for glob pattern support + +### Ci + +- Run release and publish jobs on main branch +- Only run release and publish on tag pushes +- Trigger release on tag merge to main + ## [0.20.0] - 2025-02-22 [0.20.0]: https://github.com/bodo-run/yek/compare/v0.19.0...v0.20.0 ### Bug Fixes @@ -24,6 +48,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add integration test +### Release + +- V0.20.0 + ## [0.19.0] - 2025-02-19 [0.19.0]: https://github.com/bodo-run/yek/compare/v0.18.0...v0.19.0 ### Bug Fixes diff --git a/Cargo.lock b/Cargo.lock index 71666bf..d6025bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4017,7 +4017,7 @@ dependencies = [ [[package]] name = "yek" -version = "0.20.0" +version = "0.21.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index f1f0319..2a72983 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yek" -version = "0.20.0" +version = "0.21.0" edition = "2021" description = "A tool to serialize a repository into chunks of text files" license = "MIT" diff --git a/README.md b/README.md index ca0cf87..f4ec2e0 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ By default: - Infers additional ignore patterns (binary, large, etc.). 
- Automatically detects if output is being piped and streams content instead of writing to files. - Supports processing multiple directories in a single command. +- Supports glob patterns and individual file selection. - Configurable via a `yek.yaml` file. Yek يک means "One" in Farsi/Persian. @@ -113,6 +114,25 @@ Process multiple directories: yek src/ tests/ ``` +Process multiple files: + +```bash +yek file1.txt file2.txt file3.txt +``` + +Use glob patterns: + +```bash +yek "src/**/*.ts" +``` + +```bash +yek "src/main.rs" "tests/*.rs" "docs/README.md" +``` + +> [!NOTE] +> When using glob patterns, make sure to quote them to prevent shell expansion. + ### CLI Reference ```bash diff --git a/src/parallel.rs b/src/parallel.rs index 4d034ed..71404d2 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -1,5 +1,6 @@ use crate::{config::YekConfig, priority::get_file_priority, Result}; use content_inspector::{inspect, ContentType}; +use glob::glob; use ignore::gitignore::GitignoreBuilder; use path_slash::PathBufExt; use rayon::prelude::*; @@ -81,12 +82,42 @@ pub fn process_files_parallel( config: &YekConfig, boost_map: &HashMap, ) -> Result> { - // If it's a file, process it directly - if base_path.is_file() { - return process_single_file(base_path, config, boost_map); + // Expand globs into a list of paths + let mut expanded_paths = Vec::new(); + let path_str = base_path.to_string_lossy(); + for entry in glob(&path_str)? 
{ + match entry { + Ok(path) => expanded_paths.push(path), + Err(e) => debug!("Glob entry error: {:?}", e), + } + } + + // If it's a single file (no glob expansion or single file result), process it directly + if expanded_paths.len() == 1 && expanded_paths[0].is_file() { + return process_single_file(&expanded_paths[0], config, boost_map); + } + + // Iterate over expanded paths, handling files and directories + let mut all_processed_files = Vec::new(); + for path in expanded_paths { + if path.is_file() { + all_processed_files.extend(process_single_file(&path, config, boost_map)?); + } else if path.is_dir() { + // For directories, use the original recursive logic + all_processed_files.extend(process_files_parallel_internal(&path, config, boost_map)?); + } } - // Otherwise, it's a directory, so walk it + Ok(all_processed_files) +} + +/// Internal function to handle directory recursion (separated for clarity) +fn process_files_parallel_internal( + base_path: &Path, + config: &YekConfig, + boost_map: &HashMap, +) -> Result> { + // It's a directory, so walk it let mut walk_builder = ignore::WalkBuilder::new(base_path); // Standard filters + no follow symlinks diff --git a/tests/config_test.rs b/tests/config_test.rs index 8d7f6d5..602ddfe 100644 --- a/tests/config_test.rs +++ b/tests/config_test.rs @@ -338,11 +338,11 @@ fn test_extend_config_with_defaults() { assert_eq!(cfg.input_paths, input_paths); assert_eq!(cfg.output_dir, Some(output_dir)); - assert_eq!(cfg.version, false); + assert!(!cfg.version); assert_eq!(cfg.max_size, "10MB".to_string()); assert_eq!(cfg.tokens, String::new()); - assert_eq!(cfg.json, false); - assert_eq!(cfg.debug, false); + assert!(!cfg.json); + assert!(!cfg.debug); assert_eq!(cfg.output_template, DEFAULT_OUTPUT_TEMPLATE.to_string()); assert_eq!(cfg.ignore_patterns, Vec::::new()); assert_eq!(cfg.unignore_patterns, Vec::::new()); @@ -355,8 +355,8 @@ fn test_extend_config_with_defaults() { .collect::>() ); assert_eq!(cfg.git_boost_max, Some(100)); 
- assert_eq!(cfg.stream, false); - assert_eq!(cfg.token_mode, false); + assert!(!cfg.stream); + assert!(!cfg.token_mode); assert_eq!(cfg.output_file_full_path, None); assert_eq!(cfg.max_git_depth, 100); } diff --git a/tests/e2e_test.rs b/tests/e2e_test.rs index 7848cd5..b2854aa 100644 --- a/tests/e2e_test.rs +++ b/tests/e2e_test.rs @@ -111,7 +111,7 @@ mod e2e_tests { assert!(output.status.success()); // Ensure output dir is printed in stdout - let stdout = String::from_utf8_lossy(&output.stdout); + let stdout = String::from_utf8(output.stdout)?; assert!( stdout.contains(&output_dir.display().to_string()), "Expected output directory `{}` to be printed in stdout, but it was {}", @@ -215,6 +215,75 @@ mod e2e_tests { Ok(()) } + #[test] + fn test_glob_pattern() -> Result<(), Box> { + let temp_dir = tempdir()?; + fs::write(temp_dir.path().join("test.txt"), "Test content")?; + + let output = Command::cargo_bin("yek")? + .current_dir(temp_dir.path()) + .arg("*.txt") + .output()?; + let stdout = String::from_utf8(output.stdout)?; + assert!(output.status.success()); + assert!(stdout.contains("Test content")); + Ok(()) + } + + #[test] + fn test_mix_of_files_and_dirs() -> Result<(), Box> { + let temp_dir = tempdir()?; + fs::write(temp_dir.path().join("test.txt"), "Test content")?; + fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?; + let dir = temp_dir.path().join("dir"); + fs::create_dir(&dir)?; + fs::write(dir.join("test3"), "Test content 3")?; + + Command::cargo_bin("yek")? + .current_dir(temp_dir.path()) + .arg("*.txt") + .assert() + .success(); + + let output = Command::cargo_bin("yek")? 
+ .current_dir(temp_dir.path()) + .arg("*.txt") + .output()?; + let stdout = String::from_utf8(output.stdout)?; + assert!(stdout.contains("Test content")); + assert!(stdout.contains("Test content 2")); + assert!(!stdout.contains("Test content 3")); + Ok(()) + } + + #[test] + fn test_mix_of_files_and_dirs_with_glob_pattern() -> Result<(), Box> { + let temp_dir = tempdir()?; + fs::write(temp_dir.path().join("test.txt"), "Test content")?; + fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?; + fs::write(temp_dir.path().join("code.rs"), "use std::fs;")?; + let dir = temp_dir.path().join("dir"); + fs::create_dir(&dir)?; + fs::write(dir.join("test4"), "Test content 4")?; + + Command::cargo_bin("yek")? + .current_dir(temp_dir.path()) + .args(["*.txt", "code.rs"]) + .assert() + .success(); + + let output = Command::cargo_bin("yek")? + .current_dir(temp_dir.path()) + .args(["*.txt", "code.rs"]) + .output()?; + let stdout = String::from_utf8(output.stdout)?; + assert!(stdout.contains("Test content")); + assert!(stdout.contains("Test content 2")); + assert!(!stdout.contains("Test content 4")); + assert!(stdout.contains("use std::fs;")); + Ok(()) + } + #[test] fn test_config_file() -> Result<(), Box> { let temp_dir = tempdir()?; diff --git a/tests/parallel_test.rs b/tests/parallel_test.rs index c55db31..181de32 100644 --- a/tests/parallel_test.rs +++ b/tests/parallel_test.rs @@ -1,8 +1,10 @@ +use anyhow::Result; use normalize_path::NormalizePath; use std::collections::HashMap; -use std::fs; +use std::fs::{self, File}; +use std::io::Write; use std::os::unix::fs::PermissionsExt; -use std::path::Path; +use std::path::{Path, PathBuf}; use tempfile::tempdir; use yek::config::YekConfig; use yek::parallel::process_files_parallel; @@ -62,7 +64,6 @@ fn test_process_files_parallel_empty() { #[test] fn test_process_files_parallel_with_files() { - use std::fs; let temp_dir = tempdir().expect("failed to create temp dir"); let file_names = vec!["a.txt", "b.txt", "c.txt"]; for 
&file in &file_names { @@ -135,3 +136,105 @@ fn test_process_files_parallel_walk_error() { let processed_files = result.unwrap(); assert_eq!(processed_files.len(), 0); // No files processed due to walk error } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_glob_pattern_single_file() -> Result<()> { + let temp_dir = tempdir()?; + let file_path = temp_dir.path().join("test.txt"); + let mut file = File::create(&file_path)?; + writeln!(file, "Test content")?; + + let glob_pattern = temp_dir.path().join("*.txt").to_string_lossy().to_string(); + let config = YekConfig::default(); + let boost_map = HashMap::new(); + + let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?; + assert_eq!(result.len(), 1); + assert_eq!(result[0].rel_path, "test.txt"); + + Ok(()) + } + + #[test] + fn test_glob_pattern_multiple_files() -> Result<()> { + let temp_dir = tempdir()?; + + // Create multiple test files + let files = vec!["test1.txt", "test2.txt", "other.md"]; + for fname in &files { + let file_path = temp_dir.path().join(fname); + let mut file = File::create(&file_path)?; + writeln!(file, "Test content for {}", fname)?; + } + + let glob_pattern = temp_dir.path().join("*.txt").to_string_lossy().to_string(); + let config = YekConfig::default(); + let boost_map = HashMap::new(); + + let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?; + assert_eq!(result.len(), 2); // Should only match .txt files + + let paths: Vec = result.iter().map(|f| f.rel_path.clone()).collect(); + assert!(paths.contains(&"test1.txt".to_string())); + assert!(paths.contains(&"test2.txt".to_string())); + + Ok(()) + } + + #[test] + fn test_glob_pattern_nested_directories() -> Result<()> { + let temp_dir = tempdir()?; + + // Create nested directory structure + let nested_dir = temp_dir.path().join("nested"); + fs::create_dir(&nested_dir)?; + + // Create files in both root and nested directory + let root_file = 
temp_dir.path().join("root.txt"); + let nested_file = nested_dir.join("nested.txt"); + let other_file = temp_dir.path().join("other.md"); + + for (path, content) in [ + (&root_file, "Root content"), + (&nested_file, "Nested content"), + (&other_file, "Other content"), + ] { + let mut file = File::create(path)?; + writeln!(file, "{}", content)?; + } + + let glob_pattern = temp_dir + .path() + .join("**/*.txt") + .to_string_lossy() + .to_string(); + let config = YekConfig::default(); + let boost_map = HashMap::new(); + + let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?; + assert_eq!(result.len(), 2); // Should match both .txt files + + let paths: Vec = result.iter().map(|f| f.rel_path.clone()).collect(); + assert!(paths.contains(&"root.txt".to_string())); + assert!(paths.contains(&"nested.txt".to_string())); + + Ok(()) + } + + #[test] + fn test_glob_pattern_no_matches() -> Result<()> { + let temp_dir = tempdir()?; + let glob_pattern = temp_dir.path().join("*.txt").to_string_lossy().to_string(); + let config = YekConfig::default(); + let boost_map = HashMap::new(); + + let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?; + assert!(result.is_empty()); + + Ok(()) + } +}