diff --git a/CHANGELOG.md b/CHANGELOG.md
index e252fbd..bd62ba6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.21.0] - 2025-02-23
+[0.21.0]: https://github.com/bodo-run/yek/compare/v0.20.0...v0.21.0
+### Bug Fixes
+
+- Glob pattern handling in e2e tests
+
+### Documentation
+
+- Update README with glob pattern and file selection support
+
+### Features
+
+- Handle glob patterns in input paths
+
+### Testing
+
+- Add comprehensive tests for glob pattern support
+
+### Ci
+
+- Run release and publish jobs on main branch
+- Only run release and publish on tag pushes
+- Trigger release on tag merge to main
+
## [0.20.0] - 2025-02-22
[0.20.0]: https://github.com/bodo-run/yek/compare/v0.19.0...v0.20.0
### Bug Fixes
@@ -24,6 +48,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add integration test
+### Release
+
+- V0.20.0
+
## [0.19.0] - 2025-02-19
[0.19.0]: https://github.com/bodo-run/yek/compare/v0.18.0...v0.19.0
### Bug Fixes
diff --git a/Cargo.lock b/Cargo.lock
index 71666bf..d6025bf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4017,7 +4017,7 @@ dependencies = [
[[package]]
name = "yek"
-version = "0.20.0"
+version = "0.21.0"
dependencies = [
"anyhow",
"assert_cmd",
diff --git a/Cargo.toml b/Cargo.toml
index f1f0319..2a72983 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "yek"
-version = "0.20.0"
+version = "0.21.0"
edition = "2021"
description = "A tool to serialize a repository into chunks of text files"
license = "MIT"
diff --git a/README.md b/README.md
index ca0cf87..f4ec2e0 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ By default:
- Infers additional ignore patterns (binary, large, etc.).
- Automatically detects if output is being piped and streams content instead of writing to files.
- Supports processing multiple directories in a single command.
+- Supports glob patterns and individual file selection.
- Configurable via a `yek.yaml` file.
Yek يک means "One" in Farsi/Persian.
@@ -113,6 +114,25 @@ Process multiple directories:
yek src/ tests/
```
+Process multiple files:
+
+```bash
+yek file1.txt file2.txt file3.txt
+```
+
+Use glob patterns:
+
+```bash
+yek "src/**/*.ts"
+```
+
+```bash
+yek "src/main.rs" "tests/*.rs" "docs/README.md"
+```
+
+> [!NOTE]
+> When using glob patterns, make sure to quote them to prevent shell expansion.
+
### CLI Reference
```bash
diff --git a/src/parallel.rs b/src/parallel.rs
index 4d034ed..71404d2 100644
--- a/src/parallel.rs
+++ b/src/parallel.rs
@@ -1,5 +1,6 @@
use crate::{config::YekConfig, priority::get_file_priority, Result};
use content_inspector::{inspect, ContentType};
+use glob::glob;
use ignore::gitignore::GitignoreBuilder;
use path_slash::PathBufExt;
use rayon::prelude::*;
@@ -81,12 +82,42 @@ pub fn process_files_parallel(
config: &YekConfig,
boost_map: &HashMap,
) -> Result> {
- // If it's a file, process it directly
- if base_path.is_file() {
- return process_single_file(base_path, config, boost_map);
+ // Expand globs into a list of paths
+ let mut expanded_paths = Vec::new();
+ let path_str = base_path.to_string_lossy();
+ for entry in glob(&path_str)? {
+ match entry {
+ Ok(path) => expanded_paths.push(path),
+ Err(e) => debug!("Glob entry error: {:?}", e),
+ }
+ }
+
+ // If it's a single file (no glob expansion or single file result), process it directly
+ if expanded_paths.len() == 1 && expanded_paths[0].is_file() {
+ return process_single_file(&expanded_paths[0], config, boost_map);
+ }
+
+ // Iterate over expanded paths, handling files and directories
+ let mut all_processed_files = Vec::new();
+ for path in expanded_paths {
+ if path.is_file() {
+ all_processed_files.extend(process_single_file(&path, config, boost_map)?);
+ } else if path.is_dir() {
+ // For directories, use the original recursive logic
+ all_processed_files.extend(process_files_parallel_internal(&path, config, boost_map)?);
+ }
}
- // Otherwise, it's a directory, so walk it
+ Ok(all_processed_files)
+}
+
+/// Internal function to handle directory recursion (separated for clarity)
+fn process_files_parallel_internal(
+ base_path: &Path,
+ config: &YekConfig,
+ boost_map: &HashMap,
+) -> Result> {
+ // It's a directory, so walk it
let mut walk_builder = ignore::WalkBuilder::new(base_path);
// Standard filters + no follow symlinks
diff --git a/tests/config_test.rs b/tests/config_test.rs
index 8d7f6d5..602ddfe 100644
--- a/tests/config_test.rs
+++ b/tests/config_test.rs
@@ -338,11 +338,11 @@ fn test_extend_config_with_defaults() {
assert_eq!(cfg.input_paths, input_paths);
assert_eq!(cfg.output_dir, Some(output_dir));
- assert_eq!(cfg.version, false);
+ assert!(!cfg.version);
assert_eq!(cfg.max_size, "10MB".to_string());
assert_eq!(cfg.tokens, String::new());
- assert_eq!(cfg.json, false);
- assert_eq!(cfg.debug, false);
+ assert!(!cfg.json);
+ assert!(!cfg.debug);
assert_eq!(cfg.output_template, DEFAULT_OUTPUT_TEMPLATE.to_string());
assert_eq!(cfg.ignore_patterns, Vec::::new());
assert_eq!(cfg.unignore_patterns, Vec::::new());
@@ -355,8 +355,8 @@ fn test_extend_config_with_defaults() {
.collect::>()
);
assert_eq!(cfg.git_boost_max, Some(100));
- assert_eq!(cfg.stream, false);
- assert_eq!(cfg.token_mode, false);
+ assert!(!cfg.stream);
+ assert!(!cfg.token_mode);
assert_eq!(cfg.output_file_full_path, None);
assert_eq!(cfg.max_git_depth, 100);
}
diff --git a/tests/e2e_test.rs b/tests/e2e_test.rs
index 7848cd5..b2854aa 100644
--- a/tests/e2e_test.rs
+++ b/tests/e2e_test.rs
@@ -111,7 +111,7 @@ mod e2e_tests {
assert!(output.status.success());
// Ensure output dir is printed in stdout
- let stdout = String::from_utf8_lossy(&output.stdout);
+ let stdout = String::from_utf8(output.stdout)?;
assert!(
stdout.contains(&output_dir.display().to_string()),
"Expected output directory `{}` to be printed in stdout, but it was {}",
@@ -215,6 +215,75 @@ mod e2e_tests {
Ok(())
}
+ #[test]
+ fn test_glob_pattern() -> Result<(), Box> { // e2e: the yek binary is given a literal `*.txt` argument. NOTE(review): `Box` shows no type argument here — confirm against the real file
+ let temp_dir = tempdir()?;
+ fs::write(temp_dir.path().join("test.txt"), "Test content")?; // the only file the pattern can match
+
+ let output = Command::cargo_bin("yek")?
+ .current_dir(temp_dir.path()) // the pattern below is relative, so the child runs inside the temp dir
+ .arg("*.txt") // nothing in this test expands the pattern before it reaches yek
+ .output()?;
+ let stdout = String::from_utf8(output.stdout)?; // `?` fails the test if stdout is not valid UTF-8
+ assert!(output.status.success());
+ assert!(stdout.contains("Test content")); // the matched file's content is expected on stdout
+ Ok(())
+ }
+
+ #[test]
+ fn test_mix_of_files_and_dirs() -> Result<(), Box> { // e2e: `*.txt` should include the top-level .txt files and leave `dir/` contents out. NOTE(review): despite the name, no directory is passed as an argument
+ let temp_dir = tempdir()?;
+ fs::write(temp_dir.path().join("test.txt"), "Test content")?;
+ fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
+ let dir = temp_dir.path().join("dir");
+ fs::create_dir(&dir)?;
+ fs::write(dir.join("test3"), "Test content 3")?; // negative case: lives in a subdirectory and has no `.txt` extension
+
+ Command::cargo_bin("yek")? // first run: only the exit status is checked
+ .current_dir(temp_dir.path())
+ .arg("*.txt")
+ .assert()
+ .success();
+
+ let output = Command::cargo_bin("yek")? // second run with the same argument: stdout is captured for the content checks
+ .current_dir(temp_dir.path())
+ .arg("*.txt")
+ .output()?;
+ let stdout = String::from_utf8(output.stdout)?;
+ assert!(stdout.contains("Test content")); // NOTE(review): also satisfied by "Test content 2", so it does not isolate test.txt
+ assert!(stdout.contains("Test content 2"));
+ assert!(!stdout.contains("Test content 3")); // content from `dir/test3` must not appear
+ Ok(())
+ }
+
+ #[test]
+ fn test_mix_of_files_and_dirs_with_glob_pattern() -> Result<(), Box> { // e2e: one glob argument plus one plain file argument in the same invocation
+ let temp_dir = tempdir()?;
+ fs::write(temp_dir.path().join("test.txt"), "Test content")?;
+ fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
+ fs::write(temp_dir.path().join("code.rs"), "use std::fs;")?; // selected by its explicit path, not by the glob
+ let dir = temp_dir.path().join("dir");
+ fs::create_dir(&dir)?;
+ fs::write(dir.join("test4"), "Test content 4")?; // negative case: matched by neither argument
+
+ Command::cargo_bin("yek")? // first run: only the exit status is checked
+ .current_dir(temp_dir.path())
+ .args(["*.txt", "code.rs"])
+ .assert()
+ .success();
+
+ let output = Command::cargo_bin("yek")? // second run with the same arguments: stdout is captured for the content checks
+ .current_dir(temp_dir.path())
+ .args(["*.txt", "code.rs"])
+ .output()?;
+ let stdout = String::from_utf8(output.stdout)?;
+ assert!(stdout.contains("Test content")); // NOTE(review): also satisfied by "Test content 2", so it does not isolate test.txt
+ assert!(stdout.contains("Test content 2"));
+ assert!(!stdout.contains("Test content 4")); // content from `dir/test4` must not appear
+ assert!(stdout.contains("use std::fs;")); // the explicitly named file is included alongside the glob matches
+ Ok(())
+ }
+
#[test]
fn test_config_file() -> Result<(), Box> {
let temp_dir = tempdir()?;
diff --git a/tests/parallel_test.rs b/tests/parallel_test.rs
index c55db31..181de32 100644
--- a/tests/parallel_test.rs
+++ b/tests/parallel_test.rs
@@ -1,8 +1,10 @@
+use anyhow::Result;
use normalize_path::NormalizePath;
use std::collections::HashMap;
-use std::fs;
+use std::fs::{self, File};
+use std::io::Write;
use std::os::unix::fs::PermissionsExt;
-use std::path::Path;
+use std::path::{Path, PathBuf};
use tempfile::tempdir;
use yek::config::YekConfig;
use yek::parallel::process_files_parallel;
@@ -62,7 +64,6 @@ fn test_process_files_parallel_empty() {
#[test]
fn test_process_files_parallel_with_files() {
- use std::fs;
let temp_dir = tempdir().expect("failed to create temp dir");
let file_names = vec!["a.txt", "b.txt", "c.txt"];
for &file in &file_names {
@@ -135,3 +136,105 @@ fn test_process_files_parallel_walk_error() {
let processed_files = result.unwrap();
assert_eq!(processed_files.len(), 0); // No files processed due to walk error
}
+
+#[cfg(test)] // NOTE(review): this file is tests/parallel_test.rs, which Cargo builds as a test target anyway; the gate looks redundant — confirm
+mod tests { // glob-pattern cases for `process_files_parallel`, called directly rather than through the binary
+ use super::*; // reuses the file-level imports (tempdir, File, Write, PathBuf, YekConfig, process_files_parallel)
+
+ #[test]
+ fn test_glob_pattern_single_file() -> Result<()> { // a pattern matching exactly one file yields exactly one result
+ let temp_dir = tempdir()?;
+ let file_path = temp_dir.path().join("test.txt");
+ let mut file = File::create(&file_path)?;
+ writeln!(file, "Test content")?;
+
+ let glob_pattern = temp_dir.path().join("*.txt").to_string_lossy().to_string(); // pattern is the temp dir path joined with `*.txt`
+ let config = YekConfig::default();
+ let boost_map = HashMap::new();
+
+ let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?; // the pattern is passed in the path argument
+ assert_eq!(result.len(), 1);
+ assert_eq!(result[0].rel_path, "test.txt"); // rel_path is expected to be the bare file name
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_glob_pattern_multiple_files() -> Result<()> { // `*.txt` must select both .txt files and skip the .md file
+ let temp_dir = tempdir()?;
+
+ // Create multiple test files
+ let files = vec!["test1.txt", "test2.txt", "other.md"];
+ for fname in &files {
+ let file_path = temp_dir.path().join(fname);
+ let mut file = File::create(&file_path)?;
+ writeln!(file, "Test content for {}", fname)?;
+ }
+
+ let glob_pattern = temp_dir.path().join("*.txt").to_string_lossy().to_string();
+ let config = YekConfig::default();
+ let boost_map = HashMap::new();
+
+ let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?;
+ assert_eq!(result.len(), 2); // Should only match .txt files
+
+ let paths: Vec = result.iter().map(|f| f.rel_path.clone()).collect(); // NOTE(review): `Vec` shows no element type here — confirm against the real file
+ assert!(paths.contains(&"test1.txt".to_string())); // membership checks, so result order is not asserted
+ assert!(paths.contains(&"test2.txt".to_string()));
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_glob_pattern_nested_directories() -> Result<()> { // `**/*.txt` must reach into a subdirectory as well as the root
+ let temp_dir = tempdir()?;
+
+ // Create nested directory structure
+ let nested_dir = temp_dir.path().join("nested");
+ fs::create_dir(&nested_dir)?;
+
+ // Create files in both root and nested directory
+ let root_file = temp_dir.path().join("root.txt");
+ let nested_file = nested_dir.join("nested.txt");
+ let other_file = temp_dir.path().join("other.md"); // non-matching extension; accounts for the count of 2 below
+
+ for (path, content) in [
+ (&root_file, "Root content"),
+ (&nested_file, "Nested content"),
+ (&other_file, "Other content"),
+ ] {
+ let mut file = File::create(path)?;
+ writeln!(file, "{}", content)?;
+ }
+
+ let glob_pattern = temp_dir
+ .path()
+ .join("**/*.txt")
+ .to_string_lossy()
+ .to_string();
+ let config = YekConfig::default();
+ let boost_map = HashMap::new();
+
+ let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?;
+ assert_eq!(result.len(), 2); // Should match both .txt files
+
+ let paths: Vec = result.iter().map(|f| f.rel_path.clone()).collect(); // NOTE(review): `Vec` shows no element type here — confirm against the real file
+ assert!(paths.contains(&"root.txt".to_string()));
+ assert!(paths.contains(&"nested.txt".to_string())); // NOTE(review): expects the bare name, not `nested/nested.txt` — confirm that dropping the parent directory is intended
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_glob_pattern_no_matches() -> Result<()> { // a pattern with no matches must give Ok with an empty result, not an error
+ let temp_dir = tempdir()?; // left empty on purpose
+ let glob_pattern = temp_dir.path().join("*.txt").to_string_lossy().to_string();
+ let config = YekConfig::default();
+ let boost_map = HashMap::new();
+
+ let result = process_files_parallel(&PathBuf::from(&glob_pattern), &config, &boost_map)?;
+ assert!(result.is_empty());
+
+ Ok(())
+ }
+}