Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow globs to be passed into yek #111

Merged
merged 6 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.21.0] - 2025-02-23
[0.21.0]: https://github.com/bodo-run/yek/compare/v0.20.0...v0.21.0
### Bug Fixes

- Glob pattern handling in e2e tests

### Documentation

- Update README with glob pattern and file selection support

### Features

- Handle glob patterns in input paths

### Testing

- Add comprehensive tests for glob pattern support

### Ci

- Run release and publish jobs on main branch
- Only run release and publish on tag pushes
- Trigger release on tag merge to main

## [0.20.0] - 2025-02-22
[0.20.0]: https://github.com/bodo-run/yek/compare/v0.19.0...v0.20.0
### Bug Fixes
Expand All @@ -24,6 +48,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Add integration test

### Release

- V0.20.0

## [0.19.0] - 2025-02-19
[0.19.0]: https://github.com/bodo-run/yek/compare/v0.18.0...v0.19.0
### Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "yek"
version = "0.20.0"
version = "0.21.0"
edition = "2021"
description = "A tool to serialize a repository into chunks of text files"
license = "MIT"
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ By default:
- Infers additional ignore patterns (binary, large, etc.).
- Automatically detects if output is being piped and streams content instead of writing to files.
- Supports processing multiple directories in a single command.
- Supports glob patterns and individual file selection.
- Configurable via a `yek.yaml` file.

Yek <a href="https://fa.wikipedia.org/wiki/۱">يک</a> means "One" in Farsi/Persian.
Expand Down Expand Up @@ -113,6 +114,25 @@ Process multiple directories:
yek src/ tests/
```

Process multiple files:

```bash
yek file1.txt file2.txt file3.txt
```

Use glob patterns:

```bash
yek "src/**/*.ts"
```

```bash
yek "src/main.rs" "tests/*.rs" "docs/README.md"
```

> [!NOTE]
> When using glob patterns, make sure to quote them to prevent shell expansion.

### CLI Reference

```bash
Expand Down
39 changes: 35 additions & 4 deletions src/parallel.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{config::YekConfig, priority::get_file_priority, Result};
use content_inspector::{inspect, ContentType};
use glob::glob;
use ignore::gitignore::GitignoreBuilder;
use path_slash::PathBufExt;
use rayon::prelude::*;
Expand Down Expand Up @@ -81,12 +82,42 @@ pub fn process_files_parallel(
config: &YekConfig,
boost_map: &HashMap<String, i32>,
) -> Result<Vec<ProcessedFile>> {
// If it's a file, process it directly
if base_path.is_file() {
return process_single_file(base_path, config, boost_map);
// Expand globs into a list of paths
let mut expanded_paths = Vec::new();
let path_str = base_path.to_string_lossy();
for entry in glob(&path_str)? {
match entry {
Ok(path) => expanded_paths.push(path),
Err(e) => debug!("Glob entry error: {:?}", e),
}
}

// If it's a single file (no glob expansion or single file result), process it directly
if expanded_paths.len() == 1 && expanded_paths[0].is_file() {
return process_single_file(&expanded_paths[0], config, boost_map);
}

// Iterate over expanded paths, handling files and directories
let mut all_processed_files = Vec::new();
for path in expanded_paths {
if path.is_file() {
all_processed_files.extend(process_single_file(&path, config, boost_map)?);
} else if path.is_dir() {
// For directories, use the original recursive logic
all_processed_files.extend(process_files_parallel_internal(&path, config, boost_map)?);
}
}

// Otherwise, it's a directory, so walk it
Ok(all_processed_files)
}

/// Internal function to handle directory recursion (separated for clarity)
fn process_files_parallel_internal(
base_path: &Path,
config: &YekConfig,
boost_map: &HashMap<String, i32>,
) -> Result<Vec<ProcessedFile>> {
// It's a directory, so walk it
let mut walk_builder = ignore::WalkBuilder::new(base_path);

// Standard filters + no follow symlinks
Expand Down
10 changes: 5 additions & 5 deletions tests/config_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,11 @@ fn test_extend_config_with_defaults() {

assert_eq!(cfg.input_paths, input_paths);
assert_eq!(cfg.output_dir, Some(output_dir));
assert_eq!(cfg.version, false);
assert!(!cfg.version);
assert_eq!(cfg.max_size, "10MB".to_string());
assert_eq!(cfg.tokens, String::new());
assert_eq!(cfg.json, false);
assert_eq!(cfg.debug, false);
assert!(!cfg.json);
assert!(!cfg.debug);
assert_eq!(cfg.output_template, DEFAULT_OUTPUT_TEMPLATE.to_string());
assert_eq!(cfg.ignore_patterns, Vec::<String>::new());
assert_eq!(cfg.unignore_patterns, Vec::<String>::new());
Expand All @@ -355,8 +355,8 @@ fn test_extend_config_with_defaults() {
.collect::<Vec<_>>()
);
assert_eq!(cfg.git_boost_max, Some(100));
assert_eq!(cfg.stream, false);
assert_eq!(cfg.token_mode, false);
assert!(!cfg.stream);
assert!(!cfg.token_mode);
assert_eq!(cfg.output_file_full_path, None);
assert_eq!(cfg.max_git_depth, 100);
}
Expand Down
71 changes: 70 additions & 1 deletion tests/e2e_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ mod e2e_tests {
assert!(output.status.success());

// Ensure output dir is printed in stdout
let stdout = String::from_utf8_lossy(&output.stdout);
let stdout = String::from_utf8(output.stdout)?;
assert!(
stdout.contains(&output_dir.display().to_string()),
"Expected output directory `{}` to be printed in stdout, but it was {}",
Expand Down Expand Up @@ -215,6 +215,75 @@ mod e2e_tests {
Ok(())
}

/// Passing a glob as a literal argument makes the binary expand it itself.
///
/// The pattern is handed to the process as a plain argument (no shell is
/// involved), so any expansion observed here is performed by `yek`.
#[test]
fn test_glob_pattern() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;
    let root = workspace.path();
    fs::write(root.join("test.txt"), "Test content")?;

    // Build the invocation step by step, then capture its output.
    let mut yek = Command::cargo_bin("yek")?;
    yek.current_dir(root).arg("*.txt");
    let run = yek.output()?;

    let printed = String::from_utf8(run.stdout)?;
    assert!(run.status.success());
    assert!(printed.contains("Test content"));
    Ok(())
}

/// A `*.txt` glob selects the matching top-level files and leaves the
/// sibling directory's contents out of the output.
///
/// Fixture layout inside the temporary directory:
/// - `test.txt`   -> "Test content"
/// - `test2.txt`  -> "Test content 2"
/// - `dir/test3`  -> "Test content 3" (must not appear in the output)
#[test]
fn test_mix_of_files_and_dirs() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;
    fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
    let dir = temp_dir.path().join("dir");
    fs::create_dir(&dir)?;
    fs::write(dir.join("test3"), "Test content 3")?;

    // Run the binary once and check both the exit status and the captured
    // output; spawning it a second time with identical arguments adds nothing.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .arg("*.txt")
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {}: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    // "Test content" is a prefix of "Test content 2", so a plain `contains`
    // check would pass even if test.txt were missing. Requiring two
    // occurrences proves that both files were emitted.
    assert!(
        stdout.matches("Test content").count() >= 2,
        "expected both test.txt and test2.txt in the output, got: {}",
        stdout
    );
    assert!(stdout.contains("Test content 2"));
    assert!(!stdout.contains("Test content 3"));
    Ok(())
}

/// A glob and an explicit file path can be combined in one invocation.
///
/// Fixture layout inside the temporary directory:
/// - `test.txt`   -> "Test content"
/// - `test2.txt`  -> "Test content 2"
/// - `code.rs`    -> "use std::fs;"
/// - `dir/test4`  -> "Test content 4" (must not appear in the output)
#[test]
fn test_mix_of_files_and_dirs_with_glob_pattern() -> Result<(), Box<dyn std::error::Error>> {
    let temp_dir = tempdir()?;
    fs::write(temp_dir.path().join("test.txt"), "Test content")?;
    fs::write(temp_dir.path().join("test2.txt"), "Test content 2")?;
    fs::write(temp_dir.path().join("code.rs"), "use std::fs;")?;
    let dir = temp_dir.path().join("dir");
    fs::create_dir(&dir)?;
    fs::write(dir.join("test4"), "Test content 4")?;

    // Run the binary once and check both the exit status and the captured
    // output; spawning it a second time with identical arguments adds nothing.
    let output = Command::cargo_bin("yek")?
        .current_dir(temp_dir.path())
        .args(["*.txt", "code.rs"])
        .output()?;
    assert!(
        output.status.success(),
        "yek exited with {}: {}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout)?;
    // "Test content" is a prefix of "Test content 2", so a plain `contains`
    // check would pass even if test.txt were missing. Requiring two
    // occurrences proves that both glob matches were emitted.
    assert!(
        stdout.matches("Test content").count() >= 2,
        "expected both test.txt and test2.txt in the output, got: {}",
        stdout
    );
    assert!(stdout.contains("Test content 2"));
    assert!(!stdout.contains("Test content 4"));
    assert!(stdout.contains("use std::fs;"));
    Ok(())
}

#[test]
fn test_config_file() -> Result<(), Box<dyn std::error::Error>> {
let temp_dir = tempdir()?;
Expand Down
109 changes: 106 additions & 3 deletions tests/parallel_test.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use anyhow::Result;
use normalize_path::NormalizePath;
use std::collections::HashMap;
use std::fs;
use std::fs::{self, File};
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::path::{Path, PathBuf};
use tempfile::tempdir;
use yek::config::YekConfig;
use yek::parallel::process_files_parallel;
Expand Down Expand Up @@ -62,7 +64,6 @@ fn test_process_files_parallel_empty() {

#[test]
fn test_process_files_parallel_with_files() {
use std::fs;
let temp_dir = tempdir().expect("failed to create temp dir");
let file_names = vec!["a.txt", "b.txt", "c.txt"];
for &file in &file_names {
Expand Down Expand Up @@ -135,3 +136,105 @@ fn test_process_files_parallel_walk_error() {
let processed_files = result.unwrap();
assert_eq!(processed_files.len(), 0); // No files processed due to walk error
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Creates (or truncates) `path` and writes `text` followed by a newline.
    fn write_line(path: &Path, text: &str) -> Result<()> {
        let mut handle = File::create(path)?;
        writeln!(handle, "{}", text)?;
        Ok(())
    }

    /// Joins `pattern` onto `root`, passes the result to
    /// `process_files_parallel` with a default config and an empty boost map,
    /// and returns the `rel_path` of every processed file in returned order.
    fn matched_rel_paths(root: &Path, pattern: &str) -> Result<Vec<String>> {
        let full_pattern = root.join(pattern).to_string_lossy().to_string();
        let cfg = YekConfig::default();
        let boosts = HashMap::new();

        let processed = process_files_parallel(&PathBuf::from(&full_pattern), &cfg, &boosts)?;
        Ok(processed.iter().map(|f| f.rel_path.clone()).collect())
    }

    /// A glob that matches exactly one file yields exactly that file.
    #[test]
    fn test_glob_pattern_single_file() -> Result<()> {
        let sandbox = tempdir()?;
        write_line(&sandbox.path().join("test.txt"), "Test content")?;

        let rel_paths = matched_rel_paths(sandbox.path(), "*.txt")?;
        assert_eq!(rel_paths.len(), 1);
        assert_eq!(rel_paths[0], "test.txt");

        Ok(())
    }

    /// Only files matching the glob's extension are processed.
    #[test]
    fn test_glob_pattern_multiple_files() -> Result<()> {
        let sandbox = tempdir()?;

        // Two matching files plus one that the pattern must skip.
        for name in ["test1.txt", "test2.txt", "other.md"] {
            write_line(
                &sandbox.path().join(name),
                &format!("Test content for {}", name),
            )?;
        }

        let rel_paths = matched_rel_paths(sandbox.path(), "*.txt")?;
        assert_eq!(rel_paths.len(), 2); // Should only match .txt files

        assert!(rel_paths.iter().any(|p| p == "test1.txt"));
        assert!(rel_paths.iter().any(|p| p == "test2.txt"));

        Ok(())
    }

    /// A recursive `**` glob picks up matches in the root and in a subdirectory.
    #[test]
    fn test_glob_pattern_nested_directories() -> Result<()> {
        let sandbox = tempdir()?;

        // Lay out one subdirectory below the temporary root.
        let sub_dir = sandbox.path().join("nested");
        fs::create_dir(&sub_dir)?;

        // Two .txt files (root and nested) and one non-matching .md file.
        let fixtures = [
            (sandbox.path().join("root.txt"), "Root content"),
            (sub_dir.join("nested.txt"), "Nested content"),
            (sandbox.path().join("other.md"), "Other content"),
        ];
        for (path, text) in &fixtures {
            write_line(path, text)?;
        }

        let rel_paths = matched_rel_paths(sandbox.path(), "**/*.txt")?;
        assert_eq!(rel_paths.len(), 2); // Should match both .txt files

        assert!(rel_paths.iter().any(|p| p == "root.txt"));
        assert!(rel_paths.iter().any(|p| p == "nested.txt"));

        Ok(())
    }

    /// A glob with no matches produces an empty result rather than an error.
    #[test]
    fn test_glob_pattern_no_matches() -> Result<()> {
        let sandbox = tempdir()?;

        let rel_paths = matched_rel_paths(sandbox.path(), "*.txt")?;
        assert!(rel_paths.is_empty());

        Ok(())
    }
}
Loading