diff --git a/Cargo.lock b/Cargo.lock index 3bb3ce8..76fb02e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.5.0" @@ -194,7 +200,7 @@ version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -212,6 +218,18 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "crossterm", + "strum", + "strum_macros", + "unicode-width", +] + [[package]] name = "core-foundation-sys" version = "0.8.4" @@ -227,6 +245,28 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.5.0", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "csv" version = "1.3.0" @@ -281,6 +321,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -369,6 +415,16 @@ version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.21" @@ -416,6 +472,29 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.48.5", +] + [[package]] name = "pkg-config" version = "0.3.30" @@ -440,6 +519,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "regex" version = "1.9.5" @@ -475,19 +563,31 @@ version = "0.38.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" dependencies = [ - "bitflags", + "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", "windows-sys 0.48.0", ] +[[package]] +name = "rustversion" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" + [[package]] name = "ryu" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.193" @@ -508,12 +608,37 @@ dependencies = [ "syn", ] +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" + +[[package]] +name = "strum_macros" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "syn" version = "2.0.37" @@ -550,6 +675,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + [[package]] name = "utf8parse" version = "0.2.1" @@ -798,6 +929,7 @@ dependencies = [ "bzip2", "chrono", "clap", + "comfy-table", "csv", "env_logger", "flate2", diff --git a/Cargo.toml b/Cargo.toml index 4c92399..67d1ce4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ atty = "0.2.14" bzip2 = "0.4.4" chrono = "0.4.38" clap = { version = "4.5.4", features = ["derive", "wrap_help"] } +comfy-table = "7.1.1" csv = "1.3.0" env_logger = "0.10.0" flate2 = "1.0.28" diff --git a/README.md b/README.md index 374151a..be5e56c 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,16 @@ Usage: xtab [OPTIONS] [CSV] Commands: addheader Set new header for CSV file [aliases: ah] dim Dimensions of CSV file - head Get first N records from CSV file - uniq Unique data without sorting + drop Drop or Select CSV fields by columns index + freq freq + head Print first N records from CSV file + pretty Convert CSV to a readable aligned table + tail Print last N records from CSV file + uniq Unique data with keys view Show CSV file content help Print this message or the help of the given subcommand(s) + Options: -h, --help Print help (see more with '--help') -V, --version Print version diff --git a/example/demo.csv b/example/demo.csv new file mode 100644 index 0000000..03a594d --- /dev/null +++ b/example/demo.csv @@ -0,0 +1,21 @@ +sampleid,time,value,bact,FZ69010001,FZ69010002,FZ69010003,FZ69010004,FZ69010005,FZ69010006,FZ69010009,FZ69010011,FZ69010012,FZ69010014 +5021150000747,S1,28.16,阳性,G,A,A,C,T,T,T,C,A,AG +5021309000975,S1,NoCt,阳性,0,A,0,0,T,0,T,0,0,0 +5020951000970,S1,24.03,阳性,G,A,A,C,T,CT,T,C,A,AG +5021380000841,S1,36.76,阳性,G,A,A,C,T,C,T,C,A,G +5021380000856,S1,19.85,阳性,G,A,A,C,T,C,T,C,A,A +5021309001064,S1,27.36,阳性,G,A,A,C,T,C,T,C,A,A +5021265000525,S1,33,阳性,G,A,A,C,T,T,T,C,A,AG +5021309000755,S1,23.8,阳性,G,A,A,C,T,T,T,C,A,A +5021225001940,S1,37.19,阳性,G,A,A,C,0,C,CT,C,A,G +5021309000987,S1,18.64,阳性,G,A,A,C,T,C,T,C,A,A +5021225001316,S1,20.09,阳性,G,A,A,C,T,C,T,C,A,A +5020951000905,S1,27.38,阳性,G,A,A,C,T,C,T,C,A,G +5021265001963,S1,35.48,阳性,G,A,A,C,0,C,0,0,A,A +5021309000993,S1,26.94,阳性,G,A,A,C,T,C,T,C,A,A +5021309001694,S1,32.07,阳性,G,A,A,C,T,C,T,C,A,A +5021380000846,S1,31.19,阳性,G,A,A,C,T,C,T,C,A,A +5021380000097,S1,28.63,阳性,G,A,A,C,T,C,T,C,A,A +5021309001944,S1,37.26,阳性,0,A,0,C,0,C,0,C,A,A +5021309001684,S1,26.65,阳性,G,A,A,C,T,C,T,C,A,A +5021380001015,S1,36.03,阳性,0,A,0,0,0,C,0,C,0,A diff --git a/example/tmp.csv b/example/long.csv similarity index 100% rename from example/tmp.csv rename to example/long.csv diff --git a/example/toy.tsv b/example/toy.tsv index 3fc69bf..6d6fcd9 100644 --- a/example/toy.tsv +++ b/example/toy.tsv @@ -1,2 +1,2 @@ a c -v c +v d diff --git a/src/args.rs b/src/args.rs index 1d0a02b..6e30d53 100644 --- a/src/args.rs +++ b/src/args.rs @@ -31,10 +31,6 @@ pub struct Args { #[arg(value_name = "CSV", global = true, help_heading = Some("Global Arguments"))] pub input: Option, - /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout - #[arg(short = 'o', long = "out", value_name = "FILE", global = true, help_heading = Some("Global Arguments"))] - pub output: Option, - /// If set, the first row is treated as a special header row, and the original header row excluded from output #[arg(short = 'H', long = "no-header", global = true, help_heading = Some("Global FLAGS"))] pub no_header: bool, @@ -73,20 +69,102 @@ pub enum Cmd { /// Set new header for CSV file #[command(visible_alias = "ah")] addheader { - /// Set new header, e.g -N "colum1,column2..." - #[arg(short = 'N', long = "new-header", value_name = "STR")] + /// Set new header, e.g -n "colum1,column2..." + #[arg(short = 'n', long = "new-header", value_name = "STR")] new_header: String, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, }, + /// Dimensions of CSV file - dim {}, - /// Get first N records from CSV file + dim { + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, + }, + + /// Drop or Select CSV fields by columns index + drop { + /// Select columns index, e.g -c 2,3,5 + #[arg(short = 'c', long = "col-index", value_name = "STR")] + col_index: String, + /// invert the sense of matching, to select non-matching fields + #[arg(short = 'u', long = "invert-match", help_heading = Some("FLAGS"))] + invert: bool, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, + }, + + /// freq + freq { + /// Select columns index, e.g -c 2,3,5 + #[arg(short = 'c', long = "col-index", value_name = "STR")] + col_index: String, + /// Sort by key + #[arg(short = 'k', long = "sort-by-key", help_heading = Some("FLAGS"))] + key: bool, + /// sort by frequency + #[arg(short = 'n', long = "sort-by-freq", help_heading = Some("FLAGS"))] + value: bool, + /// Output reversed result + #[arg(short = 'r', long = "rev", help_heading = Some("FLAGS"))] + rev: bool, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, + }, + + /// Print first N records from CSV file head { /// Print first N records, if option "--no-header" enabled, the original header row excluded from output #[arg(short = 'n', long = "num", default_value_t = 10, value_name = "INT")] num: usize, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, }, - /// Unique data without sorting - uniq {}, + + /// Convert CSV to a readable aligned table + pretty { + /// Set the whole table width + #[arg(short = 'w', long = "width-table", value_name = "INT", value_parser = value_parser!(u16).range(0..=65535))] + width_table: Option, + /// If set, truncate content of cells which occupies more than INT lines of space + #[arg(short = 't', long="truncate", value_name = "INT")] + cell_height: Option, + /// Set the alignment of content for each cell, possible values: {left, center, right} + #[arg(short ='a', long = "aln", value_name = "STR", default_value_t = String::from("left"))] + aln: String, + /// Show header in different style + #[arg(long = "header", help_heading = Some("FLAGS"))] + header: bool, + }, + + /// Print last N records from CSV file + tail { + /// Print last N records, if option "--no-header" enabled, the original header row excluded from output + #[arg(short = 'n', long = "num", default_value_t = 10, value_name = "INT")] + num: usize, + /// Output reversed result + #[arg(short = 'r', long = "reverse", help_heading = Some("FLAGS"))] + rev: bool, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, + }, + + /// Unique data with keys + uniq { + /// Select these fields as keys. e.g -k 2,3,5 + #[arg(short = 'k', long = "key", value_name = "STR")] + key: String, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, + }, + /// Show CSV file content view { /// Skip first N records, not include the header row when option "--no-header" enabled. eg "-s 10 --no-header" will skip 11 records @@ -95,5 +173,8 @@ pub enum Cmd { /// If enabled, truncate each record to N fields, if N is greater than the number of fields in this record, then this has no effect #[arg(short = 't', long = "truncate", value_name = "INT")] truncate: Option, + /// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout + #[arg(short = 'o', long = "out", value_name = "FILE")] + output: Option, }, } \ No newline at end of file diff --git a/src/command/addheader.rs b/src/command/addheader.rs index a7cf025..8fec09f 100644 --- a/src/command/addheader.rs +++ b/src/command/addheader.rs @@ -19,10 +19,10 @@ pub fn addheader_csv( .flexible(true) .has_headers(false) .from_reader(file_reader(csv.as_ref())?); - + match csv { - Some(csv) => info!("read file from: {:?}",csv), - None => info!("read file from stdin ") + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), } info!("new header is: {}", new_header); diff --git a/src/command/dim.rs b/src/command/dim.rs index 3e62ed4..0d7998a 100644 --- a/src/command/dim.rs +++ b/src/command/dim.rs @@ -1,9 +1,8 @@ +use crate::utils::*; +use anyhow::{Error, Ok}; use csv::ReaderBuilder; -use anyhow::{Ok, Error}; -use std::{path::PathBuf, time::Instant}; use log::*; -use crate::utils::*; - +use std::{path::PathBuf, time::Instant}; pub fn dim_csv( no_header: bool, @@ -19,19 +18,19 @@ pub fn dim_csv( .flexible(true) .delimiter(delimiter) .from_reader(file_reader(csv.as_ref())?); - + match &csv { - Some(csv) => info!("read file from: {:?}",csv), - None => info!("read file from stdin ") + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), } - + let mut row = 0usize; let mut col = None::; - for rec in csv_reader.records().flatten() { + for rec in csv_reader.records().flatten() { row += 1; if let Some(col) = col { if col != rec.len() { - error!("record on line {}: wrong number of fields",row); + error!("record on line {}: wrong number of fields", row); std::process::exit(1); } } else { @@ -40,7 +39,7 @@ pub fn dim_csv( } let mut out_writer = file_writer(csvo.as_ref(), compression_level)?; - + let buf = if let Some(file) = csv { format!("file\trows\tcols\n{:?}\t{}\t{}\n", file, row, col.unwrap()) } else { @@ -51,4 +50,4 @@ pub fn dim_csv( info!("time elapsed is: {:?}", start.elapsed()); Ok(()) -} \ No newline at end of file +} diff --git a/src/command/drop.rs b/src/command/drop.rs new file mode 100644 index 0000000..c21a639 --- /dev/null +++ b/src/command/drop.rs @@ -0,0 +1,69 @@ +use crate::utils::*; +use anyhow::{Error, Ok}; +use csv::{ReaderBuilder, StringRecord, WriterBuilder}; +use log::*; +use std::{path::PathBuf, time::Instant}; + +pub fn drop_csv( + no_header: bool, + delimiter: u8, + out_delimiter: u8, + index_str: String, + invert: bool, + csv: Option, + csvo: Option, + compression_level: u32, +) -> Result<(), Error> { + let start = Instant::now(); + + let mut csv_reader = ReaderBuilder::new() + .has_headers(no_header) + .flexible(true) + .delimiter(delimiter) + .from_reader(file_reader(csv.as_ref())?); + + match csv { + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), + } + + let mut col_index = vec![]; + for idx in index_str.split(',').collect::>() { + let idx = idx.parse::()?; + if col_index.contains(&idx) { + warn!("duplicate columns index {}, keep first one",idx); + continue; + } else { + col_index.push(idx); + } + if idx == 0 { + error!("col_index error : {}, start from 1", idx); + std::process::exit(1); + } + } + + let mut csv_writer = WriterBuilder::new() + .has_headers(no_header) + .delimiter(out_delimiter) + .from_writer(file_writer(csvo.as_ref(), compression_level)?); + + for rec in csv_reader.records().flatten() { + let mut rec_new = StringRecord::new(); + for (idx,each) in rec.iter().enumerate() { + if invert { + if col_index.contains(&(idx+1)) { + rec_new.push_field(each); + } + } else { + if !col_index.contains(&(idx+1)) { + rec_new.push_field(each); + } + } + } + csv_writer.write_record(&rec_new)?; + } + csv_writer.flush()?; + + info!("time elapsed is: {:?}", start.elapsed()); + Ok(()) +} diff --git a/src/command/freq.rs b/src/command/freq.rs new file mode 100644 index 0000000..9b79bed --- /dev/null +++ b/src/command/freq.rs @@ -0,0 +1,132 @@ +use crate::utils::*; +use anyhow::{Error, Ok}; +use csv::{ReaderBuilder, StringRecord, WriterBuilder}; +use log::*; +use std::{path::PathBuf, time::Instant,collections::HashMap}; + +pub fn freq_csv( + no_header: bool, + delimiter: u8, + out_delimiter: u8, + index_str: String, + sort_key: bool, + sort_value: bool, + sort_reverse: bool, + csv: Option, + csvo: Option, + compression_level: u32, +) -> Result<(), Error> { + let start = Instant::now(); + + let mut csv_reader = ReaderBuilder::new() + .has_headers(no_header) + .flexible(true) + .delimiter(delimiter) + .from_reader(file_reader(csv.as_ref())?); + + let mut flag = 0usize; + if sort_key { + flag += 1; + } + if sort_value { + flag += 1; + } + if flag > 1 { + error!("only one of the flags --sort-by-key, --sort-by-freq is allowed"); + std::process::exit(1); + } + match csv { + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), + } + + let mut col_index = vec![]; + for idx in index_str.split(',').collect::>() { + let idx = idx.parse::()?; + col_index.push(idx); + if idx == 0 { + error!("col_index error : {}, start from 1", idx); + std::process::exit(1); + } + } + + let mut hash: HashMap = HashMap::new(); + let mut raw_order = vec![]; + for rec in csv_reader.records().flatten() { + let mut keys = vec![]; + for (idx,each) in rec.iter().enumerate() { + if col_index.contains(&(idx+1)) { + keys.push(each); + keys.push(","); + } + } + let key =keys.concat(); + if !raw_order.contains(&key) { + raw_order.push(key.clone()); + } + *hash.entry(key).or_insert(0) += 1; + } + + let mut csv_writer = WriterBuilder::new() + .has_headers(no_header) + .delimiter(out_delimiter) + .from_writer(file_writer(csvo.as_ref(), compression_level)?); + + if sort_key { + let mut count = hash.iter().collect::>(); + if sort_reverse { + count.sort_by(|x,y| y.0.cmp(x.0)); + } else { + count.sort_by(|x,y| x.0.cmp(y.0)); + } + + for (k,v) in count { + let mut tmp_keys = k.split(',').collect::>(); + tmp_keys.retain(|&x| x != ""); // strip last "" in tmp_keys + + let mut rec_new = StringRecord::new(); + for each in tmp_keys{ + rec_new.push_field(each); + } + rec_new.push_field(&v.to_string()); + csv_writer.write_record(&rec_new)?; + } + } else if sort_value { + let mut count = hash.iter().collect::>(); + if sort_reverse { + count.sort_by(|x,y| y.1.cmp(x.1)); + } else { + count.sort_by(|x,y| x.1.cmp(y.1)); + } + + for (k,v) in count { + let mut tmp_keys = k.split(',').collect::>(); + tmp_keys.retain(|&x| x != ""); // strip last "" in tmp_keys + + let mut rec_new = StringRecord::new(); + for each in tmp_keys{ + rec_new.push_field(each); + } + rec_new.push_field(&v.to_string()); + csv_writer.write_record(&rec_new)?; + } + } else { + for k in raw_order.iter() { + let mut tmp_keys = k.split(',').collect::>(); + tmp_keys.retain(|&x| x != ""); // strip last "" in tmp_keys + + let mut rec_new = StringRecord::new(); + for each in tmp_keys{ + rec_new.push_field(each); + } + let v = hash.get(k).unwrap(); + rec_new.push_field(&v.to_string()); + + csv_writer.write_record(&rec_new)?; + } + } + csv_writer.flush()?; + + info!("time elapsed is: {:?}", start.elapsed()); + Ok(()) +} diff --git a/src/command/head.rs b/src/command/head.rs index b4200ac..74c7ca4 100644 --- a/src/command/head.rs +++ b/src/command/head.rs @@ -1,9 +1,8 @@ -use csv::{WriterBuilder, ReaderBuilder}; -use anyhow::{Ok, Error}; -use std::{path::PathBuf, time::Instant}; -use log::*; use crate::utils::*; - +use anyhow::{Error, Ok}; +use csv::{ReaderBuilder, WriterBuilder}; +use log::*; +use std::{path::PathBuf, time::Instant}; pub fn head_csv( no_header: bool, @@ -21,10 +20,10 @@ pub fn head_csv( .flexible(true) .delimiter(delimiter) .from_reader(file_reader(csv.as_ref())?); - + match csv { - Some(csv) => info!("read file from: {:?}",csv), - None => info!("read file from stdin ") + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), } let mut csv_writer = WriterBuilder::new() @@ -39,4 +38,4 @@ pub fn head_csv( info!("time elapsed is: {:?}", start.elapsed()); Ok(()) -} \ No newline at end of file +} diff --git a/src/command/mod.rs b/src/command/mod.rs index be5d108..d819747 100644 --- a/src/command/mod.rs +++ b/src/command/mod.rs @@ -1,5 +1,10 @@ //pub mod xlsx2csv; -pub mod view; pub mod addheader; +pub mod dim; pub mod head; -pub mod dim; \ No newline at end of file +pub mod tail; +pub mod uniq; +pub mod view; +pub mod pretty; +pub mod drop; +pub mod freq; diff --git a/src/command/pretty.rs b/src/command/pretty.rs new file mode 100644 index 0000000..7c59211 --- /dev/null +++ b/src/command/pretty.rs @@ -0,0 +1,72 @@ +use crate::utils::*; +use anyhow::{Error, Ok}; +use csv::ReaderBuilder; +use log::*; +use comfy_table::{modifiers::UTF8_ROUND_CORNERS, presets::UTF8_FULL, *}; +use std::{path::PathBuf, time::Instant}; + +pub fn pretty_csv( + no_header: bool, + delimiter: u8, + table_width: Option, + cell_height: Option, + alignment: &str, + header: bool, + csv: Option, +) -> Result<(), Error> { + let start = Instant::now(); + + let mut csv_reader = ReaderBuilder::new() + .has_headers(no_header) + .flexible(true) + .delimiter(delimiter) + .from_reader(file_reader(csv.as_ref())?); + + match &csv { + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), + } + + let mut table = Table::new(); + table + .load_preset(UTF8_FULL) + .apply_modifier(UTF8_ROUND_CORNERS) + .set_content_arrangement(ContentArrangement::DynamicFullWidth); + + // set whole table width + if let Some(t) = table_width { + table.set_width(t); + } else { + table.width(); + } + + let mut n = 0usize; + for rec in csv_reader.records().flatten() { + n += 1; + let mut row = Row::new(); + // set cell max height + if let Some(h) = cell_height { + row.max_height(h); + } + + for each in rec.iter() { + let cell = match alignment { + "left" => Cell::new(each).set_alignment(CellAlignment::Left), + "center" => Cell::new(each).set_alignment(CellAlignment::Center), + "right" => Cell::new(each).set_alignment(CellAlignment::Right), + _ => Cell::new(each) + }; + row.add_cell(cell); + } + //csv has header + if header && n == 1 { + table.set_header(row); + continue; + } + table.add_row(row); + } + println!("{}",table); + + info!("time elapsed is: {:?}", start.elapsed()); + Ok(()) +} diff --git a/src/command/tail.rs b/src/command/tail.rs new file mode 100644 index 0000000..cdc8766 --- /dev/null +++ b/src/command/tail.rs @@ -0,0 +1,54 @@ +use crate::utils::*; +use anyhow::{Error, Ok}; +use csv::{ReaderBuilder, WriterBuilder}; +use log::*; +use std::{path::PathBuf, time::Instant}; + +pub fn tail_csv( + no_header: bool, + delimiter: u8, + out_delimiter: u8, + num: usize, + rev: bool, + csv: Option, + csvo: Option, + compression_level: u32, +) -> Result<(), Error> { + let start = Instant::now(); + + let mut csv_reader = ReaderBuilder::new() + .has_headers(no_header) + .flexible(true) + .delimiter(delimiter) + .from_reader(file_reader(csv.as_ref())?); + + match csv { + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), + } + let mut recs = vec![]; + for rec in csv_reader.records().flatten() { + recs.push(rec); + } + + let mut csv_writer = WriterBuilder::new() + .has_headers(no_header) + .delimiter(out_delimiter) + .from_writer(file_writer(csvo.as_ref(), compression_level)?); + + if rev { + info!("output reversed result"); + for rec in recs.iter().rev().take(num) { + csv_writer.write_record(rec)?; + } + } else { + for rec in recs.iter().rev().take(num).rev() { + csv_writer.write_record(rec)?; + } + } + + csv_writer.flush()?; + + info!("time elapsed is: {:?}", start.elapsed()); + Ok(()) +} diff --git a/src/command/uniq.rs b/src/command/uniq.rs index ae8d427..359d361 100644 --- a/src/command/uniq.rs +++ b/src/command/uniq.rs @@ -1,20 +1,77 @@ -use csv::{WriterBuilder, ReaderBuilder}; -use anyhow::{Ok, Error}; -use std::{path::PathBuf, time::Instant}; -use log::*; use crate::utils::*; - +use anyhow::{Error, Ok}; +use csv::{ReaderBuilder, WriterBuilder}; +use log::*; +use std::{path::PathBuf, time::Instant}; pub fn uniq_csv( no_header: bool, delimiter: u8, out_delimiter: u8, + index_str: String, csv: Option, csvo: Option, compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); + let mut csv_reader = ReaderBuilder::new() + .has_headers(no_header) + .flexible(true) + .delimiter(delimiter) + .from_reader(file_reader(csv.as_ref())?); + + match csv { + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), + } + + let mut col_index = vec![]; + for idx in index_str.split(',').collect::>() { + let idx = idx.parse::()?; + if col_index.contains(&idx) { + warn!("duplicate columns index {}, keep first one",idx); + continue; + } else { + col_index.push(idx); + } + if idx == 0 { + error!("col_index error : {}, start from 1", idx); + std::process::exit(1); + } + } + + let mut csv_writer = WriterBuilder::new() + .has_headers(no_header) + .delimiter(out_delimiter) + .from_writer(file_writer(csvo.as_ref(), compression_level)?); + + let mut row = 0usize; + let mut keys = vec![]; + for rec in csv_reader.records().flatten() { + row += 1; + let mut cols = vec![]; + + for idx in col_index.iter() { + match rec.get(idx - 1) { + Some(x) => cols.push(x), + None => { + error!("record on line {}: wrong index of fields", row); + std::process::exit(1); + } + } + } + + let key = cols.concat(); + if keys.contains(&key) { + continue; + } else { + keys.push(key); + csv_writer.write_record(&rec)?; + } + } + csv_writer.flush()?; + info!("time elapsed is: {:?}", start.elapsed()); Ok(()) -} \ No newline at end of file +} diff --git a/src/command/view.rs b/src/command/view.rs index bec21cd..f861fdb 100644 --- a/src/command/view.rs +++ b/src/command/view.rs @@ -1,9 +1,8 @@ -use csv::{WriterBuilder, ReaderBuilder}; -use anyhow::{Ok, Error}; -use std::{path::PathBuf, time::Instant}; -use log::info; use crate::utils::*; - +use anyhow::{Error, Ok}; +use csv::{ReaderBuilder, WriterBuilder}; +use log::info; +use std::{path::PathBuf, time::Instant}; pub fn view_csv( no_header: bool, @@ -24,8 +23,8 @@ pub fn view_csv( .from_reader(file_reader(csv.as_ref())?); match csv { - Some(csv) => info!("read file from: {:?}",csv), - None => info!("read file from stdin ") + Some(csv) => info!("read file from: {:?}", csv), + None => info!("read file from stdin "), } let mut csv_writer = WriterBuilder::new() @@ -45,4 +44,4 @@ pub fn view_csv( info!("time elapsed is: {:?}", start.elapsed()); Ok(()) -} \ No newline at end of file +} diff --git a/src/main.rs b/src/main.rs index f8681ff..29ecc5c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,7 @@ mod command; mod utils; use command::{ - addheader::*, dim::dim_csv, head::*, view::* + addheader::addheader_csv, dim::dim_csv, drop::drop_csv, freq::freq_csv, head::head_csv, pretty::pretty_csv, tail::tail_csv, uniq::uniq_csv, view::view_csv }; @@ -18,19 +18,33 @@ fn main() -> Result<(), Error>{ loger::logger(cmd.verbose, cmd.logfile, cmd.quiet)?; match cmd.cmd { - args::Cmd::view {skip, truncate} => { - view_csv( cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, skip, truncate, cmd.input, cmd.output, cmd.compression_level)?; + args::Cmd::view {skip, truncate, output} => { + view_csv( cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, skip, truncate, cmd.input, output, cmd.compression_level)?; } - args::Cmd::addheader { new_header } => { - addheader_csv(new_header, cmd.delimiter as u8, cmd.out_delimite as u8, cmd.input, cmd.output, cmd.compression_level)?; + args::Cmd::addheader { new_header, output } => { + addheader_csv(new_header, cmd.delimiter as u8, cmd.out_delimite as u8, cmd.input, output, cmd.compression_level)?; } - args::Cmd::dim { } => { - dim_csv(cmd.no_header, cmd.delimiter as u8, cmd.input, cmd.output, cmd.compression_level)?; + args::Cmd::dim {output } => { + dim_csv(cmd.no_header, cmd.delimiter as u8, cmd.input, output, cmd.compression_level)?; } - args::Cmd::head { num } => { - head_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, num, cmd.input, cmd.output, cmd.compression_level)?; + args::Cmd::head { num, output } => { + head_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, num, cmd.input, output, cmd.compression_level)?; + } + args::Cmd::uniq { key , output} => { + uniq_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, key, cmd.input, output, cmd.compression_level)?; + } + args::Cmd::tail { num, rev, output } => { + tail_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, num, rev, cmd.input, output, cmd.compression_level)?; + } + args::Cmd::pretty { width_table, cell_height, aln, header } => { + pretty_csv(cmd.no_header, cmd.delimiter as u8, width_table, cell_height, &aln, header, cmd.input)?; + } + args::Cmd::drop { col_index, invert, output } => { + drop_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, col_index, invert, cmd.input, output, cmd.compression_level)?; + } + args::Cmd::freq { col_index,key, value, rev, output } => { + freq_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, col_index, key, value, rev, cmd.input, output, cmd.compression_level)?; } - args::Cmd::uniq { } => { todo!()} } Ok(())