From 911cbe9c5966f31cdeea1bef5195949f2ee4aa2e Mon Sep 17 00:00:00 2001 From: kelko Date: Wed, 12 Oct 2022 09:17:18 +0200 Subject: [PATCH] string-value creating sub-pipelines (#1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ⚠️ CONTAINS BREAKING CHANGE ⚠️ For all commands creating textual content (`SET-TEXT-CONTENT`, `ADD-TEXT-CONTENT`, `ADD-COMMENT`, `SET-ATTR`) instead of only passing a pre-defined string value now also sub-pipelines can be used to read that string value from some place. E.g. read it from a (different) attribute of a (different) element or read the text content. - first step of such sub-pipeline always selects element to read the value from - `USE-ELEMENT`: take the target element and run value extraction on that - `USE-PARENT`: take the parent of the target element and run value extraction on that - `QUERY-ELEMENT`: run a CSS selector query on the target element and run value extraction on all matches - `QUERY-PARENT`: run a CSS selector query on the parent of the target element and run value extraction on all matches - `QUERY-ROOT`: run a CSS selector query on the root of the target element and run value extraction on all matches - second step of such sub-pipeline always defines which value to read from selected element (attr, text-content) - `GET-ATTR`: read an attribute of the selected element - `GET-TEXT-CONTENT`: read the [textContent](https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent) of the selected element - future additions: - value-manipulating steps will be possible (up-casing, down-casing, regex-based replace) - _maybe_: a "quote"-like command to read text content from a file BREAKING CHANGES: - Rename `ONLY` to `EXTRACT-ELEMENT` (kept old name as alias, old alias is removed) - Rename `WITHOUT` to `REMOVE-ELEMENT` (kept old name as alias, old alias is removed) - Renamed alias `FOR` to `WITH` for `FOR-EACH` - Renamed `READ-FROM` to `LOAD-FILE` --- Cargo.toml | 6 +- README.md 
| 39 +- src/css/mod.rs | 9 +- src/element_creating/command.rs | 215 +++++++ src/element_creating/mod.rs | 5 + .../mod.rs => element_creating/pipeline.rs} | 50 +- .../command/mod.rs} | 247 ++++---- src/element_processing/command/tests.rs | 522 +++++++++++++++++ src/element_processing/mod.rs | 5 + src/element_processing/pipeline.rs | 126 +++++ src/html/mod.rs | 22 +- src/html/tests.rs | 14 +- src/lib.rs | 98 +++- src/parsing/mod.rs | 128 +++-- src/parsing/tests.rs | 451 +++++++++++---- src/pipeline/tests.rs | 533 ------------------ src/string_creating/command.rs | 451 +++++++++++++++ src/string_creating/mod.rs | 5 + src/string_creating/pipeline.rs | 87 +++ tests/add_comment.rs | 41 +- tests/add_element.rs | 45 +- tests/add_text_content.rs | 39 +- tests/clear_attr.rs | 2 +- tests/clear_content.rs | 2 +- tests/{only.rs => extract_element.rs} | 4 +- tests/{for_each.rs => for.rs} | 2 +- tests/{without.rs => remove_element.rs} | 2 +- tests/replace.rs | 36 +- tests/set_attr.rs | 87 ++- tests/set_text_content.rs | 74 ++- tests/source.html | 2 +- 31 files changed, 2422 insertions(+), 927 deletions(-) create mode 100644 src/element_creating/command.rs create mode 100644 src/element_creating/mod.rs rename src/{pipeline/mod.rs => element_creating/pipeline.rs} (53%) rename src/{command.rs => element_processing/command/mod.rs} (54%) create mode 100644 src/element_processing/command/tests.rs create mode 100644 src/element_processing/mod.rs create mode 100644 src/element_processing/pipeline.rs delete mode 100644 src/pipeline/tests.rs create mode 100644 src/string_creating/command.rs create mode 100644 src/string_creating/mod.rs create mode 100644 src/string_creating/pipeline.rs rename tests/{only.rs => extract_element.rs} (94%) rename tests/{for_each.rs => for.rs} (92%) rename tests/{without.rs => remove_element.rs} (95%) diff --git a/Cargo.toml b/Cargo.toml index eb82e42..ee8f8e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "html-streaming-editor" 
-version = "0.4.2" +version = "0.5.0" edition = "2021" authors = [":kelko: "] repository = "https://github.com/kelko/html-streaming-editor" @@ -15,11 +15,11 @@ keywords = ["html"] [dependencies] peg = "0.8.0" -tl = "0.7.6" +tl = "0.7.7" snafu = { version = "0.7", features = ["backtraces"] } clap = { version = "4.0.9", features = ["derive"] } exitcode = "1.1.2" log = "0.4" pretty_env_logger = "0.4.0" -rctree = "0.4.0" +rctree = "0.5.0" html-escape = "0.2.11" \ No newline at end of file diff --git a/README.md b/README.md index 6cc7afb..dd5511f 100644 --- a/README.md +++ b/README.md @@ -32,14 +32,24 @@ Some `COMMAND` use sub-pipelines. There are two kind of `COMMANDS` with this: The `SELECTOR` is a [CSS selector](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors). +Pipeline Types +----------------- + +There are three types of pipelines: + +- element processing pipeline: The default. You have some input HTML which you run through the pipeline +- element creating sub-pipeline: special sub-pipeline wherever a command adds one or more elements into the HTML tree (or into a different place of said tree) +- string value creating sub-pipeline: special sub-pipeline wherever a command sets a string value (text content, comment, attribute value) + + Commands ------------- Currently supported: -- `ONLY`: remove everything not matching the CSS selector (alias: `SELECT`) -- `WITHOUT`: remove everything matching the CSS selector (alias: `FILTER`) -- `FOR-EACH`: run a sub-pipeline on all sub-elements matching a CSS selector but return the previously selected elements (alias: `FOR`) +- `EXTRACT-ELEMENT`: remove everything not matching the CSS selector (alias: `ONLY`) +- `REMOVE-ELEMENT`: remove everything matching the CSS selector (alias: `WITHOUT`) +- `FOR-EACH`: run a sub-pipeline on all sub-elements matching a CSS selector but return the previously selected elements (alias: `WITH`) - `CLEAR-ATTR`: removes a given attribute from the previously selected elements - 
`CLEAR-CONTENT`: clears all children from the previously selected elements - `SET-ATTR`: Sets a given attribute to a specified value @@ -49,7 +59,22 @@ Currently supported: - `ADD-ELEMENT`: appends a new tag/element child - `REPLACE`: replace all elements matching a CSS selector with new elements (alias: `MAP`) - `CREATE-ELEMENT`: creates a new, empty element, mainly in combination with `ADD-ELEMENT` or `REPLACE` (alias: `NEW`) -- `READ-FROM`: reads a DOM from a different file, mainly in combination with `ADD-ELEMENT` or `REPLACE` (alias: `SOURCE`) +- `LOAD-FILE`: reads a DOM from a different file, mainly in combination with `ADD-ELEMENT` or `REPLACE` (alias: `SOURCE`) +- `QUERY-REPLACED`: returns children matching the CSS selector of those elements meant to be replaced, only in combination with `REPLACE` (alias: `KEEP`) +- `USE-ELEMENT`: returns the currently selected element for a sub-pipeline, mainly in combination with "string value producing pipelines" (alias: `THIS`) +- `USE-PARENT`: returns the parent of the currently selected element for a sub-pipeline, mainly in combination with "string value producing pipelines" (alias: `PARENT`) +- `QUERY-ELEMENT`: runs a query on the currently selected element for a sub-pipeline, without detaching target element from HTML tree unlike `EXTRACT-ELEMENT` +- `QUERY-PARENT`: runs a query on the parent of the currently selected element for a sub-pipeline, without detaching target element from HTML tree unlike `EXTRACT-ELEMENT` +- `QUERY-ROOT`: runs a query on the root of the currently selected element for a sub-pipeline, without detaching target element from HTML tree unlike `EXTRACT-ELEMENT` +- `GET-ATTR`: returns the value of an attribute of the currently selected element for string-value producing pipelines +- `GET-TEXT-CONTENT`: returns the text content of the currently selected element for string-value producing pipelines + +Not Yet implemented: + +- `TO-LOWER`: all-lower the current string value of the pipeline +- 
`TO-UPPER`: all-caps the current string value of the pipeline +- `REGEX-REPLACE`: runs a RegEx-based value replacements on the current string value of the pipeline + Binary ------- @@ -81,11 +106,11 @@ hse -i index.html 'ONLY{main .content}' hse -i index.html 'ONLY{main, .main} | WITHOUT{script}' # replaces all elements with `placeholder` class with the
from a second HTML file -hse -i index.html 'REPLACE{.placeholder ↤ READ-FROM{"other.html"} | ONLY{div.content} }' +hse -i index.html 'REPLACE{.placeholder ↤ SOURCE{"other.html"} | ONLY{div.content} }' # add a new element to with git version info -hse -i index.html "FOR{head ↦ ADD-ELEMENT{ CREATE-ELEMENT{meta} | SET-ATTR{name ↤ 'version'} | SET-ATTR{content ↤ '`git describe --tags`'} } }" +hse -i index.html "WITH{head ↦ ADD-ELEMENT{ NEW{meta} | SET-ATTR{name ↤ 'version'} | SET-ATTR{content ↤ '`git describe --tags`'} } }" # add a new comment to with git version info -hse -i index.html "FOR{body ↦ ADD-COMMENT{'`git describe --tags`'}}" +hse -i index.html "WITH{body ↦ ADD-COMMENT{'`git describe --tags`'}}" ``` diff --git a/src/css/mod.rs b/src/css/mod.rs index bdaf6a8..c91b5cd 100644 --- a/src/css/mod.rs +++ b/src/css/mod.rs @@ -258,6 +258,7 @@ impl<'a> CssSelectorStep<'a> { pub struct CssSelectorPath<'a>(Vec>); impl<'a> CssSelectorPath<'a> { + #[cfg(test)] pub fn single(step: CssSelector<'a>) -> Self { CssSelectorPath(vec![CssSelectorStep::start(step)]) } @@ -268,10 +269,6 @@ impl<'a> CssSelectorPath<'a> { CssSelectorPath(list) } - pub fn as_vec(&self) -> Vec> { - return self.0.clone(); - } - pub(crate) fn query( &self, start: &Vec>, @@ -351,10 +348,6 @@ impl<'a> CssSelectorList<'a> { CssSelectorList(content) } - pub fn as_vec(&self) -> Vec> { - return self.0.clone(); - } - pub(crate) fn query( &self, start: &Vec>, diff --git a/src/element_creating/command.rs b/src/element_creating/command.rs new file mode 100644 index 0000000..41e2971 --- /dev/null +++ b/src/element_creating/command.rs @@ -0,0 +1,215 @@ +use crate::html::HtmlTag; +use crate::{load_html_file, CommandError, CssSelectorList, HtmlContent}; +use log::trace; + +#[derive(Debug, PartialEq, Clone)] +pub enum ElementCreatingCommand<'a> { + /// creates an HTML element of given type + /// Returns the created element as result. 
+ CreateElement(&'a str), + /// reads a different file into memory + /// Returns the content of that file as result. + FromFile(&'a str), + /// Starting at the element being replaced run a sub-query + /// Returns all sub-elements that match the given CSS selector. + FromReplaced(CssSelectorList<'a>), +} + +impl<'a> ElementCreatingCommand<'a> { + /// perform the action defined by the command on the set of nodes + /// and return the calculated results. + /// For some command the output can be equal to the input, + /// others change the result-set + pub(crate) fn execute( + &self, + input: &Vec>, + ) -> Result>, CommandError> { + match self { + ElementCreatingCommand::CreateElement(element_name) => { + Self::create_element(element_name) + } + ElementCreatingCommand::FromFile(file_path) => Self::load_file(file_path), + ElementCreatingCommand::FromReplaced(selector) => Self::query_replaced(input, selector), + } + } + + fn create_element(name: &str) -> Result>, CommandError> { + trace!("Running CREATE-ELEMENT command using name: {:#?}", name); + + Ok(vec![rctree::Node::new(HtmlContent::Tag(HtmlTag::of_name( + name.clone(), + )))]) + } + + fn load_file(file_path: &str) -> Result>, CommandError> { + trace!("Running LOAD-FILE command using file: {:#?}", file_path); + + let root_element = load_html_file(file_path)?; + Ok(vec![root_element.make_deep_copy()]) + } + + fn query_replaced( + input: &Vec>, + selector: &CssSelectorList<'a>, + ) -> Result>, CommandError> { + trace!("Running QUERY-REPLACED command"); + Ok(selector + .query(input) + .iter() + .map(|e| rctree::Node::clone(e).make_deep_copy()) + .collect::>()) + } +} + +#[cfg(test)] +mod tests { + use crate::element_creating::ElementCreatingCommand; + use crate::html::HtmlTag; + use crate::{ + load_inline_html, CssSelector, CssSelectorList, CssSelectorPath, HtmlContent, + HtmlRenderable, + }; + use std::collections::BTreeMap; + + #[test] + fn create_element_builds_new_element_on_empty_input() { + let command = 
ElementCreatingCommand::CreateElement("div"); + + let mut result = command.execute(&vec![]).unwrap(); + + assert_eq!(result.len(), 1); + + let first_result = result.pop().unwrap(); + let first_result = first_result.borrow(); + assert_eq!(*first_result, HtmlContent::Tag(HtmlTag::of_name("div"))); + } + + #[test] + fn create_element_builds_new_element_ignoring_input() { + let command = ElementCreatingCommand::CreateElement("div"); + + let root = rctree::Node::new(HtmlContent::Tag(HtmlTag::of_name("html"))); + + let mut result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 1); + + let first_result = result.pop().unwrap(); + let first_result = first_result.borrow(); + assert_eq!(*first_result, HtmlContent::Tag(HtmlTag::of_name("div"))); + } + + #[test] + fn load_file_read_file_content() { + let command = ElementCreatingCommand::FromFile("tests/source.html"); + let mut result = command.execute(&vec![]).unwrap(); + + assert_eq!(result.len(), 1); + + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + r#" + + + LOAD-FILE Source + + +
Some other stuff
+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+
    +
  • a
  • +
  • b
  • +
  • c
  • +
+ + +"# + ); + } + + #[test] + fn query_replaced_returns_matching_descendent_of_input() { + let command = ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + let root = load_inline_html( + r#"

"#, + ); + + let mut result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 1); + + let first_result = result.pop().unwrap(); + let first_result = first_result.borrow(); + assert_eq!( + *first_result, + HtmlContent::Tag(HtmlTag { + name: String::from("aside"), + attributes: BTreeMap::::from([( + String::from("class"), + String::from("test-source") + )]) + }) + ); + } + + #[test] + fn query_replaced_returns_all_matching_descendents_of_input() { + let command = ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + let root = load_inline_html( + r#"
+

+ Content 1 +

+ +
+
+
+
+
+
"#, + ); + + let result = command.execute(&vec![root]).unwrap(); + let result = result.iter().map(|n| n.outer_html()).collect::>(); + + assert_eq!(result.len(), 3); + assert!(result.contains(&String::from(r#"Content 1"#))); + assert!(result.contains(&String::from( + r#""# + ))); + assert!(result.contains(&String::from(r#""#))); + } + + #[test] + fn query_replaced_returns_empty_on_no_match() { + let command = ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + let root = + load_inline_html(r#"

"#); + + let result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn query_replaced_returns_empty_on_empty_input() { + let command = ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&vec![]).unwrap(); + + assert_eq!(result.len(), 0); + } +} diff --git a/src/element_creating/mod.rs b/src/element_creating/mod.rs new file mode 100644 index 0000000..2ae2fef --- /dev/null +++ b/src/element_creating/mod.rs @@ -0,0 +1,5 @@ +mod command; +mod pipeline; + +pub(crate) use command::ElementCreatingCommand; +pub(crate) use pipeline::ElementCreatingPipeline; diff --git a/src/pipeline/mod.rs b/src/element_creating/pipeline.rs similarity index 53% rename from src/pipeline/mod.rs rename to src/element_creating/pipeline.rs index e2e103e..69248bd 100644 --- a/src/pipeline/mod.rs +++ b/src/element_creating/pipeline.rs @@ -1,31 +1,23 @@ +use crate::element_creating::ElementCreatingCommand; +use crate::element_processing::ElementProcessingCommand; +use crate::{CommandFailedSnafu, HtmlContent, PipelineError}; use log::{trace, warn}; -use snafu::{ResultExt, Snafu}; -use std::fmt::Debug; - -use crate::command::Command; -use crate::html::HtmlContent; - -#[cfg(test)] -mod tests; - -#[derive(Debug, Snafu)] -pub enum PipelineError { - #[snafu(display("Command at index {index} failed"))] - CommandFailed { - index: usize, - #[snafu(backtrace)] - source: crate::command::CommandError, - }, -} +use snafu::ResultExt; #[derive(Debug, PartialEq, Clone)] -pub struct Pipeline<'a>(Vec>); +pub(crate) struct ElementCreatingPipeline<'a>( + ElementCreatingCommand<'a>, + Vec>, +); /// The command pipeline: a list of individual commands /// each to execute on the result of the previous command -impl<'a> Pipeline<'a> { - pub fn new(content: Vec>) -> Self { - Pipeline(content) +impl<'a> ElementCreatingPipeline<'a> { + pub fn new( + creation: 
ElementCreatingCommand<'a>, + processing: Option>>, + ) -> Self { + ElementCreatingPipeline(creation, processing.unwrap_or(vec![])) } /// execute the pipeline on the given nodes by @@ -36,9 +28,17 @@ impl<'a> Pipeline<'a> { &self, nodes: Vec>, ) -> Result>, PipelineError> { - let mut intermediate = nodes; - let mut command_index: usize = 0; - for command in self.0.iter() { + let mut intermediate = self + .0 + .execute(&nodes) + .context(CommandFailedSnafu { index: 0_usize })?; + + if intermediate.len() == 0 { + warn!("Command resulted in an empty result set"); + } + + let mut command_index: usize = 1; + for command in self.1.iter() { trace!("Running Next: {:#?}", &command); trace!("Current Element Set: {:#?}", &intermediate); diff --git a/src/command.rs b/src/element_processing/command/mod.rs similarity index 54% rename from src/command.rs rename to src/element_processing/command/mod.rs index fc05eb8..c3e0a41 100644 --- a/src/command.rs +++ b/src/element_processing/command/mod.rs @@ -1,104 +1,56 @@ +#[cfg(test)] +mod tests; + use html_escape::{encode_double_quoted_attribute, encode_text}; use log::trace; -use snafu::{Backtrace, ResultExt, Snafu}; +use snafu::ResultExt; use std::fmt::Debug; -use std::fs::File; -use std::io::{BufReader, Read}; use std::ops::Add; -use crate::html::{HtmlContent, HtmlTag}; -use crate::pipeline::PipelineError; -use crate::{CssSelectorList, Pipeline}; - -#[derive(Debug, Snafu)] -pub enum CommandError { - #[snafu(display("Failed to remove HTML node"))] - RemovingNodeFailed { - #[snafu(backtrace)] - source: crate::html::IndexError, - }, - #[snafu(display("Sub-Pipeline failed"))] - SubpipelineFailed { - #[snafu(backtrace)] - #[snafu(source(from(PipelineError, Box::new)))] - source: Box, - }, - #[snafu(display("Failed to read input from"))] - ReadingInputFailed { - source: std::io::Error, - backtrace: Backtrace, - }, - #[snafu(display("Failed to parse input HTML"))] - ParsingInputFailed { - source: tl::ParseError, - backtrace: Backtrace, - }, 
- #[snafu(display("Failed to convert parsed HTML into memory model"))] - LoadingParsedHtmlFailed { - #[snafu(backtrace)] - source: crate::html::StreamingEditorError, - }, -} - -/// Is the value directly defined or is it a sub-pipeline? -#[derive(Debug, PartialEq, Clone)] -pub enum ValueSource { - StringValue(String), -} - -impl ValueSource { - pub(crate) fn render(&self) -> String { - match self { - ValueSource::StringValue(value) => value.clone(), - } - } -} +use super::pipeline::ElementProcessingPipeline; +use crate::element_creating::ElementCreatingPipeline; +use crate::html::HtmlContent; +use crate::{CommandError, CssSelectorList, SubpipelineFailedSnafu, ValueSource}; #[derive(Debug, PartialEq, Clone)] -pub enum Command<'a> { - /// Find all nodes, beginning at the input, that match the given CSS selector +pub(crate) enum ElementProcessingCommand<'a> { + /// Find all nodes, beginning at the input, that match the given CSS selector and detach them /// and return only those - Only(CssSelectorList<'a>), + ExtractElement(CssSelectorList<'a>), /// Find all nodes, beginning at the input, that match the given CSS selector /// and remove them from their parent nodes. /// Returns the input as result. - Without(CssSelectorList<'a>), + RemoveElement(CssSelectorList<'a>), /// runs a sub-pipeline on each element matching the given CSS selector /// Returns the input as result. - ForEach(CssSelectorList<'a>, Pipeline<'a>), + ForEach(CssSelectorList<'a>, ElementProcessingPipeline<'a>), /// runs a sub-pipeline and replaces each element matching the given CSS selector with the result of the pipeline /// Returns the input as result. - Replace(CssSelectorList<'a>, Pipeline<'a>), + Replace(CssSelectorList<'a>, ElementCreatingPipeline<'a>), /// Remove the given attribute from all currently selected nodes /// Returns the input as result. 
- ClearAttribute(String), + ClearAttribute(&'a str), /// Remove all children of the currently selected nodes /// Returns the input as result ClearContent, /// Add or Reset a given attribute with a new value /// Returns the input as result. - SetAttribute(String, ValueSource), + SetAttribute(&'a str, ValueSource<'a>), /// Remove all children of the currently selected nodes and add a new text as child instead /// Returns the input as result. - SetTextContent(ValueSource), + SetTextContent(ValueSource<'a>), /// adds a new text as child /// Returns the input as result. - AddTextContent(ValueSource), + AddTextContent(ValueSource<'a>), /// adds a new comment as child /// Returns the input as result. - AddComment(ValueSource), + AddComment(ValueSource<'a>), /// runs a sub-pipeline and adds the result as child /// Returns the input as result. - AddElement(Pipeline<'a>), - /// creates an HTML element of given type - /// Returns the created element as result. - CreateElement(String), - /// reads a different file into memory - /// Returns the content of that file as result. - ReadFrom(String), + AddElement(ElementCreatingPipeline<'a>), } -impl<'a> Command<'a> { +impl<'a> ElementProcessingCommand<'a> { /// perform the action defined by the command on the set of nodes /// and return the calculated results. 
/// For some command the output can be equal to the input, @@ -108,28 +60,42 @@ impl<'a> Command<'a> { input: &Vec>, ) -> Result>, CommandError> { match self { - Command::Only(selector) => Self::only(input, selector), - Command::Without(selector) => Self::without(input, selector), - Command::ClearAttribute(attribute) => Self::clear_attr(input, attribute), - Command::ClearContent => Self::clear_content(input), - Command::SetAttribute(attribute, value_source) => { + ElementProcessingCommand::ExtractElement(selector) => { + Self::extract_element(input, selector) + } + ElementProcessingCommand::RemoveElement(selector) => { + Self::remove_element(input, selector) + } + ElementProcessingCommand::ClearAttribute(attribute) => { + Self::clear_attr(input, attribute) + } + ElementProcessingCommand::ClearContent => Self::clear_content(input), + ElementProcessingCommand::SetAttribute(attribute, value_source) => { Self::set_attr(input, attribute, value_source) } - Command::SetTextContent(value_source) => Self::set_text_content(input, value_source), - Command::AddTextContent(value_source) => Self::add_text_content(input, value_source), - Command::AddComment(value_source) => Self::add_comment(input, value_source), - Command::ForEach(selector, pipeline) => Self::for_each(input, selector, pipeline), - Command::AddElement(pipeline) => Self::add_element(input, pipeline), - Command::CreateElement(element_name) => Self::create_element(element_name), - Command::Replace(selector, pipeline) => Self::replace(input, selector, pipeline), - Command::ReadFrom(file_path) => Self::read_from(file_path), + ElementProcessingCommand::SetTextContent(value_source) => { + Self::set_text_content(input, value_source) + } + ElementProcessingCommand::AddTextContent(value_source) => { + Self::add_text_content(input, value_source) + } + ElementProcessingCommand::AddComment(value_source) => { + Self::add_comment(input, value_source) + } + ElementProcessingCommand::ForEach(selector, pipeline) => { + 
Self::for_each(input, selector, pipeline) + } + ElementProcessingCommand::AddElement(pipeline) => Self::add_element(input, pipeline), + ElementProcessingCommand::Replace(selector, pipeline) => { + Self::replace(input, selector, pipeline) + } } } fn for_each( input: &Vec>, selector: &CssSelectorList<'a>, - pipeline: &Pipeline, + pipeline: &ElementProcessingPipeline, ) -> Result>, CommandError> { let queried_elements = selector.query(input); let _ = pipeline.run_on(queried_elements); @@ -137,29 +103,31 @@ impl<'a> Command<'a> { Ok(input.clone()) } - fn only( + fn extract_element( input: &Vec>, selector: &CssSelectorList<'a>, ) -> Result>, CommandError> { - trace!("Running ONLY command using selector: {:#?}", selector); - - let mut matching_elements = selector.query(input); - - for element in &mut matching_elements { - element.detach() - } + trace!( + "Running EXTRACT-ELEMENT command using selector: {:#?}", + selector + ); - Ok(matching_elements) + Ok(selector + .query(input) + .iter() + .map(|e| rctree::Node::clone(e).make_deep_copy()) + .collect::>()) } - fn without( + fn remove_element( input: &Vec>, selector: &CssSelectorList<'a>, ) -> Result>, CommandError> { trace!("Running WITHOUT command using selector: {:#?}", selector); + let findings = selector.query(input); - for mut node in findings { + for node in findings { node.detach(); } @@ -169,12 +137,16 @@ impl<'a> Command<'a> { fn replace( input: &Vec>, selector: &CssSelectorList<'a>, - pipeline: &Pipeline, + pipeline: &ElementCreatingPipeline, ) -> Result>, CommandError> { + trace!("Running REPLACE command using selector: {:#?}", selector); + let queried_elements = selector.query(input); - let mut created_elements = pipeline.run_on(vec![]).context(SubpipelineFailedSnafu)?; - for mut element_for_replacement in queried_elements { + for element_for_replacement in queried_elements { + let mut created_elements = pipeline + .run_on(vec![rctree::Node::clone(&element_for_replacement)]) + 
.context(SubpipelineFailedSnafu)?; for new_element in &mut created_elements { let copy = new_element.make_deep_copy(); element_for_replacement.insert_before(copy); @@ -187,14 +159,15 @@ impl<'a> Command<'a> { fn clear_attr( input: &Vec>, - attribute: &String, + attr_name: &str, ) -> Result>, CommandError> { - trace!("Running CLEAR-ATTR command for attr: {:#?}", attribute); + trace!("Running CLEAR-ATTR command for attr: {:#?}", attr_name); + let attribute = String::from(attr_name); for node in input { - let mut working_copy = rctree::Node::clone(node); + let working_copy = rctree::Node::clone(node); let mut data = working_copy.borrow_mut(); - data.clear_attribute(attribute); + data.clear_attribute(&attribute); } Ok(input.clone()) @@ -206,7 +179,7 @@ impl<'a> Command<'a> { trace!("Running CLEAR-CONTENT command"); for node in input { - for mut child in node.children() { + for child in node.children() { child.detach() } } @@ -216,7 +189,7 @@ impl<'a> Command<'a> { fn set_attr( input: &Vec>, - attribute: &String, + attribute: &str, value_source: &ValueSource, ) -> Result>, CommandError> { trace!( @@ -226,11 +199,13 @@ impl<'a> Command<'a> { ); for node in input { - let mut working_copy = rctree::Node::clone(node); - let mut data = working_copy.borrow_mut(); - let rendered_value = value_source.render(); + let rendered_value = value_source.render(node).context(SubpipelineFailedSnafu)?; + let rendered_value = rendered_value.join(""); let rendered_value = String::from(encode_double_quoted_attribute(&rendered_value)); let rendered_value = rendered_value.replace("\n", "\\n"); + + let working_copy = rctree::Node::clone(node); + let mut data = working_copy.borrow_mut(); data.set_attribute(attribute, rendered_value); } @@ -248,13 +223,15 @@ impl<'a> Command<'a> { for node in input { // first clear everything that was there before - for mut child in node.children() { + for child in node.children() { child.detach() } - let mut working_copy = rctree::Node::clone(node); - let 
rendered_value = value_source.render(); + let rendered_value = value_source.render(node).context(SubpipelineFailedSnafu)?; + let rendered_value = rendered_value.join(""); let rendered_value = String::from(encode_text(&rendered_value)); + + let working_copy = rctree::Node::clone(node); working_copy.append(rctree::Node::new(HtmlContent::Text(rendered_value))); } @@ -271,9 +248,11 @@ impl<'a> Command<'a> { ); for node in input { - let mut working_copy = rctree::Node::clone(node); - let rendered_value = value_source.render(); + let rendered_value = value_source.render(node).context(SubpipelineFailedSnafu)?; + let rendered_value = rendered_value.join(""); let rendered_value = String::from(encode_text(&rendered_value)); + + let working_copy = rctree::Node::clone(node); working_copy.append(rctree::Node::new(HtmlContent::Text(rendered_value))); } @@ -290,9 +269,11 @@ impl<'a> Command<'a> { ); for node in input { - let mut working_copy = rctree::Node::clone(node); - let rendered_value = value_source.render(); + let rendered_value = value_source.render(node).context(SubpipelineFailedSnafu)?; + let rendered_value = rendered_value.join(""); let rendered_value = rendered_value.replace("--", "\\x2D\\x2D"); + + let working_copy = rctree::Node::clone(node); working_copy.append(rctree::Node::new(HtmlContent::Comment(rendered_value))); } @@ -301,57 +282,37 @@ impl<'a> Command<'a> { fn add_element( input: &Vec>, - pipeline: &Pipeline, + pipeline: &ElementCreatingPipeline, ) -> Result>, CommandError> { + trace!("Running ADD-ELEMENT command"); + for node in input { if let Some(new_element) = pipeline .run_on(vec![]) .context(SubpipelineFailedSnafu)? 
.pop() { - let mut working_copy = rctree::Node::clone(node); + let working_copy = rctree::Node::clone(node); working_copy.append(new_element); } } Ok(input.clone()) } - - fn create_element(name: &String) -> Result>, CommandError> { - Ok(vec![rctree::Node::new(HtmlContent::Tag(HtmlTag::of_name( - name.clone(), - )))]) - } - - fn read_from(file_path: &String) -> Result>, CommandError> { - let file = File::open(file_path).context(ReadingInputFailedSnafu)?; - let mut buffered_reader = BufReader::new(file); - - let mut string_content = String::new(); - buffered_reader - .read_to_string(&mut string_content) - .context(ReadingInputFailedSnafu)?; - - let dom = tl::parse(&string_content, tl::ParserOptions::default()) - .context(ParsingInputFailedSnafu)?; - let mut root_element = HtmlContent::import(dom).context(LoadingParsedHtmlFailedSnafu)?; - - Ok(vec![root_element.make_deep_copy()]) - } } -impl<'a> Add> for Command<'a> { - type Output = Vec>; +impl<'a> Add> for ElementProcessingCommand<'a> { + type Output = Vec>; - fn add(self, rhs: Command<'a>) -> Self::Output { + fn add(self, rhs: ElementProcessingCommand<'a>) -> Self::Output { vec![self, rhs] } } -impl<'a> Add>>> for Command<'a> { - type Output = Vec>; +impl<'a> Add>>> for ElementProcessingCommand<'a> { + type Output = Vec>; - fn add(self, rhs: Option>>) -> Self::Output { + fn add(self, rhs: Option>>) -> Self::Output { if let Some(mut vec) = rhs { vec.insert(0, self); return vec; diff --git a/src/element_processing/command/tests.rs b/src/element_processing/command/tests.rs new file mode 100644 index 0000000..c22345c --- /dev/null +++ b/src/element_processing/command/tests.rs @@ -0,0 +1,522 @@ +use crate::element_creating::{ElementCreatingCommand, ElementCreatingPipeline}; +use crate::html::HtmlRenderable; +use crate::string_creating::{ElementSelectingCommand, ValueExtractingCommand}; +use crate::{ + element_processing::{command::ElementProcessingCommand, pipeline::ElementProcessingPipeline}, + load_inline_html, 
CssSelector, CssSelectorList, CssSelectorPath, CssSelectorStep, + StringValueCreatingPipeline, ValueSource, +}; + +const TEST_HTML_DOCUMENT: &str = r#" + + +

Title

+

Some first text

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"#; + +#[test] +fn extract_command() { + let command = + ElementProcessingCommand::ExtractElement(CssSelectorList::new(vec![CssSelectorPath::new( + CssSelector::for_element("h1"), + vec![CssSelectorStep::adjacent_sibling(CssSelector::for_element( + "p", + ))], + )])); + + let root = load_inline_html(TEST_HTML_DOCUMENT); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"

Some first text

"#) + ); +} + +#[test] +fn remove_command() { + let command = + ElementProcessingCommand::RemoveElement(CssSelectorList::new(vec![CssSelectorPath::new( + CssSelector::for_element("h1"), + vec![CssSelectorStep::adjacent_sibling(CssSelector::for_element( + "p", + ))], + )])); + + let root = load_inline_html(TEST_HTML_DOCUMENT); + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#" + + +

Title

+ +

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); +} + +#[test] +fn clear_attribute() { + let command = ElementProcessingCommand::ClearAttribute("data-test"); + + let root = load_inline_html(r#"
Some Content
"#); + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Some Content
"#) + ); +} + +#[test] +fn clear_content() { + let command = ElementProcessingCommand::ClearContent; + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
"#) + ); +} + +#[test] +fn set_attribute_from_string_over_existing_attr() { + let command = + ElementProcessingCommand::SetAttribute("data-test", ValueSource::StringValue("some text")); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Some Content
"#) + ); +} + +#[test] +fn set_attribute_from_string_as_new_attr() { + let command = + ElementProcessingCommand::SetAttribute("data-fubar", ValueSource::StringValue("some text")); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#"
Some Content
"# + ) + ); +} + +#[test] +fn set_attribute_from_other_attr_as_new_attr() { + let command = ElementProcessingCommand::SetAttribute( + "data-fubar", + ValueSource::SubPipeline(StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), + )), + ); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Some Content
"#) + ); +} + +#[test] +fn set_text_content_from_string_for_tag() { + let command = + ElementProcessingCommand::SetTextContent(ValueSource::StringValue("Other Content")); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Other Content
"#) + ); +} + +#[test] +fn set_text_content_from_string_for_empty_tag() { + let command = + ElementProcessingCommand::SetTextContent(ValueSource::StringValue("Other Content")); + + let root = load_inline_html(r#"
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Other Content
"#) + ); +} + +#[test] +fn set_text_content_from_attr_for_empty_tag() { + let command = ElementProcessingCommand::SetTextContent(ValueSource::SubPipeline( + StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), + ), + )); + + let root = load_inline_html(r#"
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
foo
"#) + ); +} + +#[test] +fn set_text_content_from_string_for_tag_with_multiple_children() { + let command = + ElementProcessingCommand::SetTextContent(ValueSource::StringValue("Other Content")); + + let root = load_inline_html( + r#"
Some special Content.
"#, + ); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Other Content
"#) + ); +} + +#[test] +fn add_text_content_from_string_for_tag() { + let command = + ElementProcessingCommand::AddTextContent(ValueSource::StringValue("Other Content")); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Some ContentOther Content
"#) + ); +} + +#[test] +fn add_text_content_from_string_for_empty_tag() { + let command = ElementProcessingCommand::AddTextContent(ValueSource::SubPipeline( + StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), + ), + )); + + let root = load_inline_html(r#"
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
foo
"#) + ); +} + +#[test] +fn add_text_content_from_attr_for_empty_tag() { + let command = + ElementProcessingCommand::AddTextContent(ValueSource::StringValue("Other Content")); + + let root = load_inline_html(r#"
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Other Content
"#) + ); +} + +#[test] +fn add_text_content_from_string_for_tag_with_multiple_children() { + let command = + ElementProcessingCommand::AddTextContent(ValueSource::StringValue("Other Content")); + + let root = load_inline_html( + r#"
Some special Content.
"#, + ); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#"
Some special Content. Other Content
"# + ) + ); +} + +#[test] +fn add_comment_from_string_for_tag() { + let command = ElementProcessingCommand::AddComment(ValueSource::StringValue("Other Content")); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#"
Some Content
"# + ) + ); +} + +#[test] +fn add_comment_from_string_for_empty_tag() { + let command = ElementProcessingCommand::AddComment(ValueSource::StringValue("Other Content")); + + let root = load_inline_html(r#"
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
"#) + ); +} + +#[test] +fn add_comment_from_attr_for_empty_tag() { + let command = ElementProcessingCommand::AddComment(ValueSource::SubPipeline( + StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), + ), + )); + + let root = load_inline_html(r#"
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
"#) + ); +} + +#[test] +fn add_comment_from_string_for_tag_with_multiple_children() { + let command = ElementProcessingCommand::AddComment(ValueSource::StringValue("Other Content")); + + let root = load_inline_html( + r#"
Some special Content.
"#, + ); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#"
Some special Content.
"# + ) + ); +} + +#[test] +fn for_each_on_ul() { + let command = ElementProcessingCommand::ForEach( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element( + "li", + ))]), + ElementProcessingPipeline::new(vec![ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::StringValue("x"), + )]), + ); + + let root = load_inline_html(r#"
  • 1
  • 2
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
  • 1
  • 2
"#) + ); +} + +#[test] +fn add_element_from_create_for_tag() { + let command = ElementProcessingCommand::AddElement(ElementCreatingPipeline::new( + ElementCreatingCommand::CreateElement("div"), + None, + )); + + let root = load_inline_html(r#"
Some Content
"#); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"
Some Content
"#) + ); +} + +//noinspection DuplicatedCode +#[test] +fn replace_from_create() { + let command = ElementProcessingCommand::Replace( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( + "replace-me", + ))]), + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("p"), None), + ); + + let root = load_inline_html( + r#"
Some Content
This will be kept
"#, + ); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"

This will be kept
"#) + ); +} + +//noinspection DuplicatedCode +#[test] +fn replace_using_load_file() { + let command = ElementProcessingCommand::Replace( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( + "replace-me", + ))]), + ElementCreatingPipeline::new( + ElementCreatingCommand::FromFile("tests/single_div.html"), + None, + ), + ); + + let root = load_inline_html( + r#"
Some Content
This will be kept
"#, + ); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#"
This is new
This will be kept
"# + ) + ); +} + +//noinspection DuplicatedCode +#[test] +fn replace_using_query_replaced() { + let command = ElementProcessingCommand::Replace( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( + "replace-me", + ))]), + ElementCreatingPipeline::new( + ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_element("p")), + ])), + None, + ), + ); + + let root = load_inline_html( + r#"
Some

levels

This will be kept
"#, + ); + + let mut result = command.execute(&vec![rctree::Node::clone(&root)]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from( + r#"

Content

levels

This will be kept
"# + ) + ); +} diff --git a/src/element_processing/mod.rs b/src/element_processing/mod.rs new file mode 100644 index 0000000..5583f0d --- /dev/null +++ b/src/element_processing/mod.rs @@ -0,0 +1,5 @@ +mod command; +mod pipeline; + +pub(crate) use command::ElementProcessingCommand; +pub(crate) use pipeline::ElementProcessingPipeline; diff --git a/src/element_processing/pipeline.rs b/src/element_processing/pipeline.rs new file mode 100644 index 0000000..ada897f --- /dev/null +++ b/src/element_processing/pipeline.rs @@ -0,0 +1,126 @@ +use crate::element_processing::command::ElementProcessingCommand; +use log::{trace, warn}; +use snafu::ResultExt; +use std::fmt::Debug; + +use crate::html::HtmlContent; +use crate::{CommandFailedSnafu, PipelineError}; + +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct ElementProcessingPipeline<'a>(Vec>); + +/// The command pipeline: a list of individual commands +/// each to execute on the result of the previous command +impl<'a> ElementProcessingPipeline<'a> { + pub fn new(content: Vec>) -> Self { + ElementProcessingPipeline(content) + } + + /// execute the pipeline on the given nodes by + /// running the first commands on those nodes and all the following commands + /// on their predecessors result. 
+ /// The result of the last command is the result of this pipeline + pub(crate) fn run_on( + &self, + nodes: Vec>, + ) -> Result>, PipelineError> { + let mut intermediate = nodes; + let mut command_index: usize = 0; + for command in self.0.iter() { + trace!("Running Next: {:#?}", &command); + trace!("Current Element Set: {:#?}", &intermediate); + + intermediate = command.execute(&intermediate).context(CommandFailedSnafu { + index: command_index, + })?; + command_index += 1; + + if intermediate.len() == 0 { + warn!("Command resulted in an empty result set"); + } + } + + return Ok(intermediate); + } +} + +#[cfg(test)] +mod tests { + use crate::html::HtmlRenderable; + use crate::{ + element_processing::{ + command::ElementProcessingCommand, pipeline::ElementProcessingPipeline, + }, + CssSelector, CssSelectorList, CssSelectorPath, CssSelectorStep, HtmlContent, + }; + + const TEST_HTML_DOCUMENT: &str = r#" + + +

Title

+

Some first text

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"#; + + #[test] + fn run_on_single_command() { + let pipeline = + ElementProcessingPipeline::new(vec![ElementProcessingCommand::ExtractElement( + CssSelectorList::new(vec![CssSelectorPath::new( + CssSelector::for_element("h1"), + vec![CssSelectorStep::adjacent_sibling(CssSelector::for_element( + "p", + ))], + )]), + )]); + + let dom = tl::parse(TEST_HTML_DOCUMENT, tl::ParserOptions::default()).unwrap(); + let starting_elements = HtmlContent::import(dom).unwrap(); + + let mut result = pipeline + .run_on(vec![rctree::Node::clone(&starting_elements)]) + .unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"

Some first text

"#) + ); + } + + #[test] + fn run_on_two_commands() { + let pipeline = ElementProcessingPipeline::new(vec![ + ElementProcessingCommand::ExtractElement(CssSelectorList::new(vec![ + CssSelectorPath::new( + CssSelector::for_element("h1"), + vec![CssSelectorStep::adjacent_sibling(CssSelector::for_element( + "p", + ))], + ), + ])), + ElementProcessingCommand::RemoveElement(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_element("em")), + ])), + ]); + + let dom = tl::parse(TEST_HTML_DOCUMENT, tl::ParserOptions::default()).unwrap(); + let starting_elements = HtmlContent::import(dom).unwrap(); + let mut result = pipeline.run_on(vec![starting_elements]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!( + first_result.outer_html(), + String::from(r#"

Some text

"#) + ); + } +} diff --git a/src/html/mod.rs b/src/html/mod.rs index 2071b31..9dae1dd 100644 --- a/src/html/mod.rs +++ b/src/html/mod.rs @@ -10,6 +10,7 @@ use tl::{HTMLTag, NodeHandle, Parser, VDom}; mod tests; #[derive(Debug, Snafu)] +#[snafu(visibility(pub(crate)))] pub enum StreamingEditorError { #[snafu(display("Nothing Imported from tl"))] NothingImported { backtrace: Backtrace }, @@ -162,8 +163,7 @@ impl HtmlContent { attributes.insert(String::from(key), value_string); } - let mut converted = - Node::::new(HtmlContent::Tag(HtmlTag { name, attributes })); + let converted = Node::::new(HtmlContent::Tag(HtmlTag { name, attributes })); for child in tag.children().top().iter() { converted.append(Self::convert_node(child, parser)?) @@ -267,11 +267,24 @@ impl HtmlContent { } } - pub(crate) fn set_attribute(&mut self, attribute: &String, value: String) { + pub(crate) fn set_attribute(&mut self, attribute: impl Into, value: impl Into) { match self { HtmlContent::Comment(_) | HtmlContent::Text(_) => (), HtmlContent::Tag(tag) => { - tag.attributes.insert(attribute.clone(), value); + tag.attributes.insert(attribute.into(), value.into()); + } + } + } + + pub(crate) fn get_attribute(&self, attribute: &String) -> Option { + match self { + HtmlContent::Comment(_) | HtmlContent::Text(_) => None, + HtmlContent::Tag(tag) => { + if let Some(value) = tag.attributes.get(attribute) { + Some(value.clone()) + } else { + None + } } } } @@ -327,6 +340,7 @@ impl HtmlRenderable for Node { } #[derive(Debug, Snafu)] +#[snafu(visibility(pub(crate)))] pub enum IndexError { #[snafu(display("Index seems out of date. 
NodeHandle couldn't be found in Parser"))] OutdatedIndex { backtrace: Backtrace }, diff --git a/src/html/tests.rs b/src/html/tests.rs index 72d5aa3..132140c 100644 --- a/src/html/tests.rs +++ b/src/html/tests.rs @@ -28,25 +28,25 @@ fn build_tag_with_attr() -> rctree::Node { } fn build_tag_with_comment() -> rctree::Node { - let mut unit_of_test = build_tag(); + let unit_of_test = build_tag(); unit_of_test.append(build_comment()); unit_of_test } fn build_tag_with_text() -> rctree::Node { - let mut unit_of_test = build_tag(); + let unit_of_test = build_tag(); unit_of_test.append(build_text()); unit_of_test } fn build_tag_with_complex_content() -> rctree::Node { - let mut unit_of_test = build_tag(); + let unit_of_test = build_tag(); unit_of_test.append(build_text()); unit_of_test.append(build_comment()); - let mut child_tag = build_tag_with_attr(); + let child_tag = build_tag_with_attr(); child_tag.append(build_text_with_content("Other Text")); unit_of_test.append(child_tag); @@ -251,7 +251,7 @@ fn convert_single_vdom_works() { .unwrap(); let converted = HtmlContent::import(dom).unwrap(); - let mut expected = rctree::Node::::new(HtmlContent::Tag(HtmlTag::of_name("html"))); + let expected = rctree::Node::::new(HtmlContent::Tag(HtmlTag::of_name("html"))); expected.append(rctree::Node::::new(HtmlContent::Tag( HtmlTag::of_name("head"), ))); @@ -259,7 +259,7 @@ fn convert_single_vdom_works() { String::from("nothing here"), ))); - let mut body = rctree::Node::::new(HtmlContent::Tag(HtmlTag { + let body = rctree::Node::::new(HtmlContent::Tag(HtmlTag { name: String::from("body"), attributes: BTreeMap::::from([ (String::from("class"), String::from("simple")), @@ -284,7 +284,7 @@ fn convert_empty_comments_works() { let converted = HtmlContent::import(dom).unwrap(); - let mut body = rctree::Node::::new(HtmlContent::Tag(HtmlTag::of_name("body"))); + let body = rctree::Node::::new(HtmlContent::Tag(HtmlTag::of_name("body"))); body.append(rctree::Node::::new(HtmlContent::Text( 
String::from("Hello "), ))); diff --git a/src/lib.rs b/src/lib.rs index 8e803ce..0e46aee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,23 +1,22 @@ use log::debug; use peg::str::LineCol; use snafu::{Backtrace, ResultExt, Snafu}; -use std::io::{BufRead, Write}; +use std::fs::File; +use std::io::{BufRead, BufReader, Read, Write}; -pub use crate::command::{Command, ValueSource}; pub(crate) use crate::css::{ CssAttributeComparison, CssAttributeSelector, CssPseudoClass, CssSelector, CssSelectorList, CssSelectorPath, CssSelectorStep, }; use crate::html::{HtmlContent, HtmlRenderable}; +use crate::string_creating::StringValueCreatingPipeline; -pub use crate::parsing::grammar; -pub use crate::pipeline::Pipeline; - -mod command; mod css; +mod element_creating; +mod element_processing; mod html; mod parsing; -mod pipeline; +mod string_creating; #[derive(Debug, Snafu)] pub enum StreamingEditorError { @@ -54,7 +53,49 @@ pub enum StreamingEditorError { #[snafu(display("Failed to run pipeline"))] RunningPipelineFailed { #[snafu(backtrace)] - source: crate::pipeline::PipelineError, + source: PipelineError, + }, +} + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub(crate)))] +pub enum PipelineError { + #[snafu(display("Command at index {index} failed"))] + CommandFailed { + index: usize, + #[snafu(backtrace)] + source: CommandError, + }, +} + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub(crate)))] +pub enum CommandError { + #[snafu(display("Failed to remove HTML node"))] + RemovingNodeFailed { + #[snafu(backtrace)] + source: crate::html::IndexError, + }, + #[snafu(display("Sub-Pipeline failed"))] + SubpipelineFailed { + #[snafu(backtrace)] + #[snafu(source(from(PipelineError, Box::new)))] + source: Box, + }, + #[snafu(display("Failed to read input from"))] + ReadingCommandInputFailed { + source: std::io::Error, + backtrace: Backtrace, + }, + #[snafu(display("Failed to parse input HTML"))] + ParsingCommandInputFailed { + source: tl::ParseError, + backtrace: Backtrace, + }, + 
#[snafu(display("Failed to convert parsed HTML into memory model"))] + LoadingParsedCommandHtmlFailed { + #[snafu(backtrace)] + source: crate::html::StreamingEditorError, }, } @@ -119,3 +160,44 @@ where eprintln!("{}", backtrace); } } + +/// Is the value directly defined or is it a sub-pipeline? +#[derive(Debug, PartialEq, Clone)] +pub(crate) enum ValueSource<'a> { + StringValue(&'a str), + SubPipeline(StringValueCreatingPipeline<'a>), +} + +impl<'a> ValueSource<'a> { + pub fn render( + &self, + element: &rctree::Node, + ) -> Result, PipelineError> { + match self { + ValueSource::StringValue(value) => Ok(vec![String::from(*value)]), + ValueSource::SubPipeline(pipeline) => pipeline.run_on(element), + } + } +} + +pub(crate) fn load_html_file(file_path: &str) -> Result, CommandError> { + let file = File::open(file_path).context(ReadingCommandInputFailedSnafu)?; + let mut buffered_reader = BufReader::new(file); + + let mut string_content = String::new(); + buffered_reader + .read_to_string(&mut string_content) + .context(ReadingCommandInputFailedSnafu)?; + + let dom = tl::parse(&string_content, tl::ParserOptions::default()) + .context(ParsingCommandInputFailedSnafu)?; + + HtmlContent::import(dom).context(LoadingParsedCommandHtmlFailedSnafu) +} + +#[cfg(test)] +pub(crate) fn load_inline_html(html: &str) -> rctree::Node { + let dom = tl::parse(html, tl::ParserOptions::default()).unwrap(); + + HtmlContent::import(dom).unwrap() +} diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index d752311..9cdaf20 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -1,8 +1,13 @@ use peg::parser; use crate::{ - Command, CssAttributeComparison, CssAttributeSelector, CssPseudoClass, CssSelector, - CssSelectorList, CssSelectorPath, CssSelectorStep, Pipeline, ValueSource, + element_creating::{ElementCreatingCommand, ElementCreatingPipeline}, + element_processing::{ElementProcessingCommand, ElementProcessingPipeline}, + string_creating::{ + ElementSelectingCommand, 
StringValueCreatingPipeline, ValueExtractingCommand, + }, + CssAttributeComparison, CssAttributeSelector, CssPseudoClass, CssSelector, CssSelectorList, + CssSelectorPath, CssSelectorStep, ValueSource, }; #[cfg(test)] @@ -35,7 +40,7 @@ parser! { / "=>" rule number() -> usize = n:$(['0'..='9']+) { n.parse().unwrap() } - pub(crate) rule identifier() -> &'input str + pub(super) rule identifier() -> &'input str = i:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' ]+) { i } rule css_attribute() -> CssAttributeSelector<'input> = "[" a:(identifier()) "]" { CssAttributeSelector::<'input> { attribute: a, operator: CssAttributeComparison::Exist, value: None } } @@ -80,39 +85,40 @@ parser! { pub(crate) rule css_selector_list() -> CssSelectorList<'input> = v:(css_selector_path() ++ ",") { CssSelectorList::new(v) } - pub(crate) rule string_value() -> &'input str + pub(super) rule string_value() -> &'input str = "\"" s:$([^'"']+) "\"" { s } / "'" s:$([^'\'']+) "'" { s } / "?" s:$([^'?']+) "?" { s } - rule only_command() -> Command<'input> - = ("ONLY" / "SELECT") "{" whitespace()? oc:css_selector_list() whitespace()? "}" { Command::Only(oc) } - rule without_command() -> Command<'input> - = ("WITHOUT" / "FILTER") "{" whitespace()? oc:css_selector_list() whitespace()? "}" { Command::Without(oc) } - rule for_each_command() -> Command<'input> - = "FOR" "-EACH"? "{" whitespace()? oc:css_selector_list() whitespace()? iterate_marker() whitespace()? sp:pipeline() whitespace()? "}" { Command::ForEach(oc, sp) } - rule replace_command() -> Command<'input> - = ("REPLACE"/"MAP") "{" whitespace()? oc:css_selector_list() whitespace()? assign_marker() whitespace()? sp:element_creating_pipeline() whitespace()? "}" { Command::Replace(oc, sp)} - rule clear_attr_command() -> Command<'input> - = "CLEAR-ATTR{" whitespace()? a:identifier() whitespace()? 
"}" { Command::ClearAttribute(String::from(a)) } - rule clear_content_command() -> Command<'input> - = "CLEAR-CONTENT" { Command::ClearContent } - rule set_attr_command() -> Command<'input> - = "SET-ATTR{" whitespace()? a:identifier() whitespace()? assign_marker() whitespace()? v:string_value() "}" { Command::SetAttribute(String::from(a), ValueSource::StringValue(String::from(v))) } - rule set_text_content_command() -> Command<'input> - = "SET-TEXT-CONTENT{" whitespace()? (assign_marker() whitespace()?)? v:string_value() "}" { Command::SetTextContent(ValueSource::StringValue(String::from(v))) } - rule add_text_content_command() -> Command<'input> - = "ADD-TEXT-CONTENT{" whitespace()? (assign_marker() whitespace()?)? v:string_value() "}" { Command::AddTextContent(ValueSource::StringValue(String::from(v))) } - rule add_comment_command() -> Command<'input> - = "ADD-COMMENT{" whitespace()? (assign_marker() whitespace()?)? v:string_value() "}" { Command::AddComment(ValueSource::StringValue(String::from(v))) } - rule add_element_command() -> Command<'input> - = "ADD-ELEMENT{" whitespace()? (assign_marker() whitespace()?)? sp:element_creating_pipeline() whitespace()? "}" { Command::AddElement(sp) } - rule create_element_command() -> Command<'input> - = ("CREATE-ELEMENT"/"NEW") "{" whitespace()? n:identifier() whitespace()? "}" { Command::CreateElement(String::from(n))} - rule read_from_command() -> Command<'input> - = ("READ-FROM"/"SOURCE") "{" whitespace()? f:string_value() whitespace()? "}" { Command::ReadFrom(String::from(f)) } - pub(super) rule command() -> Command<'input> - = only_command() - / without_command() + + rule value_source() -> ValueSource<'input> + = v:string_value() { ValueSource::StringValue(v) } + / p:string_creating_pipeline() { ValueSource::SubPipeline(p) } + + rule extract_element_command() -> ElementProcessingCommand<'input> + = ("EXTRACT-ELEMENT" / "ONLY") "{" whitespace()? oc:css_selector_list() whitespace()? 
"}" { ElementProcessingCommand::ExtractElement(oc) } + rule remove_element_command() -> ElementProcessingCommand<'input> + = ("REMOVE-ELEMENT" / "WITHOUT") "{" whitespace()? oc:css_selector_list() whitespace()? "}" { ElementProcessingCommand::RemoveElement(oc) } + rule for_each_command() -> ElementProcessingCommand<'input> + = ("FOR-EACH"/"WITH") "{" whitespace()? oc:css_selector_list() whitespace()? iterate_marker() whitespace()? sp:pipeline() whitespace()? "}" { ElementProcessingCommand::ForEach(oc, sp) } + rule replace_command() -> ElementProcessingCommand<'input> + = ("REPLACE"/"MAP") "{" whitespace()? oc:css_selector_list() whitespace()? assign_marker() whitespace()? sp:element_subselect_or_creating_pipeline() whitespace()? "}" { ElementProcessingCommand::Replace(oc, sp)} + rule clear_attr_command() -> ElementProcessingCommand<'input> + = "CLEAR-ATTR{" whitespace()? a:identifier() whitespace()? "}" { ElementProcessingCommand::ClearAttribute(a) } + rule clear_content_command() -> ElementProcessingCommand<'input> + = "CLEAR-CONTENT" { ElementProcessingCommand::ClearContent } + rule set_attr_command() -> ElementProcessingCommand<'input> + = "SET-ATTR{" whitespace()? a:identifier() whitespace()? assign_marker() whitespace()? v:value_source() whitespace()? "}" { ElementProcessingCommand::SetAttribute(a, v) } + rule set_text_content_command() -> ElementProcessingCommand<'input> + = "SET-TEXT-CONTENT{" whitespace()? (assign_marker() whitespace()?)? v:value_source() whitespace()? "}" { ElementProcessingCommand::SetTextContent(v) } + rule add_text_content_command() -> ElementProcessingCommand<'input> + = "ADD-TEXT-CONTENT{" whitespace()? (assign_marker() whitespace()?)? v:value_source() whitespace()? "}" { ElementProcessingCommand::AddTextContent(v) } + rule add_comment_command() -> ElementProcessingCommand<'input> + = "ADD-COMMENT{" whitespace()? (assign_marker() whitespace()?)? v:value_source() whitespace()? 
"}" { ElementProcessingCommand::AddComment(v) } + rule add_element_command() -> ElementProcessingCommand<'input> + = "ADD-ELEMENT{" whitespace()? (assign_marker() whitespace()?)? sp:element_creating_pipeline() whitespace()? "}" { ElementProcessingCommand::AddElement(sp) } + pub(super) rule element_processing_command() -> ElementProcessingCommand<'input> + = extract_element_command() + / remove_element_command() / for_each_command() / clear_attr_command() / clear_content_command() @@ -122,14 +128,54 @@ parser! { / add_comment_command() / add_element_command() / replace_command() - rule element_source_command() -> Command<'input> + + rule create_element_command() -> ElementCreatingCommand<'input> + = ("CREATE-ELEMENT"/"NEW") "{" whitespace()? n:identifier() whitespace()? "}" { ElementCreatingCommand::CreateElement(n)} + rule load_file_command() -> ElementCreatingCommand<'input> + = ("LOAD-FILE"/"SOURCE") "{" whitespace()? f:string_value() whitespace()? "}" { ElementCreatingCommand::FromFile(f) } + rule element_creating_command() -> ElementCreatingCommand<'input> = create_element_command() - / read_from_command() - rule element_manipulating_pipeline() -> Vec> - = " | " p:(command() ** " | ") { p } - rule element_creating_pipeline() -> Pipeline<'input> - = s:element_source_command() p:element_manipulating_pipeline()? { Pipeline::new(s + p) } - pub rule pipeline() -> Pipeline<'input> - = p:(command() ** " | ") { Pipeline::new(p) } + / load_file_command() + rule element_creating_pipeline() -> ElementCreatingPipeline<'input> + = s:element_creating_command() p:element_manipulating_subpipeline()? { ElementCreatingPipeline::new(s, p) } + rule element_manipulating_subpipeline() -> Vec> + = " | " p:(element_processing_command() ** " | ") { p } + + rule query_replaced_command() -> ElementCreatingCommand<'input> + = ("QUERY-REPLACED"/"KEEP") "{" whitespace()? oc:css_selector_list() whitespace()? 
"}" { ElementCreatingCommand::FromReplaced(oc) } + rule element_subselect_or_creating_category() -> ElementCreatingCommand<'input> + = query_replaced_command() + / element_creating_command() + rule element_subselect_or_creating_pipeline() -> ElementCreatingPipeline<'input> + = s:element_subselect_or_creating_category() p:element_manipulating_subpipeline()? { ElementCreatingPipeline::new(s, p) } + + rule use_element_command() -> ElementSelectingCommand<'input> + = ("USE-ELEMENT"/"THIS") { ElementSelectingCommand::UseElement } + rule use_parent_command() -> ElementSelectingCommand<'input> + = ("USE-PARENT"/"PARENT") { ElementSelectingCommand::UseParent } + rule query_element_command() -> ElementSelectingCommand<'input> + = "QUERY-ELEMENT{" whitespace()? oc:css_selector_list() whitespace()? "}" { ElementSelectingCommand::QueryElement(oc) } + rule query_parent_command() -> ElementSelectingCommand<'input> + = "QUERY-PARENT{" whitespace()? oc:css_selector_list() whitespace()? "}" { ElementSelectingCommand::QueryParent(oc) } + rule query_root_command() -> ElementSelectingCommand<'input> + = "QUERY-ROOT{" whitespace()? oc:css_selector_list() whitespace()? "}" { ElementSelectingCommand::QueryRoot(oc) } + pub(super) rule element_selecting_command() -> ElementSelectingCommand<'input> + = use_element_command() + / use_parent_command() + / query_element_command() + / query_parent_command() + / query_root_command() + rule get_attr_command() -> ValueExtractingCommand<'input> + = "GET-ATTR{" whitespace()? a:identifier() whitespace()? 
"}" { ValueExtractingCommand::GetAttribute(a) } + rule get_text_content_command() -> ValueExtractingCommand<'input> + = "GET-TEXT-CONTENT" { ValueExtractingCommand::GetTextContent } + pub(super) rule value_extracting_command() -> ValueExtractingCommand<'input> + = get_attr_command() + / get_text_content_command() + pub(super) rule string_creating_pipeline() -> StringValueCreatingPipeline<'input> + = s:element_selecting_command() " | " e:value_extracting_command() { StringValueCreatingPipeline::new(s, e) } + + pub(crate) rule pipeline() -> ElementProcessingPipeline<'input> + = p:(element_processing_command() ** " | ") { ElementProcessingPipeline::new(p) } } } diff --git a/src/parsing/tests.rs b/src/parsing/tests.rs index ac342f3..01a347c 100644 --- a/src/parsing/tests.rs +++ b/src/parsing/tests.rs @@ -1,4 +1,15 @@ -use crate::{Command, CssSelector, CssSelectorList, CssSelectorPath, Pipeline, ValueSource}; +use crate::element_creating::{ElementCreatingCommand, ElementCreatingPipeline}; +use crate::element_processing::{ElementProcessingCommand, ElementProcessingPipeline}; +use crate::string_creating::{ + ElementSelectingCommand, StringValueCreatingPipeline, ValueExtractingCommand, +}; +use crate::{CssSelector, CssSelectorList, CssSelectorPath, ValueSource}; + +const EXEMPLARY_SUB_PIPELINE_DEFINITION: &str = "USE-ELEMENT | GET-ATTR{data-test}"; +const EXEMPLARY_SUB_PIPELINE_MODEL: StringValueCreatingPipeline = StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), +); #[test] fn parse_value_simple_doublequotes() { @@ -61,246 +72,303 @@ fn parse_value_questionmarked_cant_have_questionmarks() { } #[test] -fn parse_single_only() { - let parsed = super::grammar::command("ONLY{a}"); +fn parse_single_extract_element() { + let parsed = super::grammar::element_processing_command("EXTRACT-ELEMENT{a}"); assert_eq!( parsed, - Ok(Command::Only(CssSelectorList::new(vec![ - 
CssSelectorPath::single(CssSelector::for_element("a")) - ]))) + Ok(ElementProcessingCommand::ExtractElement( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element("a"))]) + )) ); } #[test] -fn parse_single_select_alias() { - let parsed = super::grammar::command("SELECT{a}"); +fn parse_single_extract_element_alias_only() { + let parsed = super::grammar::element_processing_command("ONLY{a}"); assert_eq!( parsed, - Ok(Command::Only(CssSelectorList::new(vec![ - CssSelectorPath::single(CssSelector::for_element("a")) - ]))) + Ok(ElementProcessingCommand::ExtractElement( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element("a"))]) + )) ); } #[test] -fn parse_single_without() { - let parsed = super::grammar::command("WITHOUT{a}"); +fn parse_single_remove_element() { + let parsed = super::grammar::element_processing_command("REMOVE-ELEMENT{a}"); assert_eq!( parsed, - Ok(Command::Without(CssSelectorList::new(vec![ - CssSelectorPath::single(CssSelector::for_element("a")) - ]))) + Ok(ElementProcessingCommand::RemoveElement( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element("a"))]) + )) ); } #[test] -fn parse_single_filter_alias() { - let parsed = super::grammar::command("FILTER{a}"); +fn parse_single_remove_element_alias_without() { + let parsed = super::grammar::element_processing_command("WITHOUT{a}"); assert_eq!( parsed, - Ok(Command::Without(CssSelectorList::new(vec![ - CssSelectorPath::single(CssSelector::for_element("a")) - ]))) + Ok(ElementProcessingCommand::RemoveElement( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element("a"))]) + )) ); } #[test] fn parse_two_grammar() { - let parsed = super::grammar::pipeline("ONLY{a} | WITHOUT{b}"); + let parsed = super::grammar::pipeline("EXTRACT-ELEMENT{a} | REMOVE-ELEMENT{b}"); assert_eq!( parsed, - Ok(Pipeline::new(vec![ - Command::Only(CssSelectorList::new(vec![CssSelectorPath::single( - CssSelector::for_element("a") - )])), - 
Command::Without(CssSelectorList::new(vec![CssSelectorPath::single( - CssSelector::for_element("b") - )])), + Ok(ElementProcessingPipeline::new(vec![ + ElementProcessingCommand::ExtractElement(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_element("a")) + ])), + ElementProcessingCommand::RemoveElement(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_element("b")) + ])), ])) ); } #[test] fn parse_single_clear_attr() { - let parsed = super::grammar::command("CLEAR-ATTR{a}"); - assert_eq!(parsed, Ok(Command::ClearAttribute(String::from("a")))); + let parsed = super::grammar::element_processing_command("CLEAR-ATTR{a}"); + assert_eq!(parsed, Ok(ElementProcessingCommand::ClearAttribute("a"))); } #[test] fn parse_single_clear_content() { - let parsed = super::grammar::command("CLEAR-CONTENT"); - assert_eq!(parsed, Ok(Command::ClearContent)); + let parsed = super::grammar::element_processing_command("CLEAR-CONTENT"); + assert_eq!(parsed, Ok(ElementProcessingCommand::ClearContent)); } #[test] fn parse_single_set_attr_by_string() { - let parsed = super::grammar::command("SET-ATTR{data-test ↤ 'some text'}"); + let parsed = super::grammar::element_processing_command("SET-ATTR{data-test ↤ 'some text'}"); assert_eq!( parsed, - Ok(Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("some text")) + Ok(ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::StringValue("some text") )) ); } #[test] fn parse_single_set_attr_by_string_with_ascii_arrow() { - let parsed = super::grammar::command("SET-ATTR{data-test <= 'some text'}"); + let parsed = super::grammar::element_processing_command("SET-ATTR{data-test <= 'some text'}"); assert_eq!( parsed, - Ok(Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("some text")) + Ok(ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::StringValue("some text") + )) + ); +} + +#[test] +fn 
parse_single_set_attr_by_sub_pipeline() { + let constructed_pipeline = format!( + "SET-ATTR{{data-test ↤ {} }}", + EXEMPLARY_SUB_PIPELINE_DEFINITION + ); + let parsed = super::grammar::element_processing_command(&constructed_pipeline); + + assert_eq!( + parsed, + Ok(ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::SubPipeline(EXEMPLARY_SUB_PIPELINE_MODEL.clone()) )) ); } #[test] fn parse_single_set_text_content_by_string() { - let parsed = super::grammar::command("SET-TEXT-CONTENT{'some text'}"); + let parsed = super::grammar::element_processing_command("SET-TEXT-CONTENT{'some text'}"); assert_eq!( parsed, - Ok(Command::SetTextContent(ValueSource::StringValue( - String::from("some text") - ))) + Ok(ElementProcessingCommand::SetTextContent( + ValueSource::StringValue("some text") + )) ); } #[test] fn parse_single_set_text_content_by_string_with_arrow() { - let parsed = super::grammar::command("SET-TEXT-CONTENT{ ↤ 'some text'}"); + let parsed = super::grammar::element_processing_command("SET-TEXT-CONTENT{ ↤ 'some text'}"); assert_eq!( parsed, - Ok(Command::SetTextContent(ValueSource::StringValue( - String::from("some text") - ))) + Ok(ElementProcessingCommand::SetTextContent( + ValueSource::StringValue("some text") + )) + ); +} + +#[test] +fn parse_single_set_text_content_by_sub_pipeline() { + let constructed_pipeline = format!( + "SET-TEXT-CONTENT{{ {} }}", + EXEMPLARY_SUB_PIPELINE_DEFINITION + ); + let parsed = super::grammar::element_processing_command(&constructed_pipeline); + + assert_eq!( + parsed, + Ok(ElementProcessingCommand::SetTextContent( + ValueSource::SubPipeline(EXEMPLARY_SUB_PIPELINE_MODEL.clone()) + )) ); } #[test] fn parse_single_set_text_content_by_string_with_ascii_arrow() { - let parsed = super::grammar::command("SET-TEXT-CONTENT{ <= 'some text'}"); + let parsed = super::grammar::element_processing_command("SET-TEXT-CONTENT{ <= 'some text'}"); assert_eq!( parsed, - Ok(Command::SetTextContent(ValueSource::StringValue( - 
String::from("some text") - ))) + Ok(ElementProcessingCommand::SetTextContent( + ValueSource::StringValue("some text") + )) ); } #[test] fn parse_single_add_text_content_by_string() { - let parsed = super::grammar::command("ADD-TEXT-CONTENT{'some text'}"); + let parsed = super::grammar::element_processing_command("ADD-TEXT-CONTENT{'some text'}"); assert_eq!( parsed, - Ok(Command::AddTextContent(ValueSource::StringValue( - String::from("some text") - ))) + Ok(ElementProcessingCommand::AddTextContent( + ValueSource::StringValue("some text") + )) ); } #[test] fn parse_single_add_text_content_by_string_with_arrow() { - let parsed = super::grammar::command("ADD-TEXT-CONTENT{ ↤ 'some text'}"); + let parsed = super::grammar::element_processing_command("ADD-TEXT-CONTENT{ ↤ 'some text'}"); + assert_eq!( + parsed, + Ok(ElementProcessingCommand::AddTextContent( + ValueSource::StringValue("some text") + )) + ); +} + +#[test] +fn parse_single_add_text_content_by_sub_pipeline() { + let constructed_pipeline = format!( + "ADD-TEXT-CONTENT{{ {} }}", + EXEMPLARY_SUB_PIPELINE_DEFINITION + ); + let parsed = super::grammar::element_processing_command(&constructed_pipeline); + assert_eq!( parsed, - Ok(Command::AddTextContent(ValueSource::StringValue( - String::from("some text") - ))) + Ok(ElementProcessingCommand::AddTextContent( + ValueSource::SubPipeline(EXEMPLARY_SUB_PIPELINE_MODEL.clone()) + )) ); } #[test] fn parse_single_add_text_content_by_string_with_ascii_arrow() { - let parsed = super::grammar::command("ADD-TEXT-CONTENT{ <= 'some text'}"); + let parsed = super::grammar::element_processing_command("ADD-TEXT-CONTENT{ <= 'some text'}"); assert_eq!( parsed, - Ok(Command::AddTextContent(ValueSource::StringValue( - String::from("some text") - ))) + Ok(ElementProcessingCommand::AddTextContent( + ValueSource::StringValue("some text") + )) ); } #[test] fn parse_single_add_comment_by_string() { - let parsed = super::grammar::command("ADD-COMMENT{'some text'}"); + let parsed = 
super::grammar::element_processing_command("ADD-COMMENT{'some text'}"); assert_eq!( parsed, - Ok(Command::AddComment(ValueSource::StringValue(String::from( - "some text" - )))) + Ok(ElementProcessingCommand::AddComment( + ValueSource::StringValue("some text") + )) ); } #[test] fn parse_single_add_comment_by_string_with_arrow() { - let parsed = super::grammar::command("ADD-COMMENT{ ↤ 'some text'}"); + let parsed = super::grammar::element_processing_command("ADD-COMMENT{ ↤ 'some text'}"); assert_eq!( parsed, - Ok(Command::AddComment(ValueSource::StringValue(String::from( - "some text" - )))) + Ok(ElementProcessingCommand::AddComment( + ValueSource::StringValue("some text") + )) ); } #[test] fn parse_single_add_comment_by_string_with_ascii_arrow() { - let parsed = super::grammar::command("ADD-COMMENT{ <= 'some text'}"); + let parsed = super::grammar::element_processing_command("ADD-COMMENT{ <= 'some text'}"); assert_eq!( parsed, - Ok(Command::AddComment(ValueSource::StringValue(String::from( - "some text" - )))) + Ok(ElementProcessingCommand::AddComment( + ValueSource::StringValue("some text") + )) ); } +//noinspection DuplicatedCode #[test] -fn parse_single_for_using_set_attr() { - let parsed = super::grammar::command("FOR{li ↦ SET-ATTR{data-test ↤ 'some text'}}"); +fn parse_single_for_each_alias_with_using_set_attr() { + let parsed = + super::grammar::element_processing_command("WITH{li ↦ SET-ATTR{data-test ↤ 'some text'}}"); assert_eq!( parsed, - Ok(Command::ForEach( + Ok(ElementProcessingCommand::ForEach( CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element( "li" ))]), - Pipeline::new(vec![Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("some text")) + ElementProcessingPipeline::new(vec![ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::StringValue("some text") )]), )) ); } +//noinspection DuplicatedCode #[test] fn parse_single_for_each_using_set_attr() { - let parsed = 
super::grammar::command("FOR-EACH{li ↦ SET-ATTR{data-test ↤ 'some text'}}"); + let parsed = super::grammar::element_processing_command( + "FOR-EACH{li ↦ SET-ATTR{data-test ↤ 'some text'}}", + ); assert_eq!( parsed, - Ok(Command::ForEach( + Ok(ElementProcessingCommand::ForEach( CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element( "li" ))]), - Pipeline::new(vec![Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("some text")) + ElementProcessingPipeline::new(vec![ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::StringValue("some text") )]), )) ); } +//noinspection DuplicatedCode #[test] fn parse_single_for_each_with_ascii_arrow_using_set_attr() { - let parsed = super::grammar::command("FOR-EACH{li => SET-ATTR{data-test ↤ 'some text'}}"); + let parsed = super::grammar::element_processing_command( + "FOR-EACH{li => SET-ATTR{data-test ↤ 'some text'}}", + ); assert_eq!( parsed, - Ok(Command::ForEach( + Ok(ElementProcessingCommand::ForEach( CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element( "li" ))]), - Pipeline::new(vec![Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("some text")) + ElementProcessingPipeline::new(vec![ElementProcessingCommand::SetAttribute( + "data-test", + ValueSource::StringValue("some text") )]), )) ); @@ -308,81 +376,90 @@ fn parse_single_for_each_with_ascii_arrow_using_set_attr() { #[test] fn parse_single_add_element_using_new_alias() { - let parsed = super::grammar::command("ADD-ELEMENT{NEW{div}}"); + let parsed = super::grammar::element_processing_command("ADD-ELEMENT{NEW{div}}"); assert_eq!( parsed, - Ok(Command::AddElement(Pipeline::new(vec![ - Command::CreateElement(String::from("div")) - ]))) + Ok(ElementProcessingCommand::AddElement( + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("div"), None) + )) ); } #[test] fn parse_single_add_element_using_create() { - let parsed 
= super::grammar::command("ADD-ELEMENT{CREATE-ELEMENT{div}}"); + let parsed = super::grammar::element_processing_command("ADD-ELEMENT{CREATE-ELEMENT{div}}"); assert_eq!( parsed, - Ok(Command::AddElement(Pipeline::new(vec![ - Command::CreateElement(String::from("div")) - ]))) + Ok(ElementProcessingCommand::AddElement( + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("div"), None) + )) ); } #[test] -fn parse_single_add_element_using_read_from() { - let parsed = super::grammar::command("ADD-ELEMENT{READ-FROM{'tests/source.html'}}"); +fn parse_single_add_element_using_load_file() { + let parsed = + super::grammar::element_processing_command("ADD-ELEMENT{LOAD-FILE{'tests/source.html'}}"); assert_eq!( parsed, - Ok(Command::AddElement(Pipeline::new(vec![Command::ReadFrom( - String::from("tests/source.html") - )]))) + Ok(ElementProcessingCommand::AddElement( + ElementCreatingPipeline::new( + ElementCreatingCommand::FromFile("tests/source.html"), + None + ) + )) ); } #[test] fn parse_single_add_element_using_source() { - let parsed = super::grammar::command("ADD-ELEMENT{SOURCE{'tests/source.html'}}"); + let parsed = + super::grammar::element_processing_command("ADD-ELEMENT{SOURCE{'tests/source.html'}}"); assert_eq!( parsed, - Ok(Command::AddElement(Pipeline::new(vec![Command::ReadFrom( - String::from("tests/source.html") - )]))) + Ok(ElementProcessingCommand::AddElement( + ElementCreatingPipeline::new( + ElementCreatingCommand::FromFile("tests/source.html"), + None + ) + )) ); } #[test] fn parse_single_add_element_with_arrow_using_create() { - let parsed = super::grammar::command("ADD-ELEMENT{ ↤ CREATE-ELEMENT{div}}"); + let parsed = super::grammar::element_processing_command("ADD-ELEMENT{ ↤ CREATE-ELEMENT{div}}"); assert_eq!( parsed, - Ok(Command::AddElement(Pipeline::new(vec![ - Command::CreateElement(String::from("div")) - ]))) + Ok(ElementProcessingCommand::AddElement( + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("div"), None) 
+ )) ); } #[test] fn parse_single_add_element_with_ascii_arrow_using_create() { - let parsed = super::grammar::command("ADD-ELEMENT{ <= CREATE-ELEMENT{div}}"); + let parsed = super::grammar::element_processing_command("ADD-ELEMENT{ <= CREATE-ELEMENT{div}}"); assert_eq!( parsed, - Ok(Command::AddElement(Pipeline::new(vec![ - Command::CreateElement(String::from("div")) - ]))) + Ok(ElementProcessingCommand::AddElement( + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("div"), None) + )) ); } //noinspection DuplicatedCode #[test] fn parse_single_replace_using_create() { - let parsed = super::grammar::command("REPLACE{.replace-me ↤ CREATE-ELEMENT{p} }"); + let parsed = + super::grammar::element_processing_command("REPLACE{.replace-me ↤ CREATE-ELEMENT{p} }"); assert_eq!( parsed, - Ok(Command::Replace( + Ok(ElementProcessingCommand::Replace( CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( "replace-me" ))]), - Pipeline::new(vec![Command::CreateElement(String::from("p"))]) + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("p"), None) )), ); } @@ -390,14 +467,140 @@ fn parse_single_replace_using_create() { //noinspection DuplicatedCode #[test] fn parse_single_replace_with_ascii_arrow_using_create() { - let parsed = super::grammar::command("REPLACE{.replace-me <= CREATE-ELEMENT{p} }"); + let parsed = + super::grammar::element_processing_command("REPLACE{.replace-me <= CREATE-ELEMENT{p} }"); + assert_eq!( + parsed, + Ok(ElementProcessingCommand::Replace( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( + "replace-me" + ))]), + ElementCreatingPipeline::new(ElementCreatingCommand::CreateElement("p"), None) + )), + ); +} + +//noinspection DuplicatedCode +#[test] +fn parse_single_replace_using_query_replaced() { + let parsed = + super::grammar::element_processing_command("REPLACE{.replace-me ↤ QUERY-REPLACED{p} }"); assert_eq!( parsed, - Ok(Command::Replace( + 
Ok(ElementProcessingCommand::Replace( CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( "replace-me" ))]), - Pipeline::new(vec![Command::CreateElement(String::from("p"))]) + ElementCreatingPipeline::new( + ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_element("p")) + ])), + None + ) + )), + ); +} + +//noinspection DuplicatedCode +#[test] +fn parse_single_replace_using_query_replaced_alias_keep() { + let parsed = super::grammar::element_processing_command("REPLACE{.replace-me ↤ KEEP{p} }"); + assert_eq!( + parsed, + Ok(ElementProcessingCommand::Replace( + CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( + "replace-me" + ))]), + ElementCreatingPipeline::new( + ElementCreatingCommand::FromReplaced(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_element("p")) + ])), + None + ) + )), + ); +} + +#[test] +fn parse_string_creating_pipeline_use_element_get_attr() { + let parsed = super::grammar::string_creating_pipeline("USE-ELEMENT | GET-ATTR{data-test}"); + assert_eq!( + parsed, + Ok(StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), )), ); } + +#[test] +fn parse_use_element() { + let parsed = super::grammar::element_selecting_command("USE-ELEMENT"); + assert_eq!(parsed, Ok(ElementSelectingCommand::UseElement)); +} + +#[test] +fn parse_use_element_alias_this() { + let parsed = super::grammar::element_selecting_command("THIS"); + assert_eq!(parsed, Ok(ElementSelectingCommand::UseElement)); +} + +#[test] +fn parse_use_parent() { + let parsed = super::grammar::element_selecting_command("USE-PARENT"); + assert_eq!(parsed, Ok(ElementSelectingCommand::UseParent)); +} + +#[test] +fn parse_use_parent_alias_parent() { + let parsed = super::grammar::element_selecting_command("PARENT"); + assert_eq!(parsed, Ok(ElementSelectingCommand::UseParent)); +} + +#[test] +fn 
parse_query_element() { + let parsed = super::grammar::element_selecting_command("QUERY-ELEMENT{div}"); + assert_eq!( + parsed, + Ok(ElementSelectingCommand::QueryElement(CssSelectorList::new( + vec![CssSelectorPath::single(CssSelector::for_element("div"))] + ))), + ); +} + +#[test] +fn parse_query_parent() { + let parsed = super::grammar::element_selecting_command("QUERY-PARENT{div}"); + assert_eq!( + parsed, + Ok(ElementSelectingCommand::QueryParent(CssSelectorList::new( + vec![CssSelectorPath::single(CssSelector::for_element("div"))] + ))), + ); +} + +#[test] +fn parse_query_root() { + let parsed = super::grammar::element_selecting_command("QUERY-ROOT{div}"); + assert_eq!( + parsed, + Ok(ElementSelectingCommand::QueryRoot(CssSelectorList::new( + vec![CssSelectorPath::single(CssSelector::for_element("div"))] + ))), + ); +} + +#[test] +fn parse_get_attr() { + let parsed = super::grammar::value_extracting_command("GET-ATTR{data-test}"); + assert_eq!( + parsed, + Ok(ValueExtractingCommand::GetAttribute("data-test")), + ); +} + +#[test] +fn parse_get_text_content() { + let parsed = super::grammar::value_extracting_command("GET-TEXT-CONTENT"); + assert_eq!(parsed, Ok(ValueExtractingCommand::GetTextContent)); +} diff --git a/src/pipeline/tests.rs b/src/pipeline/tests.rs deleted file mode 100644 index 90e19cb..0000000 --- a/src/pipeline/tests.rs +++ /dev/null @@ -1,533 +0,0 @@ -use crate::html::HtmlRenderable; -use crate::{ - Command, CssSelector, CssSelectorList, CssSelectorPath, CssSelectorStep, HtmlContent, Pipeline, - ValueSource, -}; - -const TEST_HTML_DOCUMENT: &str = r#" - - -

Title

-

Some first text

-

Some more text, even with an

-

Third text of HTML, but no CSS

-
    -
  • 1
  • -
  • 2
  • -
  • 3
  • -
- -"#; - -#[test] -fn run_on_single_only() { - let pipeline = Pipeline::new(vec![Command::Only(CssSelectorList::new(vec![ - CssSelectorPath::new( - CssSelector::for_element("h1"), - vec![CssSelectorStep::adjacent_sibling(CssSelector::for_element( - "p", - ))], - ), - ]))]); - - let dom = tl::parse(TEST_HTML_DOCUMENT, tl::ParserOptions::default()).unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"

Some first text

"#) - ); -} - -#[test] -fn run_on_single_without() { - let pipeline = Pipeline::new(vec![Command::Without(CssSelectorList::new(vec![ - CssSelectorPath::new( - CssSelector::for_element("h1"), - vec![CssSelectorStep::adjacent_sibling(CssSelector::for_element( - "p", - ))], - ), - ]))]); - - let dom = tl::parse(TEST_HTML_DOCUMENT, tl::ParserOptions::default()).unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - let mut result = pipeline.run_on(vec![starting_elements]).unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from( - r#" - - -

Title

- -

Some more text, even with an

-

Third text of HTML, but no CSS

-
    -
  • 1
  • -
  • 2
  • -
  • 3
  • -
- -"# - ) - ); -} - -#[test] -fn run_on_single_clear_attr() { - let pipeline = Pipeline::new(vec![Command::ClearAttribute(String::from("data-test"))]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Some Content
"#) - ); -} - -#[test] -fn run_on_single_clear_content() { - let pipeline = Pipeline::new(vec![Command::ClearContent]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
"#) - ); -} - -#[test] -fn run_on_single_set_attr_from_string_over_existing_attr() { - let pipeline = Pipeline::new(vec![Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("some text")), - )]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Some Content
"#) - ); -} - -#[test] -fn run_on_single_set_attr_from_string_as_new_attr() { - let pipeline = Pipeline::new(vec![Command::SetAttribute( - String::from("data-fubar"), - ValueSource::StringValue(String::from("some text")), - )]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from( - r#"
Some Content
"# - ) - ); -} - -#[test] -fn run_on_single_set_text_content_from_string_for_tag() { - let pipeline = Pipeline::new(vec![Command::SetTextContent(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Other Content
"#) - ); -} - -#[test] -fn run_on_single_set_text_content_from_string_for_empty_tag() { - let pipeline = Pipeline::new(vec![Command::SetTextContent(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Other Content
"#) - ); -} - -#[test] -fn run_on_single_set_text_content_from_string_for_tag_with_multiple_children() { - let pipeline = Pipeline::new(vec![Command::SetTextContent(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
Some special Content.
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Other Content
"#) - ); -} - -#[test] -fn run_on_single_add_text_content_from_string_for_tag() { - let pipeline = Pipeline::new(vec![Command::AddTextContent(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Some ContentOther Content
"#) - ); -} - -#[test] -fn run_on_single_add_text_content_from_string_for_empty_tag() { - let pipeline = Pipeline::new(vec![Command::AddTextContent(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Other Content
"#) - ); -} - -#[test] -fn run_on_single_add_text_content_from_string_for_tag_with_multiple_children() { - let pipeline = Pipeline::new(vec![Command::AddTextContent(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
Some special Content.
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from( - r#"
Some special Content. Other Content
"# - ) - ); -} - -#[test] -fn run_on_single_add_comment_from_string_for_tag() { - let pipeline = Pipeline::new(vec![Command::AddComment(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from( - r#"
Some Content
"# - ) - ); -} - -#[test] -fn run_on_single_add_comment_from_string_for_empty_tag() { - let pipeline = Pipeline::new(vec![Command::AddComment(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
"#) - ); -} - -#[test] -fn run_on_single_add_comment_from_string_for_tag_with_multiple_children() { - let pipeline = Pipeline::new(vec![Command::AddComment(ValueSource::StringValue( - String::from("Other Content"), - ))]); - - let dom = tl::parse( - r#"
Some special Content.
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from( - r#"
Some special Content.
"# - ) - ); -} - -#[test] -fn run_on_single_for_each_on_ul() { - let pipeline = Pipeline::new(vec![Command::ForEach( - CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_element( - "li", - ))]), - Pipeline::new(vec![Command::SetAttribute( - String::from("data-test"), - ValueSource::StringValue(String::from("x")), - )]), - )]); - - let dom = tl::parse( - r#"
  • 1
  • 2
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
  • 1
  • 2
"#) - ); -} - -#[test] -fn run_on_single_add_element_from_create_for_tag() { - let pipeline = Pipeline::new(vec![Command::AddElement(Pipeline::new(vec![ - Command::CreateElement(String::from("div")), - ]))]); - - let dom = tl::parse( - r#"
Some Content
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"
Some Content
"#) - ); -} - -//noinspection DuplicatedCode -#[test] -fn run_on_single_replace_from_create() { - let pipeline = Pipeline::new(vec![Command::Replace( - CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( - "replace-me", - ))]), - Pipeline::new(vec![Command::CreateElement(String::from("p"))]), - )]); - - let dom = tl::parse( - r#"
Some Content
This will be kept
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from(r#"

This will be kept
"#) - ); -} - -//noinspection DuplicatedCode -#[test] -fn run_on_single_replace_from_read_from() { - let pipeline = Pipeline::new(vec![Command::Replace( - CssSelectorList::new(vec![CssSelectorPath::single(CssSelector::for_class( - "replace-me", - ))]), - Pipeline::new(vec![Command::ReadFrom(String::from( - "tests/single_div.html", - ))]), - )]); - - let dom = tl::parse( - r#"
Some Content
This will be kept
"#, - tl::ParserOptions::default(), - ) - .unwrap(); - let starting_elements = HtmlContent::import(dom).unwrap(); - - let mut result = pipeline - .run_on(vec![rctree::Node::clone(&starting_elements)]) - .unwrap(); - - assert_eq!(result.len(), 1); - let first_result = result.pop().unwrap(); - assert_eq!( - first_result.outer_html(), - String::from( - r#"
This is new
This will be kept
"# - ) - ); -} diff --git a/src/string_creating/command.rs b/src/string_creating/command.rs new file mode 100644 index 0000000..c9073ee --- /dev/null +++ b/src/string_creating/command.rs @@ -0,0 +1,451 @@ +use crate::{CommandError, CssSelectorList, HtmlContent, HtmlRenderable}; +use rctree::Node; + +#[derive(Debug, PartialEq, Clone)] +pub enum ElementSelectingCommand<'a> { + /// Returns the previously selected element + UseElement, + /// Returns the parent of the previously selected element (if exists) + UseParent, + /// Run a CSS selector on the previously selected element + QueryElement(CssSelectorList<'a>), + /// Run a CSS selector on the parent of the previously selected element (if exists) + QueryParent(CssSelectorList<'a>), + /// Run a CSS selector on the root of the tree the previously selected element belongs to + /// If the previously selected element is the root, the selector is run against that + QueryRoot(CssSelectorList<'a>), +} + +impl<'a> ElementSelectingCommand<'a> { + /// perform the action defined by the command on the set of nodes + /// and return the calculated results. 
+ /// For some command the output can be equal to the input, + /// others change the result-set + pub(crate) fn execute( + &self, + input: &rctree::Node, + ) -> Result>, CommandError> { + match self { + ElementSelectingCommand::UseElement => Self::use_element(input), + ElementSelectingCommand::UseParent => Self::use_parent(input), + ElementSelectingCommand::QueryElement(selector) => Self::query_element(input, selector), + ElementSelectingCommand::QueryParent(selector) => Self::query_parent(input, selector), + ElementSelectingCommand::QueryRoot(selector) => Self::query_root(input, selector), + } + } + + fn use_element(input: &Node) -> Result>, CommandError> { + Ok(vec![rctree::Node::clone(input)]) + } + + fn use_parent(input: &Node) -> Result>, CommandError> { + if let Some(parent) = input.parent() { + return Ok(vec![parent]); + } + + Ok(vec![]) + } + + fn query_element( + input: &Node, + selector: &CssSelectorList<'a>, + ) -> Result>, CommandError> { + Ok(selector.query(&vec![rctree::Node::clone(input)])) + } + + fn query_parent( + input: &Node, + selector: &CssSelectorList<'a>, + ) -> Result>, CommandError> { + if let Some(parent) = input.parent() { + return Ok(selector.query(&vec![parent])); + } + + Ok(vec![]) + } + + fn query_root( + input: &Node, + selector: &CssSelectorList<'a>, + ) -> Result>, CommandError> { + let mut root = Node::clone(input); + + loop { + if let Some(parent) = root.parent() { + root = parent; + } else { + break; + } + } + + Ok(selector.query(&vec![root])) + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ValueExtractingCommand<'a> { + /// Returns the previously selected element + GetAttribute(&'a str), + GetTextContent, +} + +impl<'a> ValueExtractingCommand<'a> { + /// perform the action defined by the command on the set of nodes + /// and return the calculated results. 
+ /// For some command the output can be equal to the input, + /// others change the result-set + pub(crate) fn execute( + &self, + input: &Vec>, + ) -> Result, CommandError> { + match self { + ValueExtractingCommand::GetAttribute(attr_name) => { + Self::get_attribute(input, attr_name) + } + ValueExtractingCommand::GetTextContent => Self::get_text_content(input), + } + } + + fn get_attribute( + input: &Vec>, + attr_name: &str, + ) -> Result, CommandError> { + let attribute = String::from(attr_name); + Ok(input + .iter() + .filter_map(|n| { + let data = n.borrow(); + + data.get_attribute(&attribute) + }) + .collect::>()) + } + + fn get_text_content(input: &Vec>) -> Result, CommandError> { + Ok(input + .iter() + .filter_map(|n| { + let content = n.text_content(); + if content.is_empty() { + None + } else { + Some(content) + } + }) + .collect::>()) + } +} + +#[cfg(test)] +mod test { + use crate::string_creating::{ElementSelectingCommand, ValueExtractingCommand}; + use crate::{load_inline_html, CssSelector, CssSelectorList, CssSelectorPath}; + + #[test] + fn use_element_returns_self() { + let root = load_inline_html(r#"
"#); + let command = ElementSelectingCommand::UseElement; + + let mut result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, root); + } + + #[test] + fn use_parent_returns_parent_on_existing_parent() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root.first_child().unwrap(); + let command = ElementSelectingCommand::UseParent; + + let mut result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, root); + } + + #[test] + fn use_parent_returns_empty_on_root() { + let root = load_inline_html( + r#"
"#, + ); + let command = ElementSelectingCommand::UseParent; + + let result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn querying_element_returns_matching_element() { + let root = load_inline_html( + r#"
"#, + ); + let command = ElementSelectingCommand::QueryElement(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let mut result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, root.first_child().unwrap()); + } + + #[test] + fn querying_element_returns_multiple_matching_elements() { + let root = load_inline_html( + r#"

"#, + ); + let command = ElementSelectingCommand::QueryElement(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 2); + assert!(result.contains(&root.first_child().unwrap())); + assert!(result.contains(&root.last_child().unwrap().first_child().unwrap())); + } + + #[test] + fn query_element_returns_empty_on_querying_nonexistent_el() { + let root = load_inline_html( + r#"
"#, + ); + let command = ElementSelectingCommand::QueryElement(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn querying_parent_returns_matching_element() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root.first_child().unwrap(); + let command = ElementSelectingCommand::QueryParent(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let mut result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, target_node.next_sibling().unwrap()); + } + + #[test] + fn querying_parent_returns_multiple_matching_elements() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root.first_child().unwrap(); + let command = ElementSelectingCommand::QueryParent(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 2); + assert!(result.contains(&target_node.next_sibling().unwrap())); + assert!(result.contains(&target_node.next_sibling().unwrap().first_child().unwrap())); + } + + #[test] + fn query_parent_returns_empty_on_root() { + let root = load_inline_html( + r#"
"#, + ); + let command = ElementSelectingCommand::QueryParent(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn query_parent_returns_empty_on_querying_nonexistent_el() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root.first_child().unwrap(); + let command = ElementSelectingCommand::QueryParent(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn query_parent_returns_empty_on_matching_element_outside_parent() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root.first_child().unwrap().first_child().unwrap(); + let command = ElementSelectingCommand::QueryParent(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn query_root_returns_matching_element() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root + .first_child() + .unwrap() + .first_child() + .unwrap() + .first_child() + .unwrap(); + let command = ElementSelectingCommand::QueryRoot(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let mut result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, root.last_child().unwrap()); + } + + #[test] + fn query_root_returns_multiple_matching_elements() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root + .first_child() + .unwrap() + .first_child() + .unwrap() + .first_child() + .unwrap(); + let command = ElementSelectingCommand::QueryRoot(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 2); + assert!(result.contains(&root.last_child().unwrap())); + assert!(result.contains(&target_node.next_sibling().unwrap())); + } + + #[test] + fn query_root_on_root_queries_itself() { + let root = load_inline_html( + r#"
"#, + ); + let command = ElementSelectingCommand::QueryRoot(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let mut result = command.execute(&root).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, root.last_child().unwrap()); + } + + #[test] + fn query_root_return_empty_on_nonexistent_el() { + let root = load_inline_html( + r#"
"#, + ); + let target_node = root + .first_child() + .unwrap() + .first_child() + .unwrap() + .first_child() + .unwrap(); + let command = ElementSelectingCommand::QueryRoot(CssSelectorList::new(vec![ + CssSelectorPath::single(CssSelector::for_class("test-source")), + ])); + + let result = command.execute(&target_node).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn get_attr_returns_value_on_existing_attr() { + let root = load_inline_html(r#"
"#); + let command = ValueExtractingCommand::GetAttribute("data-test"); + + let mut result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, String::from("foo")); + } + + #[test] + fn get_attr_returns_empty_on_missing_attr() { + let root = load_inline_html(r#"
"#); + let command = ValueExtractingCommand::GetAttribute("data-test"); + + let result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn get_attr_returns_empty_on_empty_input() { + let command = ValueExtractingCommand::GetAttribute("data-test"); + + let result = command.execute(&vec![]).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn get_text_content_returns_correct_value_on_existing_content() { + let root = load_inline_html(r#"
The content
"#); + let command = ValueExtractingCommand::GetTextContent; + + let mut result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, String::from("The content")); + } + + #[test] + fn get_text_content_returns_empty_string_on_empty_content() { + let root = load_inline_html(r#"
"#); + let command = ValueExtractingCommand::GetTextContent; + + let result = command.execute(&vec![root]).unwrap(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn get_text_content_returns_empty_string_on_empty_input() { + let command = ValueExtractingCommand::GetTextContent; + + let result = command.execute(&vec![]).unwrap(); + + assert_eq!(result.len(), 0); + } +} diff --git a/src/string_creating/mod.rs b/src/string_creating/mod.rs new file mode 100644 index 0000000..e1e73ff --- /dev/null +++ b/src/string_creating/mod.rs @@ -0,0 +1,5 @@ +mod command; +mod pipeline; + +pub(crate) use command::{ElementSelectingCommand, ValueExtractingCommand}; +pub(crate) use pipeline::StringValueCreatingPipeline; diff --git a/src/string_creating/pipeline.rs b/src/string_creating/pipeline.rs new file mode 100644 index 0000000..fcea79b --- /dev/null +++ b/src/string_creating/pipeline.rs @@ -0,0 +1,87 @@ +use crate::string_creating::{ElementSelectingCommand, ValueExtractingCommand}; +use crate::{CommandFailedSnafu, HtmlContent, PipelineError}; +use snafu::ResultExt; + +#[derive(Debug, PartialEq, Clone)] +pub struct StringValueCreatingPipeline<'a> { + element_selector: ElementSelectingCommand<'a>, + value_extractor: ValueExtractingCommand<'a>, + //todo: value_processing: Vec> +} + +/// The command pipeline: a list of individual commands +/// each to execute on the result of the previous command +impl<'a> StringValueCreatingPipeline<'a> { + pub const fn new( + element_selector: ElementSelectingCommand<'a>, + value_extractor: ValueExtractingCommand<'a>, + ) -> Self { + StringValueCreatingPipeline { + element_selector, + value_extractor, + } + } + + /// execute the pipeline on the given nodes by + /// running the first commands on those nodes and all the following commands + /// on their predecessors result. 
+ /// The result of the last command is the result of this pipeline + pub(crate) fn run_on( + &self, + node: &rctree::Node, + ) -> Result, PipelineError> { + let element = self + .element_selector + .execute(node) + .context(CommandFailedSnafu { index: 0_usize })?; + self.value_extractor + .execute(&element) + .context(CommandFailedSnafu { index: 1_usize }) + } +} + +#[cfg(test)] +mod test { + use crate::string_creating::{ElementSelectingCommand, ValueExtractingCommand}; + use crate::{HtmlContent, StringValueCreatingPipeline}; + + #[test] + fn get_attr_from_element_returns_correct_value() { + let pipeline = StringValueCreatingPipeline::new( + ElementSelectingCommand::UseElement, + ValueExtractingCommand::GetAttribute("data-test"), + ); + + let dom = tl::parse( + r#"
"#, + tl::ParserOptions::default(), + ) + .unwrap(); + let starting_element = HtmlContent::import(dom).unwrap(); + + let mut result = pipeline.run_on(&starting_element).unwrap(); + + assert_eq!(result.len(), 1); + let first_result = result.pop().unwrap(); + assert_eq!(first_result, String::from("foo")); + } + + #[test] + fn get_attr_returns_empty_for_empty_selection() { + let pipeline = StringValueCreatingPipeline::new( + ElementSelectingCommand::UseParent, + ValueExtractingCommand::GetAttribute("data-other"), + ); + + let dom = tl::parse( + r#"
"#, + tl::ParserOptions::default(), + ) + .unwrap(); + let starting_element = HtmlContent::import(dom).unwrap(); + + let result = pipeline.run_on(&starting_element).unwrap(); + + assert_eq!(result.len(), 0); + } +} diff --git a/tests/add_comment.rs b/tests/add_comment.rs index 75c2e48..f16982b 100644 --- a/tests/add_comment.rs +++ b/tests/add_comment.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn add_to_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-COMMENT{'followed by a comment'}"; + let command = "EXTRACT-ELEMENT{#first-para} | ADD-COMMENT{'followed by a comment'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -36,7 +36,7 @@ fn add_to_first_p_content() -> Result<(), StreamingEditorError> { #[test] fn add_to_ul() -> Result<(), StreamingEditorError> { - let command = "ONLY{ul} | ADD-COMMENT{'Foo'}"; + let command = "EXTRACT-ELEMENT{ul} | ADD-COMMENT{'Foo'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -61,7 +61,8 @@ fn add_to_ul() -> Result<(), StreamingEditorError> { #[test] fn add_double_dash_will_be_escaped() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-COMMENT{'Actually -- is illegal in comments'}"; + let command = + "EXTRACT-ELEMENT{#first-para} | ADD-COMMENT{'Actually -- is illegal in comments'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -79,3 +80,37 @@ fn add_double_dash_will_be_escaped() -> Result<(), StreamingEditorError> { Ok(()) } + +#[test] +fn add_ul_id_as_comment_to_first_para() -> Result<(), StreamingEditorError> { + let command = "FOR-EACH{#first-para ↦ ADD-COMMENT{ QUERY-PARENT{ul} | GET-ATTR{id} } }"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + 
assert_eq!( + result_string, + String::from( + r#" + + +

Title

+

Some first text

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); + + Ok(()) +} diff --git a/tests/add_element.rs b/tests/add_element.rs index 1d534c2..738a2b1 100644 --- a/tests/add_element.rs +++ b/tests/add_element.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn add_simple_div_to_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-ELEMENT{ CREATE-ELEMENT{div} }"; + let command = "EXTRACT-ELEMENT{#first-para} | ADD-ELEMENT{ CREATE-ELEMENT{div} }"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -36,7 +36,7 @@ fn add_simple_div_to_first_p_content() -> Result<(), StreamingEditorError> { #[test] fn add_two_divs_to_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-ELEMENT{ CREATE-ELEMENT{div} } | ADD-ELEMENT{ CREATE-ELEMENT{div} }"; + let command = "EXTRACT-ELEMENT{#first-para} | ADD-ELEMENT{ CREATE-ELEMENT{div} } | ADD-ELEMENT{ CREATE-ELEMENT{div} }"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -55,7 +55,8 @@ fn add_two_divs_to_first_p_content() -> Result<(), StreamingEditorError> { #[test] fn add_div_with_attr_to_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-ELEMENT{ CREATE-ELEMENT{div} | SET-ATTR{id ↤ 'new'} }"; + let command = + "EXTRACT-ELEMENT{#first-para} | ADD-ELEMENT{ CREATE-ELEMENT{div} | SET-ATTR{id ↤ 'new'} }"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -71,3 +72,41 @@ fn add_div_with_attr_to_first_p_content() -> Result<(), StreamingEditorError> { Ok(()) } + +#[test] +fn copy_title_to_meta_tag() -> Result<(), StreamingEditorError> { + let command = "FOR-EACH{head ↦ ADD-ELEMENT{ ↤ CREATE-ELEMENT{meta} | SET-ATTR{name ↤ 'title' } } | FOR-EACH{meta[name='title'] ↦ SET-ATTR{content ↤ QUERY-PARENT{title} | GET-TEXT-CONTENT } } }"; + + let mut input = Box::new( + r#" + + This is the title + + +

Title

+ +"# + .as_bytes(), + ); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from( + r#" + + This is the title + + +

Title

+ +"# + ) + ); + + Ok(()) +} diff --git a/tests/add_text_content.rs b/tests/add_text_content.rs index 09914c9..9c79a8d 100644 --- a/tests/add_text_content.rs +++ b/tests/add_text_content.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn add_to_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-TEXT-CONTENT{'... expanded by other text'}"; + let command = "EXTRACT-ELEMENT{#first-para} | ADD-TEXT-CONTENT{'... expanded by other text'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -36,7 +36,8 @@ fn add_to_first_p_content() -> Result<(), StreamingEditorError> { #[test] fn add_escape_needing_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | ADD-TEXT-CONTENT{' is > others < & you never know which'}"; + let command = + "EXTRACT-ELEMENT{#first-para} | ADD-TEXT-CONTENT{' is > others < & you never know which'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -54,3 +55,37 @@ fn add_escape_needing_content() -> Result<(), StreamingEditorError> { Ok(()) } + +#[test] +fn add_ul_id_as_text_to_first_para() -> Result<(), StreamingEditorError> { + let command = "FOR-EACH{#first-para ↦ ADD-TEXT-CONTENT{' and ul-id is: '} | ADD-TEXT-CONTENT{ QUERY-PARENT{ul} | GET-ATTR{id} } }"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from( + r#" + + +

Title

+

Some first text and ul-id is: list

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); + + Ok(()) +} diff --git a/tests/clear_attr.rs b/tests/clear_attr.rs index b0b5c5b..a549131 100644 --- a/tests/clear_attr.rs +++ b/tests/clear_attr.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn clear_first_p_id() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | CLEAR-ATTR{id}"; + let command = "EXTRACT-ELEMENT{#first-para} | CLEAR-ATTR{id}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); diff --git a/tests/clear_content.rs b/tests/clear_content.rs index 47424a2..769dacb 100644 --- a/tests/clear_content.rs +++ b/tests/clear_content.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn clear_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | CLEAR-CONTENT"; + let command = "EXTRACT-ELEMENT{#first-para} | CLEAR-CONTENT"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); diff --git a/tests/only.rs b/tests/extract_element.rs similarity index 94% rename from tests/only.rs rename to tests/extract_element.rs index 4861e74..c18e73e 100644 --- a/tests/only.rs +++ b/tests/extract_element.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn only_first_para() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para}"; + let command = "EXTRACT-ELEMENT{#first-para}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -36,7 +36,7 @@ fn only_first_para() -> Result<(), StreamingEditorError> { #[test] fn only_list_items() -> Result<(), StreamingEditorError> { - let command = "ONLY{li}"; + let command = "EXTRACT-ELEMENT{li}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); diff --git a/tests/for_each.rs b/tests/for.rs similarity index 92% rename from tests/for_each.rs rename to tests/for.rs index 979718c..04e959c 100644 --- a/tests/for_each.rs +++ b/tests/for.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn add_attr_to_li() -> 
Result<(), StreamingEditorError> { - let command = r#"ONLY{ul} | FOR-EACH{li ↦ SET-ATTR{data-test ↤ "x"}}"#; + let command = r#"EXTRACT-ELEMENT{ul} | FOR-EACH{li ↦ SET-ATTR{data-test ↤ "x"}}"#; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); diff --git a/tests/without.rs b/tests/remove_element.rs similarity index 95% rename from tests/without.rs rename to tests/remove_element.rs index 78be008..13f5124 100644 --- a/tests/without.rs +++ b/tests/remove_element.rs @@ -4,7 +4,7 @@ const HTML_INPUT: &str = r#"

Title

Result<(), StreamingEditorError> { #[test] fn replace_ul_with_sourced_html() -> Result<(), StreamingEditorError> { - let command = "REPLACE{ul ↤ READ-FROM{'tests/source.html'} | ONLY{ul}}"; + let command = "REPLACE{ul ↤ LOAD-FILE{'tests/source.html'} | EXTRACT-ELEMENT{ul}}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -82,3 +82,37 @@ fn replace_ul_with_sourced_html() -> Result<(), StreamingEditorError> { Ok(()) } + +#[test] +fn replace_third_para_with_child_abbr() -> Result<(), StreamingEditorError> { + let command = "REPLACE{#third-para ↤ QUERY-REPLACED{abbr}}"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from( + r#" + + +

Title

+

Some first text

+

Some more text, even with an

+ HTMLCSS +
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); + + Ok(()) +} diff --git a/tests/set_attr.rs b/tests/set_attr.rs index 4bc687c..2e025ba 100644 --- a/tests/set_attr.rs +++ b/tests/set_attr.rs @@ -1,7 +1,9 @@ use html_streaming_editor::*; const HTML_INPUT: &str = r#" - + + +

Title

Some first text

@@ -17,7 +19,7 @@ const HTML_INPUT: &str = r#" #[test] fn overwrite_first_p_id() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | SET-ATTR{id ↤ 'new-id'}"; + let command = "EXTRACT-ELEMENT{#first-para} | SET-ATTR{id ↤ 'new-id'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -36,7 +38,7 @@ fn overwrite_first_p_id() -> Result<(), StreamingEditorError> { #[test] fn add_attr_to_first_p() -> Result<(), StreamingEditorError> { - let command = r#"ONLY{#first-para} | SET-ATTR{data-test ↤ "some value"}"#; + let command = r#"EXTRACT-ELEMENT{#first-para} | SET-ATTR{data-test ↤ "some value"}"#; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -55,7 +57,7 @@ fn add_attr_to_first_p() -> Result<(), StreamingEditorError> { #[test] fn set_attr_with_double_quotes() -> Result<(), StreamingEditorError> { - let command = r#"ONLY{#first-para} | SET-ATTR{data-test ↤ 'some "value"'}"#; + let command = r#"EXTRACT-ELEMENT{#first-para} | SET-ATTR{data-test ↤ 'some "value"'}"#; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -76,7 +78,7 @@ fn set_attr_with_double_quotes() -> Result<(), StreamingEditorError> { #[test] fn set_attr_with_line_break() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | SET-ATTR{data-test ↤ 'some \nvalue'}"; + let command = "EXTRACT-ELEMENT{#first-para} | SET-ATTR{data-test ↤ 'some \nvalue'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -92,3 +94,78 @@ fn set_attr_with_line_break() -> Result<(), StreamingEditorError> { Ok(()) } + +#[test] +fn set_attr_from_other_attr() -> Result<(), StreamingEditorError> { + let command = "EXTRACT-ELEMENT{#first-para} | SET-ATTR{data-test ↤ USE-ELEMENT | GET-ATTR{id}}"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + 
let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from(r#"

Some first text

"#) + ); + + Ok(()) +} + +#[test] +fn set_attr_from_attr_of_sibling() -> Result<(), StreamingEditorError> { + let command = + "FOR-EACH{#first-para ↦ SET-ATTR{data-test ↤ QUERY-PARENT{#second-para} | GET-ATTR{id}}} | EXTRACT-ELEMENT{#first-para}"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from(r#"

Some first text

"#) + ); + + Ok(()) +} + +#[test] +fn set_attr_from_attr_of_head_meta() -> Result<(), StreamingEditorError> { + let command = "FOR-EACH{#first-para ↦ SET-ATTR{data-test ↤ QUERY-ROOT{meta[name='test']} | GET-ATTR{content}}}"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from( + r#" + + + + +

Title

+

Some first text

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); + + Ok(()) +} diff --git a/tests/set_text_content.rs b/tests/set_text_content.rs index 91440fa..c089921 100644 --- a/tests/set_text_content.rs +++ b/tests/set_text_content.rs @@ -17,7 +17,7 @@ const HTML_INPUT: &str = r#" #[test] fn overwrite_first_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#first-para} | SET-TEXT-CONTENT{'Some new, boring text'}"; + let command = "EXTRACT-ELEMENT{#first-para} | SET-TEXT-CONTENT{'Some new, boring text'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -36,7 +36,7 @@ fn overwrite_first_p_content() -> Result<(), StreamingEditorError> { #[test] fn overwrite_third_p_content() -> Result<(), StreamingEditorError> { - let command = "ONLY{#third-para} | SET-TEXT-CONTENT{'Simple Text'}"; + let command = "EXTRACT-ELEMENT{#third-para} | SET-TEXT-CONTENT{'Simple Text'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -56,7 +56,7 @@ fn overwrite_third_p_content() -> Result<(), StreamingEditorError> { #[test] fn set_escape_needing_content() -> Result<(), StreamingEditorError> { let command = - "ONLY{#first-para} | SET-TEXT-CONTENT{'Some is > others < & you never know which'}"; + "EXTRACT-ELEMENT{#first-para} | SET-TEXT-CONTENT{'Some is > others < & you never know which'}"; let mut input = Box::new(HTML_INPUT.as_bytes()); let mut output = Vec::new(); @@ -74,3 +74,71 @@ fn set_escape_needing_content() -> Result<(), StreamingEditorError> { Ok(()) } + +#[test] +fn set_ul_id_as_text_to_first_para() -> Result<(), StreamingEditorError> { + let command = "FOR-EACH{#first-para ↦ SET-TEXT-CONTENT{ QUERY-PARENT{ul} | GET-ATTR{id} } }"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from( + r#" + + +

Title

+

list

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); + + Ok(()) +} + +#[test] +fn set_second_para_content_as_text_to_first_para() -> Result<(), StreamingEditorError> { + let command = "FOR-EACH{#first-para ↦ SET-TEXT-CONTENT{ QUERY-PARENT{#second-para} | GET-TEXT-CONTENT } }"; + + let mut input = Box::new(HTML_INPUT.as_bytes()); + let mut output = Vec::new(); + let hse = HtmlStreamingEditor::new(&mut input, &mut output); + + let _ = hse.run(command)?; + let result_string = String::from_utf8(output).unwrap(); + + assert_eq!( + result_string, + String::from( + r#" + + +

Title

+

Some more text, even with an

+

Some more text, even with an

+

Third text of HTML, but no CSS

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+ +"# + ) + ); + + Ok(()) +} diff --git a/tests/source.html b/tests/source.html index b088d0b..9c4fbf9 100644 --- a/tests/source.html +++ b/tests/source.html @@ -2,7 +2,7 @@ - READ-FROM Source + LOAD-FILE Source
Some other stuff