Skip to content

Commit

Permalink
Add 'clearing' commands
Browse files Browse the repository at this point in the history
of a given element ...
- ... clear a given attribute
- ... clear the content (a.k.a. all children)
  • Loading branch information
kelko committed Sep 24, 2022
1 parent 24d2878 commit bc88552
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 12 deletions.
19 changes: 14 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,25 @@ The `SELECTOR` is a [CSS selector](https://developer.mozilla.org/en-US/docs/Web/
Commands
-------------

Planned commands:
Currently supported:

- `ONLY`: remove everything not matching the CSS selector (alias: `SELECT`)
- `WITHOUT`: remove everything matching the CSS selector (alias: `FILTER`)
- Some attribute & text-content manipulation
- `CLEAR-ATTR`: removes a given attribute from the previously selected elements
- `CLEAR-CONTENT`: clears all children from the previously selected elements

Currently supported:
Planned commands:

- `SET-ATTR`: Sets a given attribute to a specified value
- `SET-TEXT-CONTENT`: removes all previous children and replaces them with exactly one given text child
- `ADD-TEXT-CONTENT`: appends a new text child
- `ADD-COMMENT`: appends a new comment child
- `ADD-ELEMENT`: appends a new tag/element child
- `REPLACE-WITH`: replace all elements matching a CSS selector with new elements (alias: `MAP`)
- `READ-FROM`: reads a DOM from a different file, mainly in combination with `ADD-ELEMENT` or `REPLACE-WITH` (alias: `SOURCE`)
- `CREATE-ELEMENT`: creates a new, empty element, mainly in combination with `ADD-ELEMENT` or `REPLACE-WITH` (alias: `NEW`)
- `FOR-EACH`: run a sub-pipeline on all sub-elements matching a CSS selector but return the previously selected elements

- `ONLY`
- `WITHOUT`

Binary
-------
Expand Down
51 changes: 44 additions & 7 deletions src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ pub enum Command<'a> {
/// and remove them from their parent nodes.
/// Returns the input as result.
Without(CssSelectorList<'a>),
// Map(String, Pipeline),
// GetAttribute(String),
// SetAttribute(String, Pipeline),
// RemoveAttribute(String),
// GetText(),
// SetText(Pipeline),
// RemoveText(),
ClearAttribute(String),
ClearContent,
//SetAttribute
//SetTextContent
//AddTextContent
//AddElement
//AddComment
}

impl<'a> Command<'a> {
Expand All @@ -55,6 +55,8 @@ impl<'a> Command<'a> {
Command::Without(selector) => {
Self::without(input, selector).context(WithoutFailedSnafu)
}
Command::ClearAttribute(attribute) => Self::clear_attr(input, attribute),
Command::ClearContent => Self::clear_content(input),
}
}

Expand Down Expand Up @@ -82,4 +84,39 @@ impl<'a> Command<'a> {
.map(|n| rctree::Node::clone(n))
.collect::<Vec<_>>())
}

/// Runs the `CLEAR-ATTR` command: asks the content of every node in `input`
/// to drop the attribute named `attribute`.
///
/// The removal itself is delegated to `clear_attribute` on the node's
/// content. The returned vector holds clones of the input node handles, in
/// input order. This function never produces an `Err` itself.
fn clear_attr(
    input: &Vec<rctree::Node<HtmlContent>>,
    attribute: &String,
) -> Result<Vec<rctree::Node<HtmlContent>>, CommandError> {
    trace!("Running CLEAR-ATTR command for attr: {:#?}", attribute);

    let mut selected = Vec::with_capacity(input.len());
    for node in input {
        // `input` only hands out shared references, so take an owned handle
        // that can be mutably borrowed and then returned to the caller.
        let mut handle = rctree::Node::clone(node);
        handle.borrow_mut().clear_attribute(attribute);
        selected.push(handle);
    }

    Ok(selected)
}

/// Runs the `CLEAR-CONTENT` command: detaches every direct child of each
/// node in `input`.
///
/// The returned vector holds clones of the input node handles, in input
/// order. This function never produces an `Err` itself.
fn clear_content(
    input: &Vec<rctree::Node<HtmlContent>>,
) -> Result<Vec<rctree::Node<HtmlContent>>, CommandError> {
    trace!("Running CLEAR-CONTENT command");

    let mut selected = Vec::with_capacity(input.len());
    for parent in input {
        // Each child handle is yielded by the iterator before it is detached.
        parent.children().for_each(|mut kid| kid.detach());
        selected.push(rctree::Node::clone(parent));
    }

    Ok(selected)
}
}
9 changes: 9 additions & 0 deletions src/html/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,15 @@ impl HtmlContent {
HtmlContent::Tag(t) => t.matches_selector(selector),
}
}

/// Removes the attribute named `attribute` from this content if it is a tag.
///
/// Comment and text content is left untouched. Any value returned by the
/// underlying `remove` call is discarded.
pub(crate) fn clear_attribute(&mut self, attribute: &String) {
    match self {
        HtmlContent::Tag(element) => {
            element.attributes.remove(attribute);
        }
        // Only tags carry attributes; nothing to do for the other variants.
        HtmlContent::Text(_) | HtmlContent::Comment(_) => {}
    }
}
}

pub(crate) trait HtmlRenderable {
Expand Down
6 changes: 6 additions & 0 deletions src/parsing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,15 @@ parser! {
= ("ONLY" / "SELECT") "{" whitespace()? oc:css_selector_list() whitespace()? "}" { Command::Only(oc) }
// Parses `WITHOUT{<selectors>}` (alias `FILTER{<selectors>}`) into `Command::Without`.
// Whitespace is optional directly inside the braces, around the selector list.
pub(crate) rule without_command() -> Command<'input>
= ("WITHOUT" / "FILTER") "{" whitespace()? oc:css_selector_list() whitespace()? "}" { Command::Without(oc) }
// Parses `CLEAR-ATTR{<identifier>}` into `Command::ClearAttribute`, copying the
// matched identifier into an owned `String`.
// Whitespace is optional directly inside the braces, around the identifier.
pub(crate) rule clear_attr_command() -> Command<'input>
= "CLEAR-ATTR" "{" whitespace()? a:identifier() whitespace()? "}" { Command::ClearAttribute(String::from(a)) }
// Parses the bare keyword `CLEAR-CONTENT` into `Command::ClearContent`.
// Unlike the other commands, this one takes no `{...}` argument block.
pub(crate) rule clear_content_command() -> Command<'input>
= "CLEAR-CONTENT" { Command::ClearContent }
// Parses any single command by trying each command rule in the order listed;
// the first alternative that matches wins.
rule command() -> Command<'input>
= only_command()
/ without_command()
/ clear_attr_command()
/ clear_content_command()
// Parses a whole pipeline: commands separated by the literal " | "
// (space, pipe, space), collected into a `Pipeline`.
pub rule pipeline() -> Pipeline<'input>
= p:(command() ** " | ") { Pipeline::new(p) }
}
Expand Down
12 changes: 12 additions & 0 deletions src/parsing/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,15 @@ fn parse_two_grammar() {
]))
);
}

#[test]
fn parse_single_clear_attr() {
    // A lone CLEAR-ATTR command must yield the attribute name as an owned string.
    let expected = Command::ClearAttribute(String::from("a"));
    assert_eq!(
        super::grammar::clear_attr_command("CLEAR-ATTR{a}"),
        Ok(expected)
    );
}

#[test]
fn parse_single_clear_content() {
    // The bare keyword, without any braces, is the complete command.
    assert_eq!(
        super::grammar::clear_content_command("CLEAR-CONTENT"),
        Ok(Command::ClearContent)
    );
}
46 changes: 46 additions & 0 deletions src/pipeline/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,49 @@ fn run_on_single_without() {
)
);
}

#[test]
fn run_on_single_clear_attr() {
    // Parse the fixture markup and import it into the crate's own tree type.
    let parsed_dom = tl::parse(
        r#"<div data-test="foo" class="bar">Some Content</div>"#,
        tl::ParserOptions::default(),
    )
    .unwrap();
    let root = HtmlContent::import(parsed_dom).unwrap();

    let clear_data_test = Pipeline::new(vec![Command::ClearAttribute(String::from("data-test"))]);
    let mut outcome = clear_data_test
        .run_on(vec![rctree::Node::clone(&root)])
        .unwrap();

    // Exactly one node comes back, with only `data-test` removed.
    assert_eq!(outcome.len(), 1);
    let only_node = outcome.pop().unwrap();
    assert_eq!(
        only_node.outer_html(),
        String::from(r#"<div class="bar">Some Content</div>"#)
    );
}

#[test]
fn run_on_single_clear_content() {
    // Parse the fixture markup and import it into the crate's own tree type.
    let parsed_dom = tl::parse(
        r#"<div data-test="foo" class="bar">Some Content</div>"#,
        tl::ParserOptions::default(),
    )
    .unwrap();
    let root = HtmlContent::import(parsed_dom).unwrap();

    let clear_children = Pipeline::new(vec![Command::ClearContent]);
    let mut outcome = clear_children
        .run_on(vec![rctree::Node::clone(&root)])
        .unwrap();

    // Exactly one node comes back: the element keeps its attributes but has no children.
    assert_eq!(outcome.len(), 1);
    let only_node = outcome.pop().unwrap();
    assert_eq!(
        only_node.outer_html(),
        String::from(r#"<div class="bar" data-test="foo"></div>"#)
    );
}
32 changes: 32 additions & 0 deletions tests/clear_attr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use html_streaming_editor::*;

// HTML fixture fed to the editor in this file's tests: a small document with
// a heading, three paragraphs with ids, and a three-item list.
const HTML_INPUT: &str = r#"<html>
<head></head>
<body>
<h1>Title</h1>
<p id="first-para">Some first text</p>
<p id="second-para">Some more text, even with an <img src=""></p>
<p id="third-para">Third text of <abbr>HTML</abbr>, but no <abbr>CSS</abbr></p>
<ul id="list">
<li id="item-1">1</li>
<li id="item-2">2</li>
<li id="item-3">3</li>
</ul>
</body>
</html>"#;

#[test]
fn clear_first_p_id() -> Result<(), StreamingEditorError> {
    let mut source = Box::new(HTML_INPUT.as_bytes());
    let mut sink = Vec::new();

    // Select the first paragraph, then strip its `id` attribute.
    let editor = HtmlStreamingEditor::new(&mut source, &mut sink);
    let _ = editor.run("ONLY{#first-para} | CLEAR-ATTR{id}")?;

    let rendered = String::from_utf8(sink).unwrap();
    assert_eq!(rendered, String::from(r#"<p>Some first text</p>"#));

    Ok(())
}
32 changes: 32 additions & 0 deletions tests/clear_content.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use html_streaming_editor::*;

// HTML fixture fed to the editor in this file's tests: a small document with
// a heading, three paragraphs with ids, and a three-item list.
const HTML_INPUT: &str = r#"<html>
<head></head>
<body>
<h1>Title</h1>
<p id="first-para">Some first text</p>
<p id="second-para">Some more text, even with an <img src=""></p>
<p id="third-para">Third text of <abbr>HTML</abbr>, but no <abbr>CSS</abbr></p>
<ul id="list">
<li id="item-1">1</li>
<li id="item-2">2</li>
<li id="item-3">3</li>
</ul>
</body>
</html>"#;

#[test]
fn clear_first_p_content() -> Result<(), StreamingEditorError> {
    let mut source = Box::new(HTML_INPUT.as_bytes());
    let mut sink = Vec::new();

    // Select the first paragraph, then drop all of its children.
    let editor = HtmlStreamingEditor::new(&mut source, &mut sink);
    let _ = editor.run("ONLY{#first-para} | CLEAR-CONTENT")?;

    let rendered = String::from_utf8(sink).unwrap();
    assert_eq!(rendered, String::from(r#"<p id="first-para"></p>"#));

    Ok(())
}

0 comments on commit bc88552

Please sign in to comment.