Skip to content

Commit

Permalink
add case when HTML tags contain attributes
Browse files (browse the repository at this point in the history)
  • Loading branch information
thomas-Ngr committed Jan 9, 2025
1 parent 78bf506 commit 7e82f70
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 10 deletions.
4 changes: 2 additions & 2 deletions htdocs/includes/odtphp/odf.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ class Odf
public $userdefined=array();

const PIXEL_TO_CM = 0.026458333;
const FIND_TAGS_REGEX = '/<([A-Za-z0-9]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(((?!<\1>).)*)<\/\1>))/s';
const FIND_ENCODED_TAGS_REGEX = '/&lt;([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/&gt;)|(?:&gt;(((?!&lt;\1&gt;).)*)&lt;\/\1&gt;))/';
const FIND_TAGS_REGEX = '/<([A-Za-z0-9]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(((?!<\1(\s.*)?>).)*)<\/\1>))/s';
const FIND_ENCODED_TAGS_REGEX = '/&lt;([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/&gt;)|(?:&gt;(((?!&lt;\1(\s.*)?&gt;).)*)&lt;\/\1&gt;))/';


/**
Expand Down
22 changes: 14 additions & 8 deletions test/phpunit/ODFTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -302,33 +302,39 @@ public function testODFconvertVarToOdf()
'charset' => null,
'expected' => utf8_encode('text with <text:span text:style-name="boldText">two</text:span> (strong) <text:span text:style-name="boldText">tags</text:span>'),
],
25 => [
'to_convert' => "text with <strong class=\"whatever\">two</strong> (strong) <strong class=\"the weather\">tags and <u>intricated</u> underline </strong>",
'encode' => true,
'charset' => null,
'expected' => utf8_encode('text with <text:span text:style-name="boldText">two</text:span> (strong) <text:span text:style-name="boldText">tags and <text:span text:style-name="underlineText">intricated</text:span> underline </text:span>'),
],

// One can also pass an HTML-encoded string to the method
25 => [
26 => [
'to_convert' => 'One&amp;two',
'encode' => true,
'charset' => null,
'expected' => 'One&amp;two',
],
26 => [
27 => [
'to_convert' => "text with &lt;strong&gt;strong, &lt;/strong&gt;&lt;em&gt;emphasis&lt;/em&gt; and &lt;u&gt;underlined&lt;/u&gt; words with &lt;i&gt;it@lic sp&amp;ciàlchärs éè l'&lt;/i&gt;",
'encode' => false,
'charset' => 'UTF-8',
'expected' => 'text with <text:span text:style-name="boldText">strong, </text:span><text:span text:style-name="italicText">emphasis</text:span> and <text:span text:style-name="underlineText">underlined</text:span> words with <text:span text:style-name="italicText">it@lic sp&ciàlchärs éè l\'</text:span>',
],
27 => [
28 => [
'to_convert' => "text with &lt;strong&gt;strong, &lt;/strong&gt;&lt;em&gt;emphasis&lt;/em&gt; and &lt;u&gt;underlined&lt;/u&gt; words with &lt;i&gt;it@lic sp&amp;ciàlchärs éè l'&lt;/i&gt;",
'encode' => true,
'charset' => 'UTF-8',
'expected' => 'text with <text:span text:style-name="boldText">strong, </text:span><text:span text:style-name="italicText">emphasis</text:span> and <text:span text:style-name="underlineText">underlined</text:span> words with <text:span text:style-name="italicText">it@lic sp&amp;ciàlchärs éè l&apos;</text:span>',
],
28 => [
29 => [
'to_convert' => "text with &lt;strong&gt;strong, &lt;/strong&gt;&lt;em&gt;emphasis&lt;/em&gt; and &lt;u&gt;underlined&lt;/u&gt; words with &lt;i&gt;it@lic sp&amp;ciàlchärs éè l'&lt;/i&gt;",
'encode' => false,
'charset' => null,
'expected' => utf8_encode('text with <text:span text:style-name="boldText">strong, </text:span><text:span text:style-name="italicText">emphasis</text:span> and <text:span text:style-name="underlineText">underlined</text:span> words with <text:span text:style-name="italicText">it@lic sp&ciàlchärs éè l\'</text:span>'),
],
29 => [
30 => [
'to_convert' => "text with &lt;strong&gt;strong, &lt;/strong&gt;&lt;em&gt;emphasis&lt;/em&gt; and &lt;u&gt;underlined&lt;/u&gt; words with &lt;i&gt;it@lic sp&amp;ciàlchärs éè l'&lt;/i&gt;",
'encode' => true,
'charset' => null,
Expand All @@ -347,20 +353,20 @@ public function testODFconvertVarToOdf()
// Following tests reflect the current behavior. They may evolve if the method behavior changes.

// The method removes hyperlinks and tags that are not dealt with.
30 => [
31 => [
'to_convert' => '123 <a href="/test.php">trucmachin > truc < troc > trac</a>bla bla',
'encode' => true,
'charset' => null,
'expected' => "123 trucmachin &gt; truc &lt; troc &gt; tracbla bla",
],
31 => [
32 => [
'to_convert' => '123 <h3>Title</h3> bla',
'encode' => true,
'charset' => null,
'expected' => "123 Title bla",
],
// HTML should not take \n into account, but only <br />.
32 => [
33 => [
'to_convert' => "text with <strong>strong text </strong>, a line\nbreak and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>",
'encode' => false,
'charset' => 'UTF-8',
Expand Down

0 comments on commit 7e82f70

Please sign in to comment.