From 7a2ffe718e983dfb12ac0298a636a61cc337506c Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Wed, 19 Jan 2022 18:04:10 -0800 Subject: [PATCH 1/9] Adding logic in the DOMLexer to handle the contents nested in an iFrame like text and corresponding tests --- src/Lexer/DOMLex.hack | 23 +++-- tests/HandleIframeAsTextTest.hack | 136 ++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 5 deletions(-) create mode 100644 tests/HandleIframeAsTextTest.hack diff --git a/src/Lexer/DOMLex.hack b/src/Lexer/DOMLex.hack index 4539c6c..60728a7 100644 --- a/src/Lexer/DOMLex.hack +++ b/src/Lexer/DOMLex.hack @@ -2,8 +2,8 @@ namespace HTMLPurifier\Lexer; use namespace HTMLPurifier; -use namespace HH\Lib\{C, Regex, Str}; -use namespace HTMLPurifier\Token; +use namespace HH\Lib\{C, Regex, Str, Vec}; +use namespace HTMLPurifier\{Enums, Token}; /** * Parser that uses Hacklang DOMNode. @@ -95,13 +95,15 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { while (!$nodes[$level]->isEmpty()) { $node = $nodes[$level]->dequeue(); $collect = $level > 0 ? true : false; - $needEndingTag = $this->createStartNode($node, inout $tokens, $collect, $config); + $needEndingTag = $this->createStartNode(inout $node, inout $tokens, $collect, $config); if ($needEndingTag) { $nodesAtlevel = new \SplStack(); $closingNodes[$level] = $nodesAtlevel; $nodesAtlevel->push($node); } - if ($node->childNodes && $node->childNodes->length) { + if ( + $node->childNodes && $node->childNodes->length && $this->getTagName($node->parentNode) !== 'iframe' + ) { $level += 1; $nodes[$level] = new \SplQueue(); foreach ($node->childNodes as $childNode) { @@ -135,7 +137,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { * start and close are collected, set to false at first recursion because we are dealing with the implicit DIV tag */ protected function createStartNode( - \DOMNode $node, + inout \DOMNode $node, inout vec $tokens, bool $collect, HTMLPurifier\HTMLPurifier_Config $config, @@ -185,6 +187,17 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { } else { if ($collect) { $tokens[] = $this->factory->createStart($tag_name, $attr); + if ($this->getTagName($node) === 'iframe') { + $doc = new \DOMDocument(); + $children = vec($node->childNodes); + foreach ($children as $childNode) { + $tagName = $this->getTagName($childNode); + $doc->appendChild($doc->importNode($childNode, true)); + $node->removeChild($childNode); + } + $text = $doc->saveHTML(); + $tokens[] = $this->factory->createText($text); + } } return true; } diff --git a/tests/HandleIframeAsTextTest.hack b/tests/HandleIframeAsTextTest.hack new file mode 100644 index 0000000..0957370 --- /dev/null +++ b/tests/HandleIframeAsTextTest.hack @@ -0,0 +1,136 @@ +/* Created by Nikita Ashok and Jake Polacek on 08/04/2020 */ + +namespace HTMLPurifier\_Private\Tests; + +use function Facebook\FBExpect\expect; +use type Facebook\HackTest\HackTest; +use namespace HTMLPurifier; +use namespace HTMLPurifier\{Strategy, Token, Lexer, Enums}; + +class HandleIframeAsTextTest extends HackTest { + public function testSingleNestedElement(): void { + echo "\ntestSingleNestedElement()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } + + public function testSingleNestedElementWithData(): void { + echo "\ntestSingleNestedElement()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } + + public function testMultipleNestedElementWithData(): void { + echo "\ntestSingleNestedElement()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } + + public function testSingleDoublyNestedElement(): void { + echo "\ntestSingleNestedElement()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } + + public function testMultipleDoublyNestedElement(): void { + echo "\ntestSingleNestedElement()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } + + public function testDoublyNestedElement(): void { + echo "\ntestSingleNestedElement()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } +} From 6d11a428face00682abf45ac022edbec22d8f732 Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Thu, 20 Jan 2022 15:35:50 -0800 Subject: [PATCH 2/9] Updating tests and tweaking unnecessary logic --- src/Lexer/DOMLex.hack | 5 +++-- tests/CommentRemovalTest.hack | 17 +++++++++++++---- tests/HTMLPurifierTest.hack | 27 ++++++++++++++++++++++----- tests/HandleIframeAsTextTest.hack | 16 +++++----------- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/src/Lexer/DOMLex.hack b/src/Lexer/DOMLex.hack index 60728a7..b76d71d 100644 --- a/src/Lexer/DOMLex.hack +++ b/src/Lexer/DOMLex.hack @@ -191,11 +191,12 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { $doc = new \DOMDocument(); $children = vec($node->childNodes); foreach ($children as $childNode) { - $tagName = $this->getTagName($childNode); $doc->appendChild($doc->importNode($childNode, true)); $node->removeChild($childNode); } - $text = $doc->saveHTML(); + + // Convert the innerHTML to a string, replace trailing "\r\n\r\n" with just "\n" + $text = Str\trim_right($doc->saveHTML())."\n"; $tokens[] = $this->factory->createText($text); } } diff --git a/tests/CommentRemovalTest.hack b/tests/CommentRemovalTest.hack index 6eafded..cedae77 100644 --- a/tests/CommentRemovalTest.hack +++ b/tests/CommentRemovalTest.hack @@ -224,9 +224,18 @@ setTimeout(function(){ $clean_html3 = $purifier->purify($dirty_html3); $clean_html4 = $purifier->purify($dirty_html4); - expect($clean_html1)->toEqual('-->->-->'); - expect($clean_html2)->toEqual('->-->'); - expect($clean_html3)->toEqual('->-->->-->-->->'); - expect($clean_html4)->toEqual('->-->->-->-->->->-->-->->-->-->->->'); + // expect($clean_html1)->toEqual('-->->-->'); + expect($clean_html1)->toEqual("-->->-<!--&-->-&gt;\n"); + + // expect($clean_html2)->toEqual('->-->'); + expect($clean_html2)->toEqual("->-<!--&-->-&gt;\n"); + + // expect($clean_html3)->toEqual('->-->->-->-->->'); + expect($clean_html3)->toEqual("->-->->--&gt;-<!--&-->-&gt;-&gt;\n"); + + // expect($clean_html4)->toEqual('->-->->-->-->->->-->-->->-->-->->->'); + expect($clean_html4)->toEqual( + "->-->->-->-->->->--&gt;-<!--&-->-&gt;-&gt;--&gt;-<!--&-->-&gt;-&gt;-&gt;\n", + ); } } diff --git a/tests/HTMLPurifierTest.hack b/tests/HTMLPurifierTest.hack index 4c0d56d..95777a1 100644 --- a/tests/HTMLPurifierTest.hack +++ b/tests/HTMLPurifierTest.hack @@ -2,6 +2,8 @@ namespace HTMLPurifier\_Private\Tests; +use namespace HH; +use namespace HH\Lib\{C, Vec}; use function Facebook\FBExpect\expect; use type Facebook\HackTest\HackTest; use namespace HTMLPurifier; @@ -60,6 +62,21 @@ class HTMLPurifierTest extends HackTest { return $policy; } + private function compareTokenVecContents( + vec $vec1, + vec $vec2, + ): bool { + $vec1_len = C\count($vec1); + $vec2_len = C\count($vec2); + if ($vec1_len !== $vec2_len) return false; + + for ($i = 0; $i < $vec1_len; $i++) { + if (HH\idx($vec1, $i) !== HH\idx($vec2, $i)) return false; + } + + return true; + } + public function testMissingEndTags(): void { echo "\nrunning testMissingEndTags()..."; //porting over first config classes.... @@ -223,7 +240,7 @@ class HTMLPurifierTest extends HackTest { new Token\HTMLPurifier_Token_End("b", dict[]), ]; - expect($tokens)->toHaveSameContentAs($expected_tokens); + expect($tokens)->toBePHPEqual($expected_tokens); echo "finished.\n"; } @@ -251,10 +268,10 @@ class HTMLPurifierTest extends HackTest { $fn_tokens = $fix_nesting->execute($tokens, $config, $context); $va_tokens = $validate_attributes->execute($tokens, $config, $context); - expect($rfe_tokens)->toHaveSameContentAs($tokens); - expect($mwf_tokens)->toHaveSameContentAs($tokens); - expect($fn_tokens)->toHaveSameContentAs($tokens); - expect($va_tokens)->toHaveSameContentAs($tokens); + expect($rfe_tokens)->toEqual($tokens); + expect($mwf_tokens)->toEqual($tokens); + expect($fn_tokens)->toBePHPEqual($tokens); + expect($va_tokens)->toEqual($tokens); echo "finished.\n"; } diff --git a/tests/HandleIframeAsTextTest.hack b/tests/HandleIframeAsTextTest.hack index 0957370..7638293 100644 --- a/tests/HandleIframeAsTextTest.hack +++ b/tests/HandleIframeAsTextTest.hack @@ -22,14 +22,13 @@ class HandleIframeAsTextTest extends HackTest { expect($clean_html)->toEqual( '', ); echo "finished.\n\n"; } public function testSingleNestedElementWithData(): void { - echo "\ntestSingleNestedElement()..."; + echo "\ntestSingleNestedElementWithData()..."; //porting over first config classes.... $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() @@ -42,14 +41,13 @@ class HandleIframeAsTextTest extends HackTest { expect($clean_html)->toEqual( '', ); echo "finished.\n\n"; } public function testMultipleNestedElementWithData(): void { - echo "\ntestSingleNestedElement()..."; + echo "\ntestMultipleNestedElementWithData()..."; //porting over first config classes.... $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() @@ -66,14 +64,13 @@ class HandleIframeAsTextTest extends HackTest { <script>alert(1);</script> <script>alert(2);</script> <script>alert(3);</script> - ', ); echo "finished.\n\n"; } public function testSingleDoublyNestedElement(): void { - echo "\ntestSingleNestedElement()..."; + echo "\ntestSingleDoublyNestedElement()..."; //porting over first config classes.... $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() @@ -86,14 +83,13 @@ class HandleIframeAsTextTest extends HackTest { expect($clean_html)->toEqual( '', ); echo "finished.\n\n"; } public function testMultipleDoublyNestedElement(): void { - echo "\ntestSingleNestedElement()..."; + echo "\ntestMultipleDoublyNestedElement()..."; //porting over first config classes.... $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() @@ -108,14 +104,13 @@ class HandleIframeAsTextTest extends HackTest { '', ); echo "finished.\n\n"; } public function testDoublyNestedElement(): void { - echo "\ntestSingleNestedElement()..."; + echo "\ntestDoublyNestedElement()..."; //porting over first config classes.... $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() @@ -128,7 +123,6 @@ class HandleIframeAsTextTest extends HackTest { expect($clean_html)->toEqual( '', ); echo "finished.\n\n"; From 62eed918a9c9d4962a4d45ac74154f88a0841308 Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Thu, 20 Jan 2022 15:45:10 -0800 Subject: [PATCH 3/9] Removing some more unused code --- src/Lexer/DOMLex.hack | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Lexer/DOMLex.hack b/src/Lexer/DOMLex.hack index b76d71d..f3f57aa 100644 --- a/src/Lexer/DOMLex.hack +++ b/src/Lexer/DOMLex.hack @@ -2,8 +2,8 @@ namespace HTMLPurifier\Lexer; use namespace HTMLPurifier; -use namespace HH\Lib\{C, Regex, Str, Vec}; -use namespace HTMLPurifier\{Enums, Token}; +use namespace HH\Lib\{C, Regex, Str}; +use namespace HTMLPurifier\Token; /** * Parser that uses Hacklang DOMNode. @@ -101,9 +101,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { $closingNodes[$level] = $nodesAtlevel; $nodesAtlevel->push($node); } - if ( - $node->childNodes && $node->childNodes->length && $this->getTagName($node->parentNode) !== 'iframe' - ) { + if ($node->childNodes && $node->childNodes->length) { $level += 1; $nodes[$level] = new \SplQueue(); foreach ($node->childNodes as $childNode) { From a590f7266f4392c9a95a46af410981a305aa0df4 Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Thu, 20 Jan 2022 15:53:12 -0800 Subject: [PATCH 4/9] Tweaked docstring, comments, and cleaned up more unused code --- src/Lexer/DOMLex.hack | 9 ++++++++- tests/HTMLPurifierTest.hack | 17 ----------------- tests/HandleIframeAsTextTest.hack | 2 +- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/Lexer/DOMLex.hack b/src/Lexer/DOMLex.hack index f3f57aa..4fa037a 100644 --- a/src/Lexer/DOMLex.hack +++ b/src/Lexer/DOMLex.hack @@ -185,6 +185,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { } else { if ($collect) { $tokens[] = $this->factory->createStart($tag_name, $attr); + + /* If the node is an iframe, we don't want to sanitize the child nodes. + * The child nodes should be handled like text. + * This block adds the children (and their children and data) to a DOMDocument + * in order to convert the children to a string that can be converted into + * a HTMLPurifier_Token_Text + */ if ($this->getTagName($node) === 'iframe') { $doc = new \DOMDocument(); $children = vec($node->childNodes); @@ -193,7 +200,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { $node->removeChild($childNode); } - // Convert the innerHTML to a string, replace trailing "\r\n\r\n" with just "\n" + // Convert the innerHTML to a string and replace trailing "\r\n\r\n" with just "\n" $text = Str\trim_right($doc->saveHTML())."\n"; $tokens[] = $this->factory->createText($text); } diff --git a/tests/HTMLPurifierTest.hack b/tests/HTMLPurifierTest.hack index 95777a1..dae8249 100644 --- a/tests/HTMLPurifierTest.hack +++ b/tests/HTMLPurifierTest.hack @@ -2,8 +2,6 @@ namespace HTMLPurifier\_Private\Tests; -use namespace HH; -use namespace HH\Lib\{C, Vec}; use function Facebook\FBExpect\expect; use type Facebook\HackTest\HackTest; use namespace HTMLPurifier; @@ -62,21 +60,6 @@ class HTMLPurifierTest extends HackTest { return $policy; } - private function compareTokenVecContents( - vec $vec1, - vec $vec2, - ): bool { - $vec1_len = C\count($vec1); - $vec2_len = C\count($vec2); - if ($vec1_len !== $vec2_len) return false; - - for ($i = 0; $i < $vec1_len; $i++) { - if (HH\idx($vec1, $i) !== HH\idx($vec2, $i)) return false; - } - - return true; - } - public function testMissingEndTags(): void { echo "\nrunning testMissingEndTags()..."; //porting over first config classes.... diff --git a/tests/HandleIframeAsTextTest.hack b/tests/HandleIframeAsTextTest.hack index 7638293..4c23ef0 100644 --- a/tests/HandleIframeAsTextTest.hack +++ b/tests/HandleIframeAsTextTest.hack @@ -1,4 +1,4 @@ -/* Created by Nikita Ashok and Jake Polacek on 08/04/2020 */ +/* Created by Jake Polacek on 01/20/2022 */ namespace HTMLPurifier\_Private\Tests; From e6d61a92c876806dd85aa7285688a71b63656b48 Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Fri, 21 Jan 2022 10:21:33 -0800 Subject: [PATCH 5/9] Updating composer.json allowed to try fixing failing build --- composer.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/composer.json b/composer.json index f4af81b..677eb81 100644 --- a/composer.json +++ b/composer.json @@ -4,7 +4,7 @@ "require": { "hhvm/hhast": "^4.53.4", "facebook/fbexpect": "^2.7.4", - "hhvm/hhvm-autoload": "^2.0|^3.0", + "hhvm/hhvm-autoload": "=3.3.0", "hhvm/hacktest": "^2.1.0", "hhvm/hsl": "^4.41.0", "hhvm/hsl-experimental": "^4.50.0", @@ -17,5 +17,10 @@ "post-update-cmd": [ "find vendor -type d -name tests -or -name docs -or -name .git | xargs -n1 rm -rf" ] + }, + "config": { + "allow-plugins": { + "hhvm/hhvm-autoload": false + } } } From 9bb7fca991393dd652f24da55ff3c0445af22bf8 Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Fri, 21 Jan 2022 10:26:36 -0800 Subject: [PATCH 6/9] Allowing hhvm-autoload --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 677eb81..542a9dc 100644 --- a/composer.json +++ b/composer.json @@ -20,7 +20,7 @@ }, "config": { "allow-plugins": { - "hhvm/hhvm-autoload": false + "hhvm/hhvm-autoload": true } } } From 2514a8c20bdddc620c849cfd47c5f562a7491a5e Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Fri, 21 Jan 2022 10:29:12 -0800 Subject: [PATCH 7/9] Revert to original composer.json --- composer.json | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/composer.json b/composer.json index 542a9dc..f4af81b 100644 --- a/composer.json +++ b/composer.json @@ -4,7 +4,7 @@ "require": { "hhvm/hhast": "^4.53.4", "facebook/fbexpect": "^2.7.4", - "hhvm/hhvm-autoload": "=3.3.0", + "hhvm/hhvm-autoload": "^2.0|^3.0", "hhvm/hacktest": "^2.1.0", "hhvm/hsl": "^4.41.0", "hhvm/hsl-experimental": "^4.50.0", @@ -17,10 +17,5 @@ "post-update-cmd": [ "find vendor -type d -name tests -or -name docs -or -name .git | xargs -n1 rm -rf" ] - }, - "config": { - "allow-plugins": { - "hhvm/hhvm-autoload": true - } } } From b25ea765849a8b255f677809f835b625673beb7c Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Mon, 24 Jan 2022 13:14:04 -0800 Subject: [PATCH 8/9] Cleaning up commented out test cases --- tests/CommentRemovalTest.hack | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/CommentRemovalTest.hack b/tests/CommentRemovalTest.hack index cedae77..02b5ebb 100644 --- a/tests/CommentRemovalTest.hack +++ b/tests/CommentRemovalTest.hack @@ -224,16 +224,9 @@ setTimeout(function(){ $clean_html3 = $purifier->purify($dirty_html3); $clean_html4 = $purifier->purify($dirty_html4); - // expect($clean_html1)->toEqual('-->->-->'); expect($clean_html1)->toEqual("-->->-<!--&-->-&gt;\n"); - - // expect($clean_html2)->toEqual('->-->'); expect($clean_html2)->toEqual("->-<!--&-->-&gt;\n"); - - // expect($clean_html3)->toEqual('->-->->-->-->->'); expect($clean_html3)->toEqual("->-->->--&gt;-<!--&-->-&gt;-&gt;\n"); - - // expect($clean_html4)->toEqual('->-->->-->-->->->-->-->->-->-->->->'); expect($clean_html4)->toEqual( "->-->->-->-->->->--&gt;-<!--&-->-&gt;-&gt;--&gt;-<!--&-->-&gt;-&gt;-&gt;\n", ); From b9a6918b169a00997ef22169ecbe6c9964c607db Mon Sep 17 00:00:00 2001 From: Jake Polacek Date: Wed, 26 Jan 2022 13:37:38 -0800 Subject: [PATCH 9/9] Fixing new line bug --- src/Lexer/DOMLex.hack | 4 ++-- tests/CommentRemovalTest.hack | 8 +++---- tests/HandleIframeAsTextTest.hack | 35 +++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Lexer/DOMLex.hack b/src/Lexer/DOMLex.hack index 4fa037a..9c91ce9 100644 --- a/src/Lexer/DOMLex.hack +++ b/src/Lexer/DOMLex.hack @@ -200,8 +200,8 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer { $node->removeChild($childNode); } - // Convert the innerHTML to a string and replace trailing "\r\n\r\n" with just "\n" - $text = Str\trim_right($doc->saveHTML())."\n"; + // Trim the one "\n" that is added when saving + $text = $doc->saveHTML() |> Str\slice($$, 0, Str\length($$) - 1); $tokens[] = $this->factory->createText($text); } } diff --git a/tests/CommentRemovalTest.hack b/tests/CommentRemovalTest.hack index 02b5ebb..70e248a 100644 --- a/tests/CommentRemovalTest.hack +++ b/tests/CommentRemovalTest.hack @@ -224,11 +224,11 @@ setTimeout(function(){ $clean_html3 = $purifier->purify($dirty_html3); $clean_html4 = $purifier->purify($dirty_html4); - expect($clean_html1)->toEqual("-->->-<!--&-->-&gt;\n"); - expect($clean_html2)->toEqual("->-<!--&-->-&gt;\n"); - expect($clean_html3)->toEqual("->-->->--&gt;-<!--&-->-&gt;-&gt;\n"); + expect($clean_html1)->toEqual('-->->-<!--&-->-&gt;'); + expect($clean_html2)->toEqual('->-<!--&-->-&gt;'); + expect($clean_html3)->toEqual('->-->->--&gt;-<!--&-->-&gt;-&gt;'); expect($clean_html4)->toEqual( - "->-->->-->-->->->--&gt;-<!--&-->-&gt;-&gt;--&gt;-<!--&-->-&gt;-&gt;-&gt;\n", + '->-->->-->-->->->--&gt;-<!--&-->-&gt;-&gt;--&gt;-<!--&-->-&gt;-&gt;-&gt;', ); } } diff --git a/tests/HandleIframeAsTextTest.hack b/tests/HandleIframeAsTextTest.hack index 4c23ef0..b3eed9f 100644 --- a/tests/HandleIframeAsTextTest.hack +++ b/tests/HandleIframeAsTextTest.hack @@ -127,4 +127,39 @@ class HandleIframeAsTextTest extends HackTest { ); echo "finished.\n\n"; } + + public function testMultiLineIframe(): void { + echo "\ntestSingleLineIframe()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } + + public function testSingleLineIframe(): void { + echo "\ntestSingleLineIframe()..."; + //porting over first config classes.... + $config = HTMLPurifier\HTMLPurifier_Config::createDefault(); + $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault() + |> $$->addAllowedTag(Enums\HtmlTags::IFRAME); + $purifier = new HTMLPurifier\HTMLPurifier($config, $policy); + $dirty_html = + ''; + $clean_html = $purifier->purify($dirty_html); + expect($clean_html)->toEqual( + '', + ); + echo "finished.\n\n"; + } }