Skip to content

Commit

Permalink
Merge pull request #39 from slackhq/iframe-content-improvements
Browse files Browse the repository at this point in the history
iFrame content improvements
  • Loading branch information
JPolacek authored Jan 27, 2022
2 parents ea671be + 11f7312 commit 203b84a
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 6 deletions.
23 changes: 21 additions & 2 deletions src/Lexer/DOMLex.hack
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer {
while (!$nodes[$level]->isEmpty()) {
$node = $nodes[$level]->dequeue();
$collect = $level > 0 ? true : false;
$needEndingTag = $this->createStartNode($node, inout $tokens, $collect, $config);
$needEndingTag = $this->createStartNode(inout $node, inout $tokens, $collect, $config);
if ($needEndingTag) {
$nodesAtlevel = new \SplStack();
$closingNodes[$level] = $nodesAtlevel;
Expand Down Expand Up @@ -135,7 +135,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer {
* start and close are collected, set to false at first recursion because we are dealing with the implicit DIV tag
*/
protected function createStartNode(
\DOMNode $node,
inout \DOMNode $node,
inout vec<HTMLPurifier\HTMLPurifier_Token> $tokens,
bool $collect,
HTMLPurifier\HTMLPurifier_Config $config,
Expand Down Expand Up @@ -185,6 +185,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier\HTMLPurifier_Lexer {
} else {
if ($collect) {
$tokens[] = $this->factory->createStart($tag_name, $attr);

/* If the node is an iframe, we don't want to sanitize the child nodes.
* The child nodes should be handled like text.
* This block adds the children (and their children and data) to a DOMDocument
* in order to convert the children to a string that can be converted into
* a HTMLPurifier_Token_Text
*/
if ($this->getTagName($node) === 'iframe') {
$doc = new \DOMDocument();
$children = vec($node->childNodes);
foreach ($children as $childNode) {
$doc->appendChild($doc->importNode($childNode, true));
$node->removeChild($childNode);
}

// Trim the one "\n" that is added when saving
$text = $doc->saveHTML() |> Str\slice($$, 0, Str\length($$) - 1);
$tokens[] = $this->factory->createText($text);
}
}
return true;
}
Expand Down
10 changes: 6 additions & 4 deletions tests/CommentRemovalTest.hack
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,11 @@ setTimeout(function(){
$clean_html3 = $purifier->purify($dirty_html3);
$clean_html4 = $purifier->purify($dirty_html4);

expect($clean_html1)->toEqual('--&gt;-&gt;--&gt;');
expect($clean_html2)->toEqual('-&gt;--&gt;');
expect($clean_html3)->toEqual('-&gt;--&gt;-&gt;--&gt;--&gt;-&gt;');
expect($clean_html4)->toEqual('-&gt;--&gt;-&gt;--&gt;--&gt;-&gt;-&gt;--&gt;--&gt;-&gt;--&gt;--&gt;-&gt;-&gt;');
expect($clean_html1)->toEqual('--&gt;-&gt;-&lt;!--&amp;--&gt;-&amp;gt;');
expect($clean_html2)->toEqual('-&gt;-&lt;!--&amp;--&gt;-&amp;gt;');
expect($clean_html3)->toEqual('-&gt;--&gt;-&gt;--&amp;gt;-&lt;!--&amp;--&gt;-&amp;gt;-&amp;gt;');
expect($clean_html4)->toEqual(
'-&gt;--&gt;-&gt;--&gt;--&gt;-&gt;-&gt;--&amp;gt;-&lt;!--&amp;--&gt;-&amp;gt;-&amp;gt;--&amp;gt;-&lt;!--&amp;--&gt;-&amp;gt;-&amp;gt;-&amp;gt;',
);
}
}
165 changes: 165 additions & 0 deletions tests/HandleIframeAsTextTest.hack
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/* Created by Jake Polacek on 01/20/2022 */

namespace HTMLPurifier\_Private\Tests;

use function Facebook\FBExpect\expect;
use type Facebook\HackTest\HackTest;
use namespace HTMLPurifier;
use namespace HTMLPurifier\{Strategy, Token, Lexer, Enums};

/**
 * Verifies how the purifier treats the content of an allowed <iframe>.
 *
 * Every test purifies an <iframe> whose tag is explicitly allowed by the
 * policy and expects the iframe's children to come back as text: nested
 * markup is emitted HTML-escaped (e.g. `<b>` becomes `&lt;b&gt;`) rather than
 * as live elements, while plain text content and the surrounding whitespace
 * are passed through unchanged.
 */
class HandleIframeAsTextTest extends HackTest {
  /**
   * Builds a purifier from the default config and the default policy with
   * the IFRAME tag added to the allowed tags.
   *
   * Shared by every test in this class so the setup lives in one place.
   */
  private function createIframePurifier(): HTMLPurifier\HTMLPurifier {
    $config = HTMLPurifier\HTMLPurifier_Config::createDefault();
    $policy = HTMLPurifier\HTMLPurifier_Policy::fromDefault()
      |> $$->addAllowedTag(Enums\HtmlTags::IFRAME);
    return new HTMLPurifier\HTMLPurifier($config, $policy);
  }

  /** A single empty child element is emitted as escaped text. */
  public function testSingleNestedElement(): void {
    echo "\ntestSingleNestedElement()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
<b></b>
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
&lt;b&gt;&lt;/b&gt;
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** A child <script> and its data are emitted as escaped text. */
  public function testSingleNestedElementWithData(): void {
    echo "\ntestSingleNestedElementWithData()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
<script>alert(1);</script>
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
&lt;script&gt;alert(1);&lt;/script&gt;
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** Several sibling <script> children are all emitted as escaped text, in order. */
  public function testMultipleNestedElementWithData(): void {
    echo "\ntestMultipleNestedElementWithData()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
<script>alert(1);</script>
<script>alert(2);</script>
<script>alert(3);</script>
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
&lt;script&gt;alert(1);&lt;/script&gt;
&lt;script&gt;alert(2);&lt;/script&gt;
&lt;script&gt;alert(3);&lt;/script&gt;
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** A child containing its own child element is emitted as escaped text. */
  public function testSingleDoublyNestedElement(): void {
    echo "\ntestSingleDoublyNestedElement()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
<b><i>hello world</i></b>
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
&lt;b&gt;&lt;i&gt;hello world&lt;/i&gt;&lt;/b&gt;
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** Several doubly nested siblings are all emitted as escaped text, in order. */
  public function testMultipleDoublyNestedElement(): void {
    echo "\ntestMultipleDoublyNestedElement()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
<b><i>hello world1</i></b>
<b><i>hello world2</i></b>
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
&lt;b&gt;&lt;i&gt;hello world1&lt;/i&gt;&lt;/b&gt;
&lt;b&gt;&lt;i&gt;hello world2&lt;/i&gt;&lt;/b&gt;
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** A child with multiple element children and interleaved text is emitted as escaped text. */
  public function testDoublyNestedElement(): void {
    echo "\ntestDoublyNestedElement()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
<p><i>hello</i> <b>world</b></p>
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
&lt;p&gt;&lt;i&gt;hello&lt;/i&gt; &lt;b&gt;world&lt;/b&gt;&lt;/p&gt;
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** Plain text content spread over several lines is passed through unchanged. */
  public function testMultiLineIframe(): void {
    // Progress label now names this test; it previously repeated
    // "testSingleLineIframe()..." from the test below.
    echo "\ntestMultiLineIframe()...";
    $purifier = $this->createIframePurifier();
    $dirty_html = '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
hello world
</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">
hello world
</iframe>',
    );
    echo "finished.\n\n";
  }

  /** Plain text content on a single line is passed through unchanged. */
  public function testSingleLineIframe(): void {
    echo "\ntestSingleLineIframe()...";
    $purifier = $this->createIframePurifier();
    $dirty_html =
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">hello world</iframe>';
    $clean_html = $purifier->purify($dirty_html);
    expect($clean_html)->toEqual(
      '<iframe src="https://example.org" title="iframe Example 1" width="400" height="300">hello world</iframe>',
    );
    echo "finished.\n\n";
  }
}

0 comments on commit 203b84a

Please sign in to comment.