0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-16 17:35:19 +00:00

Fix quadratic behavior in DOMLex due to array_shift.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2013-09-17 00:48:42 -07:00
parent cf44f399f8
commit 412bae13b5
6 changed files with 188 additions and 128 deletions

2
NEWS
View File

@ -17,6 +17,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
Michael Gusev <mgusev@sugarcrm.com> for fixing.
! New directive %Core.AllowHostnameUnderscore which allows underscores
in hostnames.
- Eliminate quadratic behavior in DOMLex by using a proper queue.
Thanks Ole Laursen for noticing this.
- Made Linkify URL parser a bit less permissive, so that non-breaking
spaces and commas are not included as part of URL. Thanks nAS for fixing.
- Fix some bad interactions with %HTML.Allowed and injectors. Thanks

View File

@ -2,551 +2,551 @@
<usage>
<directive id="Core.CollectErrors">
<file name="HTMLPurifier.php">
<line>150</line>
<line>162</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>81</line>
<line>284</line>
<line>85</line>
<line>315</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>53</line>
<line>73</line>
<line>348</line>
<line>67</line>
<line>87</line>
<line>385</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>50</line>
<line>57</line>
</file>
</directive>
<directive id="CSS.MaxImgLength">
<file name="HTMLPurifier/CSSDefinition.php">
<line>157</line>
<line>226</line>
</file>
</directive>
<directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php">
<line>215</line>
<line>319</line>
</file>
</directive>
<directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php">
<line>219</line>
<line>323</line>
</file>
</directive>
<directive id="CSS.Trusted">
<file name="HTMLPurifier/CSSDefinition.php">
<line>223</line>
<line>327</line>
</file>
</directive>
<directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php">
<line>227</line>
<line>331</line>
</file>
</directive>
<directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>302</line>
<line>447</line>
</file>
</directive>
<directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>316</line>
<line>463</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
<file name="HTMLPurifier/DefinitionCacheFactory.php">
<line>59</line>
<line>66</line>
</file>
</directive>
<directive id="HTML.Doctype">
<file name="HTMLPurifier/DoctypeRegistry.php">
<line>83</line>
<line>119</line>
</file>
</directive>
<directive id="HTML.CustomDoctype">
<file name="HTMLPurifier/DoctypeRegistry.php">
<line>85</line>
<line>123</line>
</file>
</directive>
<directive id="HTML.XHTML">
<file name="HTMLPurifier/DoctypeRegistry.php">
<line>88</line>
<line>128</line>
</file>
</directive>
<directive id="HTML.Strict">
<file name="HTMLPurifier/DoctypeRegistry.php">
<line>93</line>
<line>133</line>
</file>
</directive>
<directive id="Core.Encoding">
<file name="HTMLPurifier/Encoder.php">
<line>337</line>
<line>372</line>
<line>374</line>
<line>422</line>
</file>
</directive>
<directive id="Test.ForceNoIconv">
<file name="HTMLPurifier/Encoder.php">
<line>341</line>
<line>379</line>
<line>382</line>
<line>433</line>
</file>
</directive>
<directive id="Core.EscapeNonASCIICharacters">
<file name="HTMLPurifier/Encoder.php">
<line>373</line>
<line>423</line>
</file>
</directive>
<directive id="Output.CommentScriptContents">
<file name="HTMLPurifier/Generator.php">
<line>61</line>
<line>70</line>
</file>
</directive>
<directive id="Output.FixInnerHTML">
<file name="HTMLPurifier/Generator.php">
<line>62</line>
<line>71</line>
</file>
</directive>
<directive id="Output.SortAttr">
<file name="HTMLPurifier/Generator.php">
<line>63</line>
<line>72</line>
</file>
</directive>
<directive id="Output.FlashCompat">
<file name="HTMLPurifier/Generator.php">
<line>64</line>
<line>73</line>
</file>
</directive>
<directive id="Output.TidyFormat">
<file name="HTMLPurifier/Generator.php">
<line>93</line>
<line>104</line>
</file>
</directive>
<directive id="Core.NormalizeNewlines">
<file name="HTMLPurifier/Generator.php">
<line>107</line>
<line>122</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>266</line>
<line>297</line>
</file>
</directive>
<directive id="Output.Newline">
<file name="HTMLPurifier/Generator.php">
<line>108</line>
<line>123</line>
</file>
</directive>
<directive id="HTML.BlockWrapper">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>222</line>
<line>263</line>
</file>
</directive>
<directive id="HTML.Parent">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>230</line>
<line>273</line>
</file>
</directive>
<directive id="HTML.AllowedElements">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>247</line>
<line>291</line>
</file>
</directive>
<directive id="HTML.AllowedAttributes">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>248</line>
<line>292</line>
</file>
</directive>
<directive id="HTML.Allowed">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>251</line>
<line>295</line>
</file>
</directive>
<directive id="HTML.ForbiddenElements">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>342</line>
<line>399</line>
</file>
</directive>
<directive id="HTML.ForbiddenAttributes">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>343</line>
<line>400</line>
</file>
</directive>
<directive id="HTML.Trusted">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>204</line>
<line>234</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>271</line>
<line>302</line>
</file>
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>27</line>
<line>37</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>36</line>
<line>47</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>23</line>
<line>30</line>
</file>
</directive>
<directive id="HTML.AllowedModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>211</line>
<line>241</line>
</file>
</directive>
<directive id="HTML.CoreModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>212</line>
<line>242</line>
</file>
</directive>
<directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>222</line>
<line>256</line>
</file>
</directive>
<directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>225</line>
<line>259</line>
</file>
</directive>
<directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>228</line>
<line>262</line>
</file>
</directive>
<directive id="HTML.SafeScripting">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>231</line>
<line>265</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeScripting.php">
<line>17</line>
<line>22</line>
</file>
</directive>
<directive id="HTML.Nofollow">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>234</line>
<line>268</line>
</file>
</directive>
<directive id="HTML.TargetBlank">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>237</line>
<line>271</line>
</file>
</directive>
<directive id="Attr.IDBlacklist">
<file name="HTMLPurifier/IDAccumulator.php">
<line>26</line>
<line>27</line>
</file>
</directive>
<directive id="Core.Language">
<file name="HTMLPurifier/LanguageFactory.php">
<line>88</line>
<line>93</line>
</file>
</directive>
<directive id="Core.LexerImpl">
<file name="HTMLPurifier/Lexer.php">
<line>76</line>
<line>80</line>
</file>
</directive>
<directive id="Core.MaintainLineNumbers">
<file name="HTMLPurifier/Lexer.php">
<line>80</line>
<line>84</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>48</line>
<line>62</line>
</file>
</directive>
<directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php">
<line>282</line>
<line>313</line>
</file>
</directive>
<directive id="Core.RemoveProcessingInstructions">
<file name="HTMLPurifier/Lexer.php">
<line>303</line>
<line>334</line>
</file>
</directive>
<directive id="URI.">
<file name="HTMLPurifier/URIDefinition.php">
<line>60</line>
<line>65</line>
</file>
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>12</line>
<line>46</line>
</file>
</directive>
<directive id="URI.Host">
<file name="HTMLPurifier/URIDefinition.php">
<line>70</line>
<line>76</line>
</file>
<file name="HTMLPurifier/URIScheme.php">
<line>81</line>
<line>89</line>
</file>
</directive>
<directive id="URI.Base">
<file name="HTMLPurifier/URIDefinition.php">
<line>71</line>
<line>77</line>
</file>
</directive>
<directive id="URI.DefaultScheme">
<file name="HTMLPurifier/URIDefinition.php">
<line>78</line>
<line>84</line>
</file>
</directive>
<directive id="URI.AllowedSchemes">
<file name="HTMLPurifier/URISchemeRegistry.php">
<line>41</line>
<line>48</line>
</file>
</directive>
<directive id="URI.OverrideAllowedSchemes">
<file name="HTMLPurifier/URISchemeRegistry.php">
<line>42</line>
<line>49</line>
</file>
</directive>
<directive id="URI.Disable">
<file name="HTMLPurifier/AttrDef/URI.php">
<line>28</line>
<line>47</line>
</file>
</directive>
<directive id="Core.ColorKeywords">
<file name="HTMLPurifier/AttrDef/CSS/Color.php">
<line>12</line>
<line>19</line>
</file>
<file name="HTMLPurifier/AttrDef/HTML/Color.php">
<line>12</line>
<line>19</line>
</file>
</directive>
<directive id="CSS.AllowedFonts">
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
<line>50</line>
<line>64</line>
</file>
</directive>
<directive id="Attr.AllowedClasses">
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
<line>18</line>
<line>33</line>
</file>
</directive>
<directive id="Attr.ForbiddenClasses">
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
<line>19</line>
<line>34</line>
</file>
</directive>
<directive id="Attr.AllowedFrameTargets">
<file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php">
<line>15</line>
<line>32</line>
</file>
</directive>
<directive id="Attr.EnableID">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>30</line>
<line>41</line>
</file>
</directive>
<directive id="Attr.IDPrefix">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>36</line>
<line>51</line>
</file>
</directive>
<directive id="Attr.IDPrefixLocal">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>38</line>
<line>41</line>
<line>53</line>
<line>58</line>
</file>
</directive>
<directive id="Attr.IDBlacklistRegexp">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>64</line>
<line>89</line>
</file>
</directive>
<directive id="Attr.">
<file name="HTMLPurifier/AttrDef/HTML/LinkTypes.php">
<line>30</line>
<line>46</line>
</file>
</directive>
<directive id="Core.AllowHostnameUnderscore">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>61</line>
<line>77</line>
</file>
</directive>
<directive id="Core.EnableIDNA">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>80</line>
<line>96</line>
</file>
</directive>
<directive id="Attr.DefaultTextDir">
<file name="HTMLPurifier/AttrTransform/BdoDir.php">
<line>13</line>
<line>22</line>
</file>
</directive>
<directive id="Core.RemoveInvalidImg">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>18</line>
<line>24</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>20</line>
<line>27</line>
</file>
</directive>
<directive id="Attr.DefaultInvalidImage">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>19</line>
<line>27</line>
</file>
</directive>
<directive id="Attr.DefaultImageAlt">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>25</line>
<line>33</line>
</file>
</directive>
<directive id="Attr.DefaultInvalidImageAlt">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>33</line>
<line>41</line>
</file>
</directive>
<directive id="HTML.Attr.Name.UseCDATA">
<file name="HTMLPurifier/AttrTransform/Name.php">
<line>11</line>
<line>18</line>
</file>
<file name="HTMLPurifier/HTMLModule/Name.php">
<line>13</line>
<line>19</line>
</file>
</directive>
<directive id="HTML.FlashAllowFullScreen">
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
<line>38</line>
<line>53</line>
</file>
</directive>
<directive id="Core.EscapeInvalidChildren">
<file name="HTMLPurifier/ChildDef/Required.php">
<line>62</line>
<line>86</line>
</file>
</directive>
<directive id="Cache.SerializerPath">
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
<line>91</line>
<line>171</line>
</file>
</directive>
<directive id="Cache.SerializerPermissions">
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
<line>107</line>
<line>124</line>
<line>188</line>
<line>206</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>55</line>
<line>94</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Scope">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>79</line>
<line>122</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>277</line>
<line>327</line>
</file>
</directive>
<directive id="HTML.SafeIframe">
<file name="HTMLPurifier/HTMLModule/Iframe.php">
<line>17</line>
<line>28</line>
</file>
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>23</line>
<line>48</line>
</file>
</directive>
<directive id="HTML.MaxImgLength">
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>14</line>
<line>21</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeEmbed.php">
<line>13</line>
<line>18</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeObject.php">
<line>19</line>
<line>24</line>
</file>
</directive>
<directive id="HTML.TidyLevel">
<file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>45</line>
<line>50</line>
</file>
</directive>
<directive id="HTML.TidyAdd">
<file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>49</line>
<line>54</line>
</file>
</directive>
<directive id="HTML.TidyRemove">
<file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>50</line>
<line>55</line>
</file>
</directive>
<directive id="AutoFormat.PurifierLinkify.DocURL">
<file name="HTMLPurifier/Injector/PurifierLinkify.php">
<line>15</line>
<line>31</line>
</file>
</directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>15</line>
<line>46</line>
</file>
</directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>16</line>
<line>47</line>
</file>
</directive>
<directive id="Core.AggressivelyFixLt">
<file name="HTMLPurifier/Lexer/DOMLex.php">
<line>44</line>
<line>54</line>
</file>
</directive>
<directive id="Core.DirectLexLineNumberSyncInterval">
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>70</line>
<line>84</line>
</file>
</directive>
<directive id="Core.DisableExcludes">
<file name="HTMLPurifier/Strategy/FixNesting.php">
<line>57</line>
<line>64</line>
</file>
</directive>
<directive id="Core.EscapeInvalidTags">
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
<line>53</line>
<line>66</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>19</line>
<line>26</line>
</file>
</directive>
<directive id="HTML.AllowedComments">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>24</line>
<line>31</line>
</file>
</directive>
<directive id="HTML.AllowedCommentsRegexp">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>25</line>
<line>32</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>28</line>
<line>35</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>29</line>
<line>36</line>
</file>
</directive>
<directive id="URI.HostBlacklist">
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
<line>12</line>
<line>25</line>
</file>
</directive>
<directive id="URI.MungeResources">
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>14</line>
<line>48</line>
</file>
</directive>
<directive id="URI.MungeSecretKey">
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>15</line>
<line>49</line>
</file>
</directive>
<directive id="URI.SafeIframeRegexp">
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>18</line>
<line>35</line>
</file>
</directive>
</usage>

View File

@ -57,6 +57,7 @@ require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Queue.php';
require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php';

View File

@ -51,6 +51,7 @@ require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
require_once $__dir . '/HTMLPurifier/Queue.php';
require_once $__dir . '/HTMLPurifier/Strategy.php';
require_once $__dir . '/HTMLPurifier/StringHash.php';
require_once $__dir . '/HTMLPurifier/StringHashParser.php';

View File

@ -92,11 +92,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
protected function tokenizeDOM($node, &$tokens)
{
$level = 0;
$nodes = array($level => array($node));
$nodes = array($level => new HTMLPurifier_Queue(array($node)));
$closingNodes = array();
do {
while (!empty($nodes[$level])) {
$node = array_shift($nodes[$level]); // FIFO
while (!$nodes[$level]->isEmpty()) {
$node = $nodes[$level]->shift(); // FIFO
$collect = $level > 0 ? true : false;
$needEndingTag = $this->createStartNode($node, $tokens, $collect);
if ($needEndingTag) {
@ -104,9 +104,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
}
if ($node->childNodes && $node->childNodes->length) {
$level++;
$nodes[$level] = array();
$nodes[$level] = new HTMLPurifier_Queue();
foreach ($node->childNodes as $childNode) {
array_push($nodes[$level], $childNode);
$nodes[$level]->push($childNode);
}
}
}

View File

@ -0,0 +1,56 @@
<?php
/**
* A simple array-backed queue, based off of the classic Okasaki
* persistent amortized queue. The basic idea is to maintain two
* stacks: an input stack and an output stack. When the output
* stack runs out, reverse the input stack and use it as the output
* stack.
*
* We don't use the SPL implementation because it's only supported
* on PHP 5.3 and later.
*
* Exercise: Prove that push/pop on this queue take amortized O(1) time.
*
* Exercise: Extend this queue to be a deque, while preserving amortized
* O(1) time. Some care must be taken on rebalancing to avoid quadratic
* behaviour caused by repeatedly shuffling data from the input stack
* to the output stack and back.
*/
class HTMLPurifier_Queue {
private $input;
private $output;
public function __construct($input = array()) {
$this->input = $input;
$this->output = array();
}
/**
* Shifts an element off the front of the queue.
*/
public function shift() {
if (empty($this->output)) {
$this->output = array_reverse($this->input);
$this->input = array();
}
if (empty($this->output)) {
return NULL;
}
return array_pop($this->output);
}
/**
* Pushes an element onto the front of the queue.
*/
public function push($x) {
array_push($this->input, $x);
}
/**
* Checks if it's empty.
*/
public function isEmpty() {
return empty($this->input) && empty($this->output);
}
}