0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 15:28:40 +00:00

Implement Iframe module, and provide %HTML.SafeIframe and %URI.SafeIframeRegexp for untrusted usage.

The purpose of this addition is twofold. In trusted mode, iframes are
now unconditionally allowed.

However, many online video providers (YouTube, Vimeo) and other web
applications (Google Maps, Google Calendar, etc) provide embed code in
iframe format, which is useful functionality in untrusted mode.
You can specify iframes as trusted elements with %HTML.SafeIframe;
however, you need to additionally specify a whitelist mechanism such as
%URI.SafeIframeRegexp to say what iframe embeds are OK (by default
everything is rejected).

Note: As iframes are invalid in strict doctypes, you will not be able to
use them there.

We also added an always_load parameter to URIFilters in order to support
the strange nature of the SafeIframe URIFilter (it always needs to be
loaded, due to the inability of accessing the %HTML.SafeIframe directive
to see if it's needed!)  We expect this URIFilter can expand in the future
to offer more complex validation mechanisms.

Signed-off-by: Bradley M. Froehle <brad.froehle@gmail.com>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Bradley M. Froehle 2011-02-13 17:47:01 -08:00 committed by Edward Z. Yang
parent 1e5293d9fe
commit 4164b2eb2b
17 changed files with 196 additions and 21 deletions

3
NEWS
View File

@ -20,6 +20,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
a standards compliant way, by moving them into the preceding <li>
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
limited allowed comments in untrusted situations.
! Implement iframes, and allow them to be used in untrusted mode with
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
<brad.froehle@gmail.com> for submitting an initial version of the patch.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null.

View File

@ -169,7 +169,7 @@
</directive>
<directive id="HTML.Trusted">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>202</line>
<line>204</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>271</line>
@ -186,37 +186,37 @@
</directive>
<directive id="HTML.AllowedModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>209</line>
<line>211</line>
</file>
</directive>
<directive id="HTML.CoreModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>210</line>
<line>212</line>
</file>
</directive>
<directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>220</line>
<line>222</line>
</file>
</directive>
<directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>223</line>
<line>225</line>
</file>
</directive>
<directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>226</line>
<line>228</line>
</file>
</directive>
<directive id="HTML.Nofollow">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>229</line>
<line>231</line>
</file>
</directive>
<directive id="HTML.TargetBlank">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>232</line>
<line>234</line>
</file>
</directive>
<directive id="Attr.IDBlacklist">
@ -254,7 +254,7 @@
</directive>
<directive id="URI.">
<file name="HTMLPurifier/URIDefinition.php">
<line>55</line>
<line>59</line>
</file>
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>12</line>
@ -262,7 +262,7 @@
</directive>
<directive id="URI.Host">
<file name="HTMLPurifier/URIDefinition.php">
<line>64</line>
<line>69</line>
</file>
<file name="HTMLPurifier/URIScheme.php">
<line>81</line>
@ -270,12 +270,12 @@
</directive>
<directive id="URI.Base">
<file name="HTMLPurifier/URIDefinition.php">
<line>65</line>
<line>70</line>
</file>
</directive>
<directive id="URI.DefaultScheme">
<file name="HTMLPurifier/URIDefinition.php">
<line>72</line>
<line>77</line>
</file>
</directive>
<directive id="URI.AllowedSchemes">
@ -419,6 +419,14 @@
<line>123</line>
</file>
</directive>
<directive id="HTML.SafeIframe">
<file name="HTMLPurifier/HTMLModule/Iframe.php">
<line>17</line>
</file>
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>23</line>
</file>
</directive>
<directive id="HTML.MaxImgLength">
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>14</line>
@ -513,4 +521,9 @@
<line>15</line>
</file>
</directive>
<directive id="URI.SafeIframeRegexp">
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>18</line>
</file>
</directive>
</usage>

View File

@ -150,6 +150,7 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Iframe.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
@ -205,6 +206,7 @@ require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIFilter/SafeIframe.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';

View File

@ -144,6 +144,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
@ -199,6 +200,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';

View File

@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
}
public function make($string) {
$embeds = (bool) $string;
$embeds = ($string === 'embedded');
return new HTMLPurifier_AttrDef_URI($embeds);
}

View File

@ -0,0 +1,13 @@
HTML.SafeIframe
TYPE: bool
VERSION: 4.3.1
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit iframe tags in untrusted documents. This
directive must be accompanied by a whitelist of permitted iframes,
such as %URI.SafeIframeRegexp; without one, every iframe URI is
rejected. This directive has no effect on strict doctypes, as iframes
are not valid there.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,22 @@
URI.SafeIframeRegexp
TYPE: string/null
VERSION: 4.3.1
DEFAULT: NULL
--DESCRIPTION--
<p>
A PCRE regular expression that will be matched against an iframe URI. This is
a relatively inflexible scheme, but works well enough for the most common
use-case of iframes: embedded video. This directive only has an effect if
%HTML.SafeIframe is enabled. Here are some example values:
</p>
<ul>
<li><code>%^http://www\.youtube\.com/embed/%</code> - Allow YouTube videos</li>
<li><code>%^http://player\.vimeo\.com/video/%</code> - Allow Vimeo videos</li>
<li><code>%^http://(www\.youtube\.com/embed/|player\.vimeo\.com/video/)%</code> - Allow both</li>
</ul>
<p>
Note that this directive does not give you enough granularity to, say, disable
all <code>autoplay</code> videos. Pipe up on the HTML Purifier forums if this
is a capability you want.
</p>
--# vim: et sw=4 sts=4

View File

@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
'name' => 'CDATA',
'readonly' => 'Bool#readonly',
'size' => 'Number',
'src' => 'URI#embeds',
'src' => 'URI#embedded',
'tabindex' => 'Number',
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
'value' => 'CDATA',

View File

@ -0,0 +1,38 @@
<?php
/**
 * XHTML 1.1 Iframe Module: provides the inline frame element.
 *
 * @note This module is not considered safe unless an iframe
 *       whitelisting mechanism is specified. Currently, the only
 *       such mechanism is %URI.SafeIframeRegexp
 */
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
{

    /** Module name. */
    public $name = 'Iframe';

    /**
     * Starts out false; setup() switches it to true when
     * %HTML.SafeIframe is enabled.
     */
    public $safe = false;

    /**
     * Marks the module safe if %HTML.SafeIframe is set, then registers
     * the iframe element together with its attribute definitions.
     *
     * @param $config Configuration object; only %HTML.SafeIframe is read.
     */
    public function setup($config) {
        $permitUntrusted = $config->get('HTML.SafeIframe');
        if ($permitUntrusted) {
            $this->safe = true;
        }

        // Only src is declared as an embedded URI ("URI#embedded");
        // longdesc is a plain URI.
        $attributes = array(
            'src' => 'URI#embedded',
            'width' => 'Length',
            'height' => 'Length',
            'name' => 'ID',
            'scrolling' => 'Enum#yes,no,auto',
            'frameborder' => 'Enum#0,1',
            'longdesc' => 'URI',
            'marginheight' => 'Pixels',
            'marginwidth' => 'Pixels',
        );

        $this->addElement('iframe', 'Inline', 'Flow', 'Common', $attributes);
    }

}
// vim: et sw=4 sts=4

View File

@ -69,7 +69,7 @@ class HTMLPurifier_HTMLModuleManager
// Sorta legacy, but present in strict:
'Name',
);
$transitional = array('Legacy', 'Target');
$transitional = array('Legacy', 'Target', 'Iframe');
$xml = array('XMLCommonAttributes');
$non_xml = array('NonXMLCommonAttributes');
@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')),
// Iframe is a real XHTML 1.1 module, despite being
// "transitional"!
array_merge($common, $xml, array('Ruby', 'Iframe')),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
array(),
'-//W3C//DTD XHTML 1.1//EN',

View File

@ -27,6 +27,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
$this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
$this->registerFilter(new HTMLPurifier_URIFilter_Munge());
}
@ -52,11 +53,15 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
/**
 * Activates registered filters: a filter is added when it is flagged
 * always_load, or when its %URI.<name> directive is neither false nor
 * null. The registry is discarded afterwards.
 *
 * @param $config Configuration object queried for the %URI.<name> directives.
 */
protected function setupFilters($config) {
    foreach ($this->registeredFilters as $name => $filter) {
        if (!$filter->always_load) {
            // Optional filter: skip it unless its directive is switched on.
            $setting = $config->get('URI.' . $name);
            if ($setting === false || $setting === null) {
                continue;
            }
        }
        $this->addFilter($filter, $config);
    }
    unset($this->registeredFilters);
}

View File

@ -4,7 +4,9 @@
* Chainable filters for custom URI processing.
*
* These filters can perform custom actions on a URI filter object,
* including transformation or blacklisting.
* including transformation or blacklisting. A filter named Foo
* must have a corresponding configuration directive %URI.Foo,
* unless always_load is specified to be true.
*
* The following contexts may be available while URIFilters are being
* processed:
@ -37,7 +39,15 @@ abstract class HTMLPurifier_URIFilter
public $post = false;
/**
* Performs initialization for the filter
* True if this filter should always be loaded (this permits
* a filter to be named Foo without the corresponding %URI.Foo
* directive existing.)
*/
public $always_load = false;
/**
* Performs initialization for the filter. If the filter returns
* false, this means that it shouldn't be considered active.
*/
public function prepare($config) {return true;}

View File

@ -0,0 +1,35 @@
<?php
/**
 * Implements safety checks for safe iframes.
 *
 * When %HTML.SafeIframe is enabled, embedded URIs on iframe tokens are
 * matched against %URI.SafeIframeRegexp. A URI that does not match is
 * rejected; if no regexp is configured, every such URI is rejected.
 *
 * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
 *          works safely.
 */
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{

    /** Filter name. */
    public $name = 'SafeIframe';

    /**
     * Always loaded: there is no %URI.SafeIframe directive to switch
     * this filter on, so it cannot be loaded conditionally.
     */
    public $always_load = true;

    /** Value of %URI.SafeIframeRegexp, or null when no whitelist is set. */
    protected $regexp = null;

    // XXX: The not so good bit about how this is all set up now is we
    // can't check HTML.SafeIframe in the 'prepare' step: we have to
    // defer till the actual filtering.

    /**
     * Caches the whitelist regexp from the configuration.
     *
     * @param $config Configuration object; %URI.SafeIframeRegexp is read.
     * @return bool Always true.
     */
    public function prepare($config) {
        $this->regexp = $config->get('URI.SafeIframeRegexp');
        return true;
    }

    /**
     * Decides whether a URI may stay.
     *
     * @param $uri     URI object under inspection (only read here).
     * @param $config  Configuration object; %HTML.SafeIframe is read.
     * @param $context Context object; EmbeddedURI and CurrentToken are read.
     * @return bool True to accept the URI, false to reject it.
     */
    public function filter(&$uri, $config, $context) {
        // check if filter not applicable
        if (!$config->get('HTML.SafeIframe')) {
            return true;
        }
        // check if the filter should actually trigger
        if (!$context->get('EmbeddedURI', true)) {
            return true;
        }
        $token = $context->get('CurrentToken', true);
        if (!($token && $token->name === 'iframe')) {
            return true;
        }
        // check if we actually have some whitelists enabled
        if ($this->regexp === null) {
            return false;
        }
        // actually check the whitelists; preg_match() yields 1, 0 or
        // false (on error), so normalise to a real boolean and treat
        // anything other than a definite match as a rejection
        return preg_match($this->regexp, $uri->toString()) === 1;
    }

}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,8 @@
--INI--
HTML.SafeIframe = true
URI.SafeIframeRegexp = "%^http://maps.google.com/%"
--HTML--
<iframe width="425" height="350" frameborder="0" scrolling="no" marginheight="0" marginwidth="0" src="http://maps.google.com/?ie=UTF8&amp;ll=37.0625,-95.677068&amp;spn=24.455808,37.353516&amp;z=4&amp;output=embed"></iframe>
--EXPECT--
<iframe width="425" height="350" frameborder="0" scrolling="no" marginheight="0" marginwidth="0" src="http://maps.google.com/?ie=UTF8&amp;ll=37.0625,-95.677068&amp;spn=24.455808,37.353516&amp;z=4&amp;output=embed"></iframe>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,8 @@
--INI--
HTML.SafeIframe = true
URI.SafeIframeRegexp = "%^http://www.youtube.com/embed/%"
--HTML--
<iframe title="YouTube video player" width="480" height="390" src="http://www.youtube.com/embed/RVtEQxH7PWA" frameborder="0" allowfullscreen></iframe>
--EXPECT--
<iframe title="YouTube video player" width="480" height="390" src="http://www.youtube.com/embed/RVtEQxH7PWA" frameborder="0"></iframe>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,14 @@
--INI--
HTML.SafeIframe = true
URI.SafeIframeRegexp = "%(^http://www.example.com/|^https?://dev.example.com/)%"
--HTML--
<iframe src="http://www.example.com/"></iframe>
<iframe src="http://malicious.host.com/?http://www.example.com/"></iframe>
<iframe src="http://dev.example.com/"></iframe>
<iframe src="https://dev.example.com/"></iframe>
--EXPECT--
<iframe src="http://www.example.com/"></iframe>
<iframe></iframe>
<iframe src="http://dev.example.com/"></iframe>
<iframe src="https://dev.example.com/"></iframe>
--# vim: et sw=4 sts=4