mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-10 07:38:41 +00:00
83 lines
3.6 KiB
PHP
83 lines
3.6 KiB
PHP
|
<?php
|
||
|
|
||
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||
|
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||
|
|
||
|
/**
 * Test case for HTMLPurifier_AttrDef_Lang, the language-code attribute
 * definition.
 *
 * Every check goes through the harness's assertDef(). Judging by the cases
 * below, a single argument means "accepted as-is", a string second argument
 * is the expected corrected value, and false means the input is rejected
 * (see HTMLPurifier_AttrDefHarness for the authoritative contract).
 */
class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
{

    /**
     * Single test entry point: installs the definition under test and then
     * walks through each group of language-code rules in turn.
     */
    function test() {

        $this->def = new HTMLPurifier_AttrDef_Lang();

        $this->checkBasicUsage();
        $this->checkPrimarySubtagRules();
        $this->checkSecondSubtagRules();
        $this->checkFurtherSubtagRules();
        $this->checkSpecialCodes();

    }

    /**
     * Everyday inputs: plain codes, normalisation, and obvious garbage.
     */
    function checkBasicUsage() {

        // straightforward valid codes
        $this->assertDef('en');
        $this->assertDef('en-us');

        // normalisation
        $this->assertDef(' en ', 'en'); // surrounding whitespace is trimmed
        $this->assertDef('EN', 'en'); // matching is case-insensitive

        // outright invalid input
        $this->assertDef('fr en', false); // more than one language
        $this->assertDef('%', false); // illegal character

        // a code longer than the syntax allows
        $this->assertDef('thisistoolongsoitgetscut', false);

    }

    /**
     * Rules for the first (primary) subtag.
     */
    function checkPrimarySubtagRules() {

        // A bare "x" or "i" is practically never seen in real documents,
        // but it is theoretically permissible, so the definition is lenient
        // about it. No XML parser is going to complain anyway.
        $this->assertDef('x');
        $this->assertDef('i');

        // the way those prefixes are actually used in the wild
        $this->assertDef('x-klingon');
        $this->assertDef('i-mingo');

        // The RFC only defines two- and three-letter primary codes, so four
        // or more letters is treated as invalid even though the syntax
        // permits 1 to 8 characters. The RFC reserves the longer forms for
        // future expansion, which means adopting a newer RFC will require
        // rewriting these cases even if it is largely backwards-compatible
        // (i.e. this behaviour is not forwards-compatible).
        $this->assertDef('four', false);

        // by the same reasoning, any other one-letter code is refused
        $this->assertDef('f', false);

    }

    /**
     * Rules for the second subtag.
     */
    function checkSecondSubtagRules() {

        // One-letter second subtags are prohibited until the RFC is revised;
        // this restriction is less volatile than the primary-subtag ones.
        // This case also covers the repair strategy: subtags are chopped
        // off until what remains is a valid language code.
        $this->assertDef('en-a', 'en');

        // Subtags of 2-8 characters are allowed. They carry special meaning,
        // but checking it would need country code lookup tables (for two
        // characters) or special registration tables (for anything longer).
        $this->assertDef('en-uk', true);

    }

    /**
     * Rules for the third and later subtags, which are purely syntactic.
     */
    function checkFurtherSubtagRules() {

        $this->assertDef('en-us-edison');
        $this->assertDef('en-us-toolonghaha', 'en-us');
        $this->assertDef('en-us-a-silly-long-one');

        // RFC 3066 says that when both a three-letter and a two-letter code
        // exist, the two-letter one MUST be used. That cannot be enforced
        // without a language code lookup table, so it is not tested here.

    }

    /**
     * The special "und" and "mul" codes.
     */
    function checkSpecialCodes() {

        // HTML technically lets you leave the language tag out, but that
        // implicitly applies the parent element's language, which is
        // sometimes wrong. "und" is therefore allowed, only slightly
        // defying the RFC's SHOULD NOT designation.
        $this->assertDef('und');

        // An attribute can only carry a single language, so allowing "mul"
        // stays in line with the RFC's SHOULD NOT designation.
        $this->assertDef('mul');

    }

}
|
||
|
|
||
|
?>
|