htmlpurifier/tests/HTMLPurifier/AttrDef/LangTest.php

<?php

require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';

/**
 * Test case for HTMLPurifier_AttrDef_Lang, checked against a list of
 * language codes.
 *
 * All expectations go through assertDef() from the harness.
 * NOTE(review): a one-argument call presumably expects the input to come
 * back unchanged, a string second argument the corrected value, and false
 * an outright rejection -- confirm in HTMLPurifier_AttrDefHarness.
 */
class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
{

    /**
     * Creates the definition under test and runs every group of language
     * code assertions against it, in a fixed order.
     */
    function test() {
        $this->def = new HTMLPurifier_AttrDef_Lang();

        $this->_checkBasicUsage();
        $this->_checkPrimarySubtag();
        $this->_checkSecondSubtag();
        $this->_checkFurtherSubtags();
        $this->_checkSpecialCodes();
    }

    /**
     * Everyday values, normalization and plainly malformed input.
     * Relies on $this->def having been set by test().
     */
    function _checkBasicUsage() {
        // well-formed codes
        $this->assertDef('en');
        $this->assertDef('en-us');

        // surrounding whitespace is trimmed
        $this->assertDef(' en ', 'en');
        // matching is case insensitive
        $this->assertDef('EN', 'en');

        // more than one language in a single value
        $this->assertDef('fr en', false);
        // character outside the permitted set
        $this->assertDef('%', false);

        // language that is overlong according to the syntax
        $this->assertDef('thisistoolongsoitgetscut', false);
    }

    /**
     * Rules for the primary (first) subtag.
     * Relies on $this->def having been set by test().
     */
    function _checkPrimarySubtag() {
        // x and i are rarely seen alone as primary language codes in real
        // life, but on their own they are theoretically permissible, so the
        // definition is lenient. No XML parser is going to complain anyway.
        $this->assertDef('x');
        $this->assertDef('i');

        // the way x and i show up in the real world
        $this->assertDef('x-klingon');
        $this->assertDef('i-mingo');

        // The RFC only defines two and three letter primary codes, so a
        // length of four or more is invalid even though the syntax allows
        // 1 to 8 characters. The RFC reserves the remainder for future
        // versions; adopting a new RFC will therefore require rewriting
        // these cases even if backwards-compatibility is largely retained
        // (i.e. this is not forwards compatible).
        $this->assertDef('four', false);

        // same reasoning: no one character language other than x and i
        $this->assertDef('f', false);
    }

    /**
     * Rules for the second subtag.
     * Relies on $this->def having been set by test().
     */
    function _checkSecondSubtag() {
        // One letter subtags are prohibited until the RFC is revised; this
        // is less volatile than the primary subtag restrictions. This case
        // also covers the fix behavior: subtags are chopped off until a
        // valid language code remains.
        $this->assertDef('en-a', 'en');

        // 2-8 characters are permitted, but carry special meaning that
        // cannot be verified without country code lookup tables (for two
        // characters) or special registration tables (for anything longer).
        $this->assertDef('en-uk', true);
    }

    /**
     * Third and later subtags, which face syntactic constraints only.
     * Relies on $this->def having been set by test().
     */
    function _checkFurtherSubtags() {
        $this->assertDef('en-us-edison');
        $this->assertDef('en-us-toolonghaha', 'en-us');
        $this->assertDef('en-us-a-silly-long-one');

        // RFC 3066 stipulates that when both a three letter and a two
        // letter code are available, the two letter one MUST be used.
        // Without a language code lookup table that cannot be implemented,
        // so nothing is asserted for it.
    }

    /**
     * Codes the RFC marks SHOULD NOT that are accepted regardless.
     * Relies on $this->def having been set by test().
     */
    function _checkSpecialCodes() {
        // HTML technically lets the language be omitted, but that implies
        // the parent element's language applies, which is sometimes wrong.
        // Hence und is allowed, only slightly defying the RFC's SHOULD NOT
        // designation.
        $this->assertDef('und');

        // An attribute holds only one language, so mul is allowed, which
        // complies with the RFC's SHOULD NOT designation.
        $this->assertDef('mul');
    }

}

?>
Implement lang and xml:lang. Fixed a bunch of bugs too. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@162 48356398-32a2-884e-a903-53898d9a118a 2006-08-05 01:50:13 +00:00			`<?php`

			`require_once 'HTMLPurifier/AttrDefHarness.php';`
			`require_once 'HTMLPurifier/AttrDef/Lang.php';`

			`class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness`
			`{`

			`function test() {`

			`$this->def = new HTMLPurifier_AttrDef_Lang();`

			`// basic good uses`
			`$this->assertDef('en');`
			`$this->assertDef('en-us');`

			`$this->assertDef(' en ', 'en'); // trim`
			`$this->assertDef('EN', 'en'); // case insensitivity`

			`$this->assertDef('fr en', false); // multiple languages`
			`$this->assertDef('%', false); // bad character`

			`// test overlong language according to syntax`
			`$this->assertDef('thisistoolongsoitgetscut', false);`

			`// primary subtag rules`
			`// I'm somewhat hesitant to allow x and i as primary language codes,`
			`// because they usually are never used in real life. However,`
			`// theoretically speaking, having them alone is permissible, so`
			`// I'll be lenient. No XML parser is going to complain anyway.`
			`$this->assertDef('x');`
			`$this->assertDef('i');`
			`// real world use-cases`
			`$this->assertDef('x-klingon');`
			`$this->assertDef('i-mingo');`
			`// because the RFC only defines two and three letter primary codes,`
			`// anything with a length of four or greater is invalid, despite`
			`// the syntax stipulation of 1 to 8 characters. Because the RFC`
			`// specifically states that this reservation is in order to allow`
			`// for future versions to expand, the adoption of a new RFC will`
			`// require these test cases to be rewritten, even if backwards-`
			`// compatibility is largely retained (i.e. this is not forwards`
			`// compatible)`
			`$this->assertDef('four', false);`
			`// for similar reasons, disallow any other one character language`
			`$this->assertDef('f', false);`

			`// second subtag rules`
			`// one letter subtags prohibited until revision. This is, however,`
			`// less volatile than the restrictions on the primary subtags.`
			`// Also note that this test-case tests fix-behavior: chop`
			`// off subtags until you get a valid language code.`
			`$this->assertDef('en-a', 'en');`
			`// 2-8 chars are permitted, but have special meaning that cannot`
			`// be checked without maintaining country code lookup tables (for`
			`// two characters) or special registration tables (for all above).`
			`$this->assertDef('en-uk', true);`

			`// further subtag rules: only syntactic constraints`
			`$this->assertDef('en-us-edison');`
			`$this->assertDef('en-us-toolonghaha', 'en-us');`
			`$this->assertDef('en-us-a-silly-long-one');`

			`// rfc 3066 stipulates that if a three letter and a two letter code`
			`// are available, the two letter one MUST be used. Without a language`
			`// code lookup table, we cannot implement this functionality.`

			`// although the HTML protocol, technically speaking, allows you to`
			`// omit language tags, this implicitly means that the parent element's`
			`// language is the one applicable, which, in some cases, is incorrect.`
			`// Thus, we allow und, only slightly defying the RFC's SHOULD NOT`
			`// designation.`
			`$this->assertDef('und');`

			`// because attributes only allow one language, mul is allowed, complying`
			`// with the RFC's SHOULD NOT designation.`
			`$this->assertDef('mul');`

			`}`

			`}`

			`?>`