diff --git a/NEWS b/NEWS index 5aad5d0a..37d42853 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Internal change ========================== +1.5.0, unknown release date +! Added a rudimentary I18N and L10N system modeled off MediaWiki +- Allow 'x' subtag in language codes + 1.4.2, unknown release date ! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier diff --git a/library/HTMLPurifier/AttrDef/Lang.php b/library/HTMLPurifier/AttrDef/Lang.php index 67183747..72d67f64 100644 --- a/library/HTMLPurifier/AttrDef/Lang.php +++ b/library/HTMLPurifier/AttrDef/Lang.php @@ -46,7 +46,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef // process second subtag : $subtags[1] $length = strlen($subtags[1]); - if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) { + if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) { return $new_string; } if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]); diff --git a/library/HTMLPurifier/Language.php b/library/HTMLPurifier/Language.php new file mode 100644 index 00000000..ca6fe031 --- /dev/null +++ b/library/HTMLPurifier/Language.php @@ -0,0 +1,56 @@ +_loaded) return; + $factory = HTMLPurifier_LanguageFactory::instance(); + $factory->loadLanguage($this->code); + foreach ($factory->keys as $key) { + $this->$key = $factory->cache[$this->code][$key]; + } + $this->_loaded = true; + } + + /** + * Retrieves a localised message. Does not perform any operations. 
+ * @param $key string identifier of message + * @return string localised message + */ + function getMessage($key) { + if (!$this->_loaded) $this->load(); + if (!isset($this->messages[$key])) return ''; + return $this->messages[$key]; + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Language/classes/en-x-test.php b/library/HTMLPurifier/Language/classes/en-x-test.php new file mode 100644 index 00000000..303ba4ba --- /dev/null +++ b/library/HTMLPurifier/Language/classes/en-x-test.php @@ -0,0 +1,12 @@ + \ No newline at end of file diff --git a/library/HTMLPurifier/Language/messages/en-x-test.php b/library/HTMLPurifier/Language/messages/en-x-test.php new file mode 100644 index 00000000..115662bd --- /dev/null +++ b/library/HTMLPurifier/Language/messages/en-x-test.php @@ -0,0 +1,11 @@ + 'HTML Purifier X' +); + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php new file mode 100644 index 00000000..7650b818 --- /dev/null +++ b/library/HTMLPurifier/Language/messages/en.php @@ -0,0 +1,12 @@ + 'HTML Purifier', +'pizza' => 'Pizza', // for unit testing purposes + +); + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php new file mode 100644 index 00000000..7097ced7 --- /dev/null +++ b/library/HTMLPurifier/LanguageFactory.php @@ -0,0 +1,196 @@ +cache[$language_code][$key] = $value + * @value array map + */ + var $cache; + + /** + * Valid keys in the HTMLPurifier_Language object. Designates which + * variables to slurp out of a message file. 
+ * @value array list + */ + var $keys = array('fallback', 'messages'); + + /** + * Instance of HTMLPurifier_AttrDef_Lang to validate language codes + * @value object HTMLPurifier_AttrDef_Lang + */ + var $validator; + + /** + * Cached copy of dirname(__FILE__), directory of current file without + * trailing slash + * @value string filename + */ + var $dir; + + /** + * Keys whose contents are a hash map and can be merged + * @value array lookup + */ + var $mergeable_keys_map = array('messages' => true); + + /** + * Keys whose contents are a list and can be merged + * @value array lookup + */ + var $mergeable_keys_list = array(); + + /** + * Retrieve sole instance of the factory. + * @static + * @param $prototype Optional prototype to overload sole instance with, + * or bool true to reset to default factory. + */ + function &instance($prototype = null) { + static $instance = null; + if ($prototype !== null && $prototype !== true) { // true is the reset flag, never a prototype + $instance = $prototype; + } elseif ($instance === null || $prototype === true) { + $instance = new HTMLPurifier_LanguageFactory(); + $instance->setup(); + } + return $instance; + } + + /** + * Sets up the singleton, much like a constructor + * @note Prevents people from getting this outside of the singleton + */ + function setup() { + $this->validator = new HTMLPurifier_AttrDef_Lang(); + $this->dir = dirname(__FILE__); + } + + /** + * Creates a language object, handles class fallbacks + * @param $code string language code + */ + function create($code) { + + $config = $context = false; // hope it doesn't use these! + $code = $this->validator->validate($code, $config, $context); + if ($code === false) $code = 'en'; // malformed code becomes English + + $pcode = str_replace('-', '_', $code); // make valid PHP classname + static $depth = 0; // recursion protection + + if ($code == 'en') { + $class = 'HTMLPurifier_Language'; + $file = $this->dir . '/Language.php'; + } else { + $class = 'HTMLPurifier_Language_' . $pcode; + $file = $this->dir . '/Language/classes/' . 
$code . '.php'; + // PHP5/APC deps bug workaround can go here + // you can bypass the conditional include by loading the + // file yourself + if (file_exists($file) && !class_exists($class)) { + include_once $file; + } + } + + if (!class_exists($class)) { + // go fallback: no class for this code, build its fallback language instead + $fallback = $this->getFallbackFor($code); + $depth++; + $lang = $this->create($fallback); + $depth--; + } else { + $lang = new $class; + } + $lang->code = $code; + + return $lang; + + } + + /** + * Returns the fallback language for language + * @note Loads the original language into cache + * @param $code string language code + */ + function getFallbackFor($code) { + $this->loadLanguage($code); + return $this->cache[$code]['fallback']; + } + + /** + * Loads language into the cache, handles message file and fallbacks + * @param $code string language code + */ + function loadLanguage($code) { + static $languages_seen = array(); // recursion guard + + // abort if we've already loaded it + if (isset($this->cache[$code])) return; + + // generate filename + $filename = $this->dir . '/Language/messages/' . $code . '.php'; + + // default fallback : may be overwritten by the ensuing include + $fallback = ($code != 'en') ? 'en' : false; + + // load primary localisation + if (!file_exists($filename)) { + // skip the include: will rely solely on fallback + $filename = $this->dir . '/Language/messages/en.php'; + $cache = array(); + } else { + include $filename; + $cache = compact($this->keys); + } + + // load fallback localisation + if (!empty($fallback)) { + + // infinite recursion guard + if (isset($languages_seen[$code])) { + trigger_error('Circular fallback reference in language ' . 
+ $code, E_USER_ERROR); + $fallback = 'en'; + } + $languages_seen[$code] = true; // in-progress marker read by the guard above + + // load the fallback recursively + $this->loadLanguage($fallback); unset($languages_seen[$code]); // marker only covers the in-flight load + $fallback_cache = $this->cache[$fallback]; + + // merge fallback with current language + foreach ( $this->keys as $key ) { + if (isset($cache[$key]) && isset($fallback_cache[$key])) { + if (isset($this->mergeable_keys_map[$key])) { + $cache[$key] = $cache[$key] + $fallback_cache[$key]; + } elseif (isset($this->mergeable_keys_list[$key])) { + $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); + } + } else { + $cache[$key] = $fallback_cache[$key]; + } + } + + } + + // save to cache for later retrieval + $this->cache[$code] = $cache; + + return; + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/LangTest.php b/tests/HTMLPurifier/AttrDef/LangTest.php index 7a0e4308..a5472e91 100644 --- a/tests/HTMLPurifier/AttrDef/LangTest.php +++ b/tests/HTMLPurifier/AttrDef/LangTest.php @@ -54,6 +54,8 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness // Also note that this test-case tests fix-behavior: chop // off subtags until you get a valid language code. $this->assertDef('en-a', 'en'); + // however, x is a reserved single-letter subtag that is allowed + $this->assertDef('en-x', 'en-x'); // 2-8 chars are permitted, but have special meaning that cannot // be checked without maintaining country code lookup tables (for // two characters) or special registration tables (for all above). 
diff --git a/tests/HTMLPurifier/LanguageFactoryTest.php b/tests/HTMLPurifier/LanguageFactoryTest.php new file mode 100644 index 00000000..050d30d8 --- /dev/null +++ b/tests/HTMLPurifier/LanguageFactoryTest.php @@ -0,0 +1,47 @@ +create('en'); + + $this->assertIsA($language, 'HTMLPurifier_Language'); + $this->assertEqual($language->code, 'en'); + + // lazy loading test + $this->assertEqual(count($language->messages), 0); + $language->load(); + $this->assertNotEqual(count($language->messages), 0); + + // actual tests for content can be found in LanguageTest + + } + + function testFallback() { + + $factory = HTMLPurifier_LanguageFactory::instance(); + + $language = $factory->create('en-x-test'); + + $this->assertIsA($language, 'HTMLPurifier_Language_en_x_test'); + $this->assertEqual($language->code, 'en-x-test'); + + $language->load(); + + // test overloaded message + $this->assertEqual($language->getMessage('htmlpurifier'), 'HTML Purifier X'); + + // test inherited message + $this->assertEqual($language->getMessage('pizza'), 'Pizza'); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/LanguageTest.php b/tests/HTMLPurifier/LanguageTest.php new file mode 100644 index 00000000..dd88c90f --- /dev/null +++ b/tests/HTMLPurifier/LanguageTest.php @@ -0,0 +1,22 @@ +lang = $factory->create('en'); + } + + function test_getMessage() { + $this->assertIdentical($this->lang->getMessage('htmlpurifier'), 'HTML Purifier'); + $this->assertIdentical($this->lang->getMessage('totally-non-existent-key'), ''); + } + +} + +?> \ No newline at end of file diff --git a/tests/test_files.php b/tests/test_files.php index ab83ec47..541b1c44 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -64,6 +64,8 @@ $test_files[] = 'EntityParserTest.php'; $test_files[] = 'Test.php'; $test_files[] = 'ContextTest.php'; $test_files[] = 'PercentEncoderTest.php'; +$test_files[] = 'LanguageFactoryTest.php'; +$test_files[] = 'LanguageTest.php'; if (version_compare(PHP_VERSION, 
'5', '>=')) { $test_files[] = 'TokenFactoryTest.php';