0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 08:51:53 +00:00

[1.5.0] Add rudimentary I18N and L10N support based off MediaWiki

- Also: allow 'x' subtag in language codes

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@701 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-01-29 20:11:00 +00:00
parent be264a4b20
commit 98fd6b7d82
11 changed files with 365 additions and 1 deletions

4
NEWS
View File

@ -9,6 +9,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change . Internal change
========================== ==========================
1.5.0, unknown release date
! Added a rudimentary I18N and L10N system modeled off MediaWiki
- Allow 'x' subtag in language codes
1.4.2, unknown release date 1.4.2, unknown release date
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier ! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier

View File

@ -46,7 +46,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
// process second subtag : $subtags[1] // process second subtag : $subtags[1]
$length = strlen($subtags[1]); $length = strlen($subtags[1]);
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) { if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
return $new_string; return $new_string;
} }
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]); if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);

View File

@ -0,0 +1,56 @@
<?php
require_once 'HTMLPurifier/LanguageFactory.php';
class HTMLPurifier_Language
{

    /**
     * ISO 639 code identifying this language; the shortest form is preferred
     */
    var $code = 'en';

    /**
     * Code of the language to fall back on, or false for none
     */
    var $fallback = false;

    /**
     * Localizable messages, keyed by message identifier
     */
    var $messages = array();

    /**
     * Flag recording whether load() has already populated this object
     * @private
     */
    var $_loaded = false;

    /**
     * Populates this object from the language factory's cache.
     * @note Lazy: the work is done once, later calls are no-ops
     */
    function load() {
        if (!$this->_loaded) {
            $source = HTMLPurifier_LanguageFactory::instance();
            $source->loadLanguage($this->code);
            // copy every key the factory manages onto the same-named property
            foreach ($source->keys as $property) {
                $this->$property = $source->cache[$this->code][$property];
            }
            $this->_loaded = true;
        }
    }

    /**
     * Looks up a localised message; the message is returned untouched.
     * @param $key string identifier of message
     * @return string localised message, or an empty string if undefined
     */
    function getMessage($key) {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->messages[$key]) ? $this->messages[$key] : '';
    }

}
?>

View File

@ -0,0 +1,12 @@
<?php
// private class for unit testing
/**
 * Language class used only by the unit tests. It adds nothing to
 * HTMLPurifier_Language; it exists so that a language-specific class
 * named after the code 'en-x-test' (dashes turned into underscores)
 * can be found and instantiated.
 */
class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language
{
}
?>

View File

@ -0,0 +1,11 @@
<?php
// Private message file that exists for the unit tests only.
// It overrides a single message and defers to English for everything else.
$messages = array('htmlpurifier' => 'HTML Purifier X');
$fallback = 'en';
?>

View File

@ -0,0 +1,12 @@
<?php
// English message file: it has no fallback of its own.
$messages = array();
$messages['htmlpurifier'] = 'HTML Purifier';
$messages['pizza'] = 'Pizza'; // for unit testing purposes
$fallback = false;
?>

View File

@ -0,0 +1,196 @@
<?php
require_once 'HTMLPurifier/Language.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';
/**
* Class responsible for generating HTMLPurifier_Language objects, managing
* caching and fallbacks.
* @note Thanks to MediaWiki for the general logic, although this version
* has been entirely rewritten
*/
class HTMLPurifier_LanguageFactory
{

    /**
     * Cache of language code information used to load HTMLPurifier_Language objects
     * Structure is: $factory->cache[$language_code][$key] = $value
     * @value array map
     */
    var $cache;

    /**
     * Valid keys in the HTMLPurifier_Language object. Designates which
     * variables to slurp out of a message file.
     * @value array list
     */
    var $keys = array('fallback', 'messages');

    /**
     * Instance of HTMLPurifier_AttrDef_Lang to validate language codes
     * @value object HTMLPurifier_AttrDef_Lang
     */
    var $validator;

    /**
     * Cached copy of dirname(__FILE__), directory of current file without
     * trailing slash
     * @value string filename
     */
    var $dir;

    /**
     * Keys whose contents are a hash map and can be merged
     * @value array lookup
     */
    var $mergeable_keys_map = array('messages' => true);

    /**
     * Keys whose contents are a list and can be merged
     * @value array lookup
     */
    var $mergeable_keys_list = array();

    /**
     * Retrieve sole instance of the factory.
     * @static
     * @param $prototype Optional prototype to overload sole instance with,
     *                   or bool true to reset to default factory.
     * @return The shared factory instance
     */
    function &instance($prototype = null) {
        static $instance = null;
        if ($prototype === true || ($instance === null && $prototype === null)) {
            // true must be tested before the generic "prototype given"
            // case, otherwise the boolean itself would be stored as the
            // instance instead of triggering a reset
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        return $instance;
    }

    /**
     * Sets up the singleton, much like a constructor
     * @note Prevents people from getting this outside of the singleton
     */
    function setup() {
        $this->validator = new HTMLPurifier_AttrDef_Lang();
        $this->dir = dirname(__FILE__);
    }

    /**
     * Creates a language object, handles class fallbacks
     * @param $code string language code
     * @return HTMLPurifier_Language object (or a subclass) whose code is
     *         the validated form of $code
     */
    function create($code) {

        $config = $context = false; // hope it doesn't use these!
        $code = $this->validator->validate($code, $config, $context);
        if ($code === false) $code = 'en'; // malformed code becomes English

        $pcode = str_replace('-', '_', $code); // make valid PHP classname
        static $depth = 0; // recursion protection

        if ($code == 'en') {
            $class = 'HTMLPurifier_Language';
        } else {
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file  = $this->dir . '/Language/classes/' . $code . '.php';
            // PHP5/APC deps bug workaround can go here
            // you can bypass the conditional include by loading the
            // file yourself
            if (file_exists($file) && !class_exists($class)) {
                include_once $file;
            }
        }

        if (!class_exists($class)) {
            // no dedicated class: build the fallback language's object
            $fallback = $this->getFallbackFor($code);
            // a missing or self-referential fallback, or an implausibly
            // long fallback chain, ends at English so that the
            // recursion always terminates
            if (empty($fallback) || $fallback == $code || $depth >= 10) {
                $fallback = 'en';
            }
            $depth++;
            $lang = $this->create($fallback);
            $depth--;
        } else {
            $lang = new $class;
        }
        // the object always reports the code that was asked for, even
        // when its class comes from a fallback language
        $lang->code = $code;

        return $lang;

    }

    /**
     * Returns the fallback language for language
     * @note Loads the original language into cache
     * @param $code string language code
     * @return Fallback language code, or false if there is none
     */
    function getFallbackFor($code) {
        $this->loadLanguage($code);
        return $this->cache[$code]['fallback'];
    }

    /**
     * Loads language into the cache, handles message file and fallbacks
     * @note Raises an E_USER_ERROR if the fallback chain is circular
     * @param $code string language code
     */
    function loadLanguage($code) {
        // codes whose fallback chain is currently being resolved; an
        // entry is removed once its fallback has finished loading, so
        // the guard only ever reports genuine cycles
        static $languages_seen = array();

        // abort if we've already loaded it
        if (isset($this->cache[$code])) return;

        // generate filename
        $filename = $this->dir . '/Language/messages/' . $code . '.php';

        // default fallback : may be overwritten by the ensuing include
        $fallback = ($code != 'en') ? 'en' : false;

        // load primary localisation
        if (!file_exists($filename)) {
            // skip the include: will rely solely on fallback
            $cache = array();
        } else {
            // the message file assigns local variables named after
            // $this->keys; compact() collects them into the cache entry
            include $filename;
            $cache = compact($this->keys);
        }

        // infinite recursion guard
        if (!empty($fallback) && isset($languages_seen[$code])) {
            trigger_error('Circular fallback reference in language ' .
                $code, E_USER_ERROR);
            // break the cycle; English itself must not fall back on
            // English or the recursion would never end
            $fallback = ($code != 'en') ? 'en' : false;
        }

        // load fallback localisation
        if (!empty($fallback)) {

            $languages_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
            unset($languages_seen[$code]);
            $fallback_cache = $this->cache[$fallback];

            // merge fallback with current language
            foreach ( $this->keys as $key ) {
                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
                    if (isset($this->mergeable_keys_map[$key])) {
                        // the language's own entries win over the fallback's
                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
                    } elseif (isset($this->mergeable_keys_list[$key])) {
                        $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
                    }
                } elseif (!isset($cache[$key])) {
                    // inherit only what the language did not define
                    // itself; never clobber an existing value
                    $cache[$key] = isset($fallback_cache[$key]) ? $fallback_cache[$key] : null;
                }
            }

        }

        // save to cache for later retrieval
        $this->cache[$code] = $cache;

        return;
    }

}
?>

View File

@ -54,6 +54,8 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
// Also note that this test-case tests fix-behavior: chop // Also note that this test-case tests fix-behavior: chop
// off subtags until you get a valid language code. // off subtags until you get a valid language code.
$this->assertDef('en-a', 'en'); $this->assertDef('en-a', 'en');
// however, x is a reserved single-letter subtag that is allowed
$this->assertDef('en-x', 'en-x');
// 2-8 chars are permitted, but have special meaning that cannot // 2-8 chars are permitted, but have special meaning that cannot
// be checked without maintaining country code lookup tables (for // be checked without maintaining country code lookup tables (for
// two characters) or special registration tables (for all above). // two characters) or special registration tables (for all above).

View File

@ -0,0 +1,47 @@
<?php
require_once 'HTMLPurifier/LanguageFactory.php';
class HTMLPurifier_LanguageFactoryTest extends UnitTestCase
{

    /**
     * The default language comes back as the base class and stays
     * empty until it is explicitly loaded.
     */
    function test() {

        $english = HTMLPurifier_LanguageFactory::instance()->create('en');

        $this->assertIsA($english, 'HTMLPurifier_Language');
        $this->assertEqual($english->code, 'en');

        // nothing may be loaded before load() is called
        $this->assertEqual(count($english->messages), 0);
        $english->load();
        $this->assertNotEqual(count($english->messages), 0);

        // message contents are checked in LanguageTest

    }

    /**
     * A language with its own class and message file overrides some
     * messages and inherits the remainder from its fallback.
     */
    function testFallback() {

        $testLang = HTMLPurifier_LanguageFactory::instance()->create('en-x-test');

        $this->assertIsA($testLang, 'HTMLPurifier_Language_en_x_test');
        $this->assertEqual($testLang->code, 'en-x-test');

        $testLang->load();

        // message overridden by the test language
        $this->assertEqual($testLang->getMessage('htmlpurifier'), 'HTML Purifier X');

        // message taken over from the fallback
        $this->assertEqual($testLang->getMessage('pizza'), 'Pizza');

    }

}
?>

View File

@ -0,0 +1,22 @@
<?php
require_once 'HTMLPurifier/Language.php';
class HTMLPurifier_LanguageTest extends UnitTestCase
{

    /**
     * Language object under test, recreated by setup()
     */
    var $lang;

    /**
     * Obtains an English language object from the shared factory.
     */
    function setup() {
        $this->lang = HTMLPurifier_LanguageFactory::instance()->create('en');
    }

    /**
     * Known keys yield their message; unknown keys yield an empty string.
     */
    function test_getMessage() {
        $known   = $this->lang->getMessage('htmlpurifier');
        $this->assertIdentical($known, 'HTML Purifier');
        $unknown = $this->lang->getMessage('totally-non-existent-key');
        $this->assertIdentical($unknown, '');
    }

}
?>

View File

@ -64,6 +64,8 @@ $test_files[] = 'EntityParserTest.php';
$test_files[] = 'Test.php'; $test_files[] = 'Test.php';
$test_files[] = 'ContextTest.php'; $test_files[] = 'ContextTest.php';
$test_files[] = 'PercentEncoderTest.php'; $test_files[] = 'PercentEncoderTest.php';
$test_files[] = 'LanguageFactoryTest.php';
$test_files[] = 'LanguageTest.php';
if (version_compare(PHP_VERSION, '5', '>=')) { if (version_compare(PHP_VERSION, '5', '>=')) {
$test_files[] = 'TokenFactoryTest.php'; $test_files[] = 'TokenFactoryTest.php';