From fb38b0213561f7aa142c4c7b0c99a29b112caa58 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 9 Oct 2006 16:07:35 +0000 Subject: [PATCH] [1.2.0] Documentation updated - Moved docs from EntityParser to Encoder - Removed/updated docs in Generator git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@490 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 1 + library/HTMLPurifier/Encoder.php | 23 +++++++++++++++++++++++ library/HTMLPurifier/EntityParser.php | 25 ------------------------- library/HTMLPurifier/Generator.php | 3 +-- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/NEWS b/NEWS index 03cf871b..62a9429f 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.1.3, unknown projected release date (bugfix release, may be dropped if no major bugs are found before features) +- Documentation updated 1.1.2, released 2006-09-30 ! Add HTMLPurifier.auto.php stub file that configures include_path diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index ff4ea5af..8465c709 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -225,7 +225,30 @@ class HTMLPurifier_Encoder /** * Translates a Unicode codepoint into its corresponding UTF-8 character. + * @note Based on Feyd's function at + * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>, + * which is in public domain. + * @note While we're going to do code point parsing anyway, a good + * optimization would be to refuse to translate code points that + * are non-SGML characters. However, this could lead to duplication. + * @note This is very similar to the unichr function in + * maintenance/generate-entity-file.php (although this is superior, + * due to its sanity checks). 
*/ + + // +----------+----------+----------+----------+ + // | 33222222 | 22221111 | 111111 | | + // | 10987654 | 32109876 | 54321098 | 76543210 | bit + // +----------+----------+----------+----------+ + // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F + // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF + // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF + // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF + // +----------+----------+----------+----------+ + // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) + // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes + // +----------+----------+----------+----------+ + function unichr($code) { if($code > 1114111 or $code < 0 or ($code >= 55296 and $code <= 57343) ) { diff --git a/library/HTMLPurifier/EntityParser.php b/library/HTMLPurifier/EntityParser.php index 478b6ba2..f23619e0 100644 --- a/library/HTMLPurifier/EntityParser.php +++ b/library/HTMLPurifier/EntityParser.php @@ -72,37 +72,12 @@ class HTMLPurifier_EntityParser * * @warning Though this is public in order to let the callback happen, * calling it directly is not recommended. - * @note Based on Feyd's function at - * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>, - * which is in public domain. - * @note While we're going to do code point parsing anyway, a good - * optimization would be to refuse to translate code points that - * are non-SGML characters. However, this could lead to duplication. - * @note This function is heavily intimate with the inner workings of - * UTF-8 and would also be well suited in the Encoder class (or at - * least deferring some processing to it). This is also very - * similar to the unichr function in - * maintenance/generate-entity-file.php (although this is superior, - * due to its sanity checks). 
* @param $matches PCRE matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @returns Replacement string. */ - // +----------+----------+----------+----------+ - // | 33222222 | 22221111 | 111111 | | - // | 10987654 | 32109876 | 54321098 | 76543210 | bit - // +----------+----------+----------+----------+ - // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F - // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF - // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF - // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF - // +----------+----------+----------+----------+ - // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) - // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes - // +----------+----------+----------+----------+ - function nonSpecialEntityCallback($matches) { // replaces all but big five $entity = $matches[0]; diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index 4f2b9cf6..95d9c536 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -1,7 +1,5 @@