0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-09 07:21:54 +00:00

[1.2.0] Documentation updated

- Moved docs from EntityParser to Encoder
- Removed/updated docs in Generator

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@490 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-10-09 16:07:35 +00:00
parent 13790c6db2
commit fb38b02135
4 changed files with 25 additions and 27 deletions

1
NEWS
View File

@ -15,6 +15,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
1.1.3, unknown projected release date 1.1.3, unknown projected release date
(bugfix release, may be dropped if no major bugs are found before features) (bugfix release, may be dropped if no major bugs are found before features)
- Documentation updated
1.1.2, released 2006-09-30 1.1.2, released 2006-09-30
! Add HTMLPurifier.auto.php stub file that configures include_path ! Add HTMLPurifier.auto.php stub file that configures include_path

View File

@ -225,7 +225,30 @@ class HTMLPurifier_Encoder
/** /**
* Translates a Unicode codepoint into its corresponding UTF-8 character. * Translates a Unicode codepoint into its corresponding UTF-8 character.
* @note Based on Feyd's function at
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
* which is in the public domain.
* @note While we're going to do code point parsing anyway, a good
* optimization would be to refuse to translate code points that
* are non-SGML characters. However, this could lead to duplication.
* @note This is very similar to the unichr function in
* maintenance/generate-entity-file.php (although this is superior,
* due to its sanity checks).
*/ */
// +----------+----------+----------+----------+
// | 33222222 | 22221111 | 111111 | |
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
// +----------+----------+----------+----------+
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
// +----------+----------+----------+----------+
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
// +----------+----------+----------+----------+
function unichr($code) { function unichr($code) {
if($code > 1114111 or $code < 0 or if($code > 1114111 or $code < 0 or
($code >= 55296 and $code <= 57343) ) { ($code >= 55296 and $code <= 57343) ) {

View File

@ -72,37 +72,12 @@ class HTMLPurifier_EntityParser
* *
* @warning Though this is public in order to let the callback happen, * @warning Though this is public in order to let the callback happen,
* calling it directly is not recommended. * calling it directly is not recommended.
* @note Based on Feyd's function at
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
* which is in public domain.
* @note While we're going to do code point parsing anyway, a good
* optimization would be to refuse to translate code points that
* are non-SGML characters. However, this could lead to duplication.
* @note This function is heavily intimate with the inner workings of
* UTF-8 and would also be well suited in the Encoder class (or at
* least deferring some processing to it). This is also very
* similar to the unichr function in
* maintenance/generate-entity-file.php (although this is superior,
* due to its sanity checks).
* @param $matches PCRE matches array, with 0 the entire match, and * @param $matches PCRE matches array, with 0 the entire match, and
* either index 1, 2 or 3 set with a hex value, dec value, * either index 1, 2 or 3 set with a hex value, dec value,
* or string (respectively). * or string (respectively).
* @returns Replacement string. * @returns Replacement string.
*/ */
// +----------+----------+----------+----------+
// | 33222222 | 22221111 | 111111 | |
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
// +----------+----------+----------+----------+
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
// +----------+----------+----------+----------+
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
// +----------+----------+----------+----------+
function nonSpecialEntityCallback($matches) { function nonSpecialEntityCallback($matches) {
// replaces all but big five // replaces all but big five
$entity = $matches[0]; $entity = $matches[0];

View File

@ -1,7 +1,5 @@
<?php <?php
// pretty-printing with indentation would be pretty cool
require_once 'HTMLPurifier/Lexer.php'; require_once 'HTMLPurifier/Lexer.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
@ -52,6 +50,7 @@ class HTMLPurifier_Generator
/** /**
* Bool cache of %Core.XHTML * Bool cache of %Core.XHTML
* @private
*/ */
var $_xhtml = true; var $_xhtml = true;