0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-08 14:58:42 +00:00
htmlpurifier/maintenance/PH5P.patch
Edward Z. Yang 6bb8c1fcac Handle CRLF discrepancies
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2008-06-24 21:10:51 -04:00

103 lines
3.8 KiB
Diff

--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-05 00:01:51.643585000 -0500
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-04-05 00:26:39.343160000 -0400
@@ -65,7 +65,7 @@
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
- $date = str_replace("\r", null, $data);
+ $data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
@@ -211,7 +211,10 @@
// If nothing is returned, emit a U+0026 AMPERSAND character token.
// Otherwise, emit the character token that was returned.
$char = (!$entity) ? '&' : $entity;
- $this->emitToken($char);
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => $char
+ ));
// Finally, switch to the data state.
$this->state = 'data';
@@ -708,7 +711,7 @@
} elseif($char === '&') {
/* U+0026 AMPERSAND (&)
Switch to the entity in attribute value state. */
- $this->entityInAttributeValueState('non');
+ $this->entityInAttributeValueState();
} elseif($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
@@ -738,7 +741,8 @@
? '&'
: $entity;
- $this->emitToken($char);
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
}
private function bogusCommentState() {
@@ -1066,6 +1070,11 @@
$this->char++;
if(in_array($id, $this->entities)) {
+ if ($e_name[$c-1] !== ';') {
+ if ($c < $len && $e_name[$c] == ';') {
+ $this->char++; // consume extra semicolon
+ }
+ }
$entity = $id;
break;
}
@@ -2084,7 +2093,7 @@
/* Reconstruct the active formatting elements, if any. */
$this->reconstructActiveFormattingElements();
- $this->insertElement($token);
+ $this->insertElement($token, true, true);
break;
}
break;
@@ -3465,7 +3474,18 @@
}
}
- private function insertElement($token, $append = true) {
+ private function insertElement($token, $append = true, $check = false) {
+ // Proprietary workaround for libxml2's limitations with tag names
+ if ($check) {
+ // Slightly modified HTML5 tag-name modification,
+ // removing anything that's not an ASCII letter, digit, or hyphen
+ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
+ // Remove leading hyphens and numbers
+ $token['name'] = ltrim($token['name'], '-0..9');
+ // In theory, this should ever be needed, but just in case
+ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
+ }
+
$el = $this->dom->createElement($token['name']);
foreach($token['attr'] as $attr) {
@@ -3659,7 +3679,7 @@
}
}
- private function generateImpliedEndTags(array $exclude = array()) {
+ private function generateImpliedEndTags($exclude = array()) {
/* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must
@@ -3673,7 +3693,8 @@
}
}
- private function getElementCategory($name) {
+ private function getElementCategory($node) {
+ $name = $node->tagName;
if(in_array($name, $this->special))
return self::SPECIAL;