--- old.php 2007-08-19 14:42:33.640625000 -0400
+++ new.php 2007-08-19 14:41:51.609375000 -0400
@@ -211,7 +211,10 @@
         // If nothing is returned, emit a U+0026 AMPERSAND character token.
         // Otherwise, emit the character token that was returned.
         $char = (!$entity) ? '&' : $entity;
-        $this->emitToken($char);
+        $this->emitToken(array(
+            'type' => self::CHARACTR,
+            'data' => $char
+        ));
 
         // Finally, switch to the data state.
         $this->state = 'data';
@@ -708,7 +711,7 @@
         } elseif($char === '&') {
             /* U+0026 AMPERSAND (&)
             Switch to the entity in attribute value state. */
-            $this->entityInAttributeValueState('non');
+            $this->entityInAttributeValueState();
 
         } elseif($char === '>') {
             /* U+003E GREATER-THAN SIGN (>)
@@ -738,7 +741,8 @@
             ? '&'
             : $entity;
 
-        $this->emitToken($char);
+        $last = count($this->token['attr']) - 1;
+        $this->token['attr'][$last]['value'] .= $char;
     }
 
     private function bogusCommentState() {
@@ -1066,6 +1070,11 @@
                 $this->char++;
 
                 if(in_array($id, $this->entities)) {
+                    if ($e_name[$c-1] !== ';') {
+                        if ($c < $len && $e_name[$c] == ';') {
+                            $this->char++; // consume extra semicolon
+                        }
+                    }
                     $entity = $id;
                     break;
                 }