armor['MakeWellFormed_TagClosedError'] = true;
        return $par;
    }

    /**
     * Handles a text token, replacing it (by reference) with an array of
     * tokens when paragraph tags have to be inserted around or inside it.
     *
     * Three situations are distinguished by looking at $this->currentNesting:
     *  1. no open elements: wrap in a paragraph if 'p' is allowed here,
     *  2. the innermost open element is a 'p': only split on double newlines,
     *  3. the innermost open element allows 'p': paragraph only when this text
     *     or an upcoming sibling text contains a double newline.
     *
     * @param $token Text token (read via ->data); may be overwritten with an
     *               array of replacement tokens. Left untouched when no
     *               paragraphing is needed.
     */
    public function handleText(&$token) {
        $text = $token->data;
        if (empty($this->currentNesting)) {
            if (!$this->allowsElement('p')) return;
            // case 1: we're in root node (and it allows paragraphs)
            $token = array($this->_pStart());
            $this->_splitText($text, $token);
        } elseif ($this->currentNesting[count($this->currentNesting)-1]->name == 'p') {
            // case 2: we're in a paragraph
            // (empty result array tells _splitText that no start tag was added)
            $token = array();
            $this->_splitText($text, $token);
        } elseif ($this->allowsElement('p')) {
            // case 3: we're in an element that allows paragraphs
            if (strpos($text, "\n\n") !== false) {
                // case 3.1: this text node has a double-newline
                $token = array($this->_pStart());
                $this->_splitText($text, $token);
            } else {
                $ok = false;
                // test if up-coming tokens are either block or have
                // a double newline in them
                $nesting = 0;
                for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
                    $next = $this->inputTokens[$i];
                    if ($next instanceof HTMLPurifier_Token_Start) {
                        if (!$this->_isInline($next)) {
                            // we haven't found a double-newline, and
                            // we've hit a block element, so don't paragraph
                            // ($ok is still false at this point)
                            break;
                        }
                        $nesting++;
                    }
                    if ($next instanceof HTMLPurifier_Token_End) {
                        // an end tag at depth zero closes the parent element:
                        // nothing further belongs to this context
                        if ($nesting <= 0) break;
                        $nesting--;
                    }
                    if ($next instanceof HTMLPurifier_Token_Text) {
                        // found it!
                        if (strpos($next->data, "\n\n") !== false) {
                            $ok = true;
                            break;
                        }
                    }
                }
                if ($ok) {
                    // case 3.2: this text node is next to another node
                    // that will start a paragraph
                    $token = array($this->_pStart(), $token);
                }
            }
        }
    }

    /**
     * Handles a start-tag token, prefixing it (by reference) with a
     * paragraph start token when the element is inline and a paragraph
     * is called for.
     *
     * @param $token Start token (read via ->name); may be overwritten with
     *               array(<paragraph start>, <original token>).
     */
    public function handleElement(&$token) {
        // check if we're inside a tag already
        if (!empty($this->currentNesting)) {
            if ($this->allowsElement('p')) {
                // special case: we're in an element that allows paragraphs

                // this token is already paragraph, abort
                if ($token->name == 'p') return;

                // this token is a block level, abort
                if (!$this->_isInline($token)) return;

                // check if this token is adjacent to the parent token
                $prev = $this->inputTokens[$this->inputIndex - 1];
                if (!$prev instanceof HTMLPurifier_Token_Start) {
                    // not adjacent, we can abort early
                    // add lead paragraph tag if our token is inline
                    // and the previous tag was an end paragraph.
                    // The type test comes first so that ->name is only read
                    // on end-tag tokens; the previous token may be of a kind
                    // (e.g. text) that presumably has no name property.
                    // $token is known to be inline here (checked above), so
                    // that condition does not need to be repeated.
                    if ($prev instanceof HTMLPurifier_Token_End && $prev->name == 'p') {
                        $token = array($this->_pStart(), $token);
                    }
                    return;
                }

                // this token is the first child of the element that allows
                // paragraph. We have to peek ahead and see whether or not
                // there is anything inside that suggests that a paragraph
                // will be needed
                $ok = false;
                // maintain a mini-nesting counter, this lets us bail out
                // early if possible
                $j = 1; // current nesting, one is due to parent (we recalculate current token)
                for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
                    $current = $this->inputTokens[$i];
                    if ($current instanceof HTMLPurifier_Token_Start) $j++;
                    if ($current instanceof HTMLPurifier_Token_End) $j--;
                    if ($current instanceof HTMLPurifier_Token_Text) {
                        if (strpos($current->data, "\n\n") !== false) {
                            $ok = true;
                            break;
                        }
                    }
                    // counter dropped to zero: the parent element was closed
                    if ($j <= 0) break;
                }
                if ($ok) {
                    $token = array($this->_pStart(), $token);
                }
            }
            return;
        }

        // check if the start tag counts as a "block" element
        if (!$this->_isInline($token)) return;

        // append a paragraph tag before the token
        $token = array($this->_pStart(), $token);
    }

    /**
     * Splits up a text in paragraph tokens and appends them
     * to the result stream that will replace the original
     * @param $data String text data that will be processed
     *              into paragraphs
     * @param $result Reference to array of tokens that the
     *              tags will be appended onto. An empty array on entry
     *              is taken to mean that the caller did not add a
     *              paragraph start token.
     */
    private function _splitText($data, &$result) {
        $raw_paragraphs = explode("\n\n", $data);

        // remove empty paragraphs
        $paragraphs = array();
        $needs_start = false;
        $needs_end   = false;

        $c = count($raw_paragraphs);
        if ($c == 1) {
            // there were no double-newlines, abort quickly
            $result[] = new HTMLPurifier_Token_Text($data);
            return;
        }

        for ($i = 0; $i < $c; $i++) {
            $par = $raw_paragraphs[$i];
            if (trim($par) !== '') {
                $paragraphs[] = $par;
                continue;
            }
            if ($i == 0 && empty($result)) {
                // The empty result indicates that the AutoParagraph
                // injector did not add any start paragraph tokens.
                // The fact that the first paragraph is empty indicates
                // that there was a double-newline at the start of the
                // data.
                // Combined together, this means that we are in a paragraph,
                // and the newline means we should start a new one.
                $result[] = new HTMLPurifier_Token_End('p');
                // However, the start token should only be added if
                // there is more processing to be done (i.e. there are
                // real paragraphs in here). If there are none, the
                // next start paragraph tag will be handled by the
                // next run-around the injector
                $needs_start = true;
            } elseif ($i + 1 == $c) {
                // a double-paragraph at the end indicates that
                // there is an overriding need to start a new paragraph
                // for the next section. This has no effect until
                // we've processed all of the other paragraphs though
                $needs_end = true;
            }
        }

        // check if there are no "real" paragraphs to be processed
        if (empty($paragraphs)) {
            return;
        }

        // add a start tag if an end tag was added while processing
        // the raw paragraphs (that happens if there's a leading double
        // newline)
        if ($needs_start) $result[] = $this->_pStart();

        // append the paragraphs onto the result
        foreach ($paragraphs as $par) {
            $result[] = new HTMLPurifier_Token_Text($par);
            $result[] = new HTMLPurifier_Token_End('p');
            $result[] = $this->_pStart();
        }

        // remove trailing start token, if one is needed, it will
        // be handled the next time this injector is called
        array_pop($result);

        // check the outside to determine whether or not the
        // end paragraph tag should be removed. It should be removed
        // unless the next non-whitespace token is a paragraph
        // or a block element.
        $remove_paragraph_end = true;

        if (!$needs_end) {
            // Start of the checks one after the current token's index
            for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
                if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start || $this->inputTokens[$i] instanceof HTMLPurifier_Token_Empty) {
                    $remove_paragraph_end = $this->_isInline($this->inputTokens[$i]);
                }
                // check if we can abort early (whitespace means we carry-on!)
                if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text && !$this->inputTokens[$i]->is_whitespace) break;
                // end tags will automatically be handled by MakeWellFormed,
                // so we don't have to worry about them
                if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) break;
            }
        } else {
            $remove_paragraph_end = false;
        }

        // check the outside to determine whether or not the
        // end paragraph tag should be removed
        if ($remove_paragraph_end) {
            array_pop($result);
        }
    }

    /**
     * Returns true if passed token is inline (and, ergo, allowed in
     * paragraph tags)
     *
     * The check is a lookup of the token's name among the child elements
     * registered for 'p' in $this->htmlDefinition.
     *
     * @param $token Token exposing a ->name property
     */
    private function _isInline($token) {
        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
    }

}