Factor out auto-paragraph to injector class.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1210 48356398-32a2-884e-a903-53898d9a118a
2025-03-11 17:18:44 +00:00 · 2007-06-23 18:50:41 +00:00 · 2007-06-23 18:50:41 +00:00 · 9191877740
commit 9191877740
parent 3066ca357a
3 changed files with 161 additions and 132 deletions
--- a/library/HTMLPurifier/Injector.php
+++ b/library/HTMLPurifier/Injector.php
@ -0,0 +1,23 @@
+<?php
+
+/**
+ * Injects tokens into the document while parsing for well-formedness.
+ * This enables "formatter-like" functionality such as auto-paragraphing,
+ * smiley-ification and linkification to take place.
+ */
+class HTMLPurifier_Injector
+{
+    
+    /**
+     * Handler that is called when a text token is processed
+     */
+    function handleText(&$token, $config, &$context) {}
+    
+    /**
+     * Handler that is called when a start token is processed
+     */
+    function handleStart(&$token, $config, &$context) {}
+    
+}
+
+?>
--- a/library/HTMLPurifier/Injector/AutoParagraph.php
+++ b/library/HTMLPurifier/Injector/AutoParagraph.php
@ -0,0 +1,132 @@
+<?php
+
+require_once 'HTMLPurifier/Injector.php';
+
+/**
+ * Injector that auto paragraphs text in the root node based on
+ * double-spacing.
+ */
+class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
+{
+    
+    function handleText(&$token, $config, &$context) {
+        $dnl = PHP_EOL . PHP_EOL; // double-newline
+        $current_nesting =& $context->get('CurrentNesting');
+        // paragraphing is on
+        if (empty($current_nesting)) {
+            // we're in root node, great time to start a paragraph
+            // since we're also dealing with a text node
+            $result =& $context->get('OutputTokens');
+            $result[] = new HTMLPurifier_Token_Start('p');
+            $current_nesting[] = new HTMLPurifier_Token_Start('p');
+            $this->_splitText($token, $config, $context);
+        } else {
+            // we're not in root node, so let's see whether or not
+            // we're in a paragraph
+            
+            // losslessly access the parent element
+            $parent = array_pop($current_nesting);
+            $current_nesting[] = $parent;
+            
+            if ($parent->name === 'p') {
+                $this->_splitText($token, $config, $context);
+            }
+        }
+    }
+    
+    /**
+     * Sub-function for auto-paragraphing that takes a token and splits it 
+     * up into paragraphs unconditionally. Requires that a paragraph was
+     * already started
+     */
+    function _splitText(&$token, $config, &$context) {
+        $dnl = PHP_EOL . PHP_EOL; // double-newline
+        $definition = $config->getHTMLDefinition();
+        $current_nesting =& $context->get('CurrentNesting');
+        
+        $raw_paragraphs = explode($dnl, $token->data);
+        
+        $token = false; // token has been completely dismantled
+        
+        // remove empty paragraphs
+        $paragraphs = array();
+        foreach ($raw_paragraphs as $par) {
+            if (trim($par) !== '') $paragraphs[] = $par;
+        }
+        
+        $result =& $context->get('OutputTokens');
+        
+        if (empty($paragraphs) && count($raw_paragraphs) > 1) {
+            $result[] = new HTMLPurifier_Token_End('p');
+            array_pop($current_nesting);
+            return;
+        }
+        
+        foreach ($paragraphs as $data) {
+            $result[] = new HTMLPurifier_Token_Text($data);
+            $result[] = new HTMLPurifier_Token_End('p');
+            $result[] = new HTMLPurifier_Token_Start('p');
+        }
+        array_pop($result); // remove trailing start token
+        
+        // check the outside to determine whether or not end
+        // paragraph tag is needed (it's already there)
+        $end_paragraph = $this->_needsEndTag(
+            $context->get('InputTokens'),
+            $context->get('InputIndex'),
+            $definition
+        );
+        
+        if ($end_paragraph) {
+            // things are good as they stand, remove top-level parent
+            // that we deferred
+            array_pop($current_nesting);
+        } else {
+            // remove the ending tag, no nesting modifications necessary
+            array_pop($result);
+        }
+        
+    }
+    
+    /**
+     * Determines if up-coming code requires an end-paragraph tag,
+     * otherwise, keep the paragraph open (don't make another one)
+     * @protected
+     */
+    function _needsEndTag($tokens, $k, $definition) {
+        $end_paragraph = false;
+        for ($j = $k + 1; isset($tokens[$j]); $j++) {
+            if ($tokens[$j]->type == 'start' || $tokens[$j]->type == 'empty') {
+                if ($tokens[$j]->name == 'p') {
+                    $end_paragraph = true;
+                } else {
+                    $end_paragraph = isset($definition->info['p']->auto_close[$tokens[$j]->name]);
+                }
+                break;
+            } elseif ($tokens[$j]->type == 'text') {
+                if (!$tokens[$j]->is_whitespace) {
+                    $end_paragraph = false;
+                    break;
+                }
+            } elseif ($tokens[$j]->type == 'end') {
+                // nonsensical case
+                $end_paragraph = false;
+                break;
+            }
+        }
+        return $end_paragraph;
+    }
+    
+    function handleStart(&$token, $config, &$context) {
+        $current_nesting =& $context->get('CurrentNesting');
+        if (!empty($current_nesting)) return;
+        $definition = $config->getHTMLDefinition();
+        // to be replaced with new auto-auto_close algorithm
+        if (isset($definition->info['p']->auto_close[$token->name])) return;
+        $result =& $context->get('OutputTokens');
+        $result[] = $current_nesting[] = new HTMLPurifier_Token_Start('p');
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@ -4,6 +4,8 @@ require_once 'HTMLPurifier/Strategy.php';
 require_once 'HTMLPurifier/HTMLDefinition.php';
 require_once 'HTMLPurifier/Generator.php';

+require_once 'HTMLPurifier/Injector/AutoParagraph.php';
+
 HTMLPurifier_ConfigSchema::define(
    'Core', 'AutoParagraph', false, 'bool', '
 <p>
@ -42,6 +44,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
        $auto_paragraph_disabled = false;
        $context->register('AutoParagraphSkip', $auto_paragraph_skip);
        
+        $injector = new HTMLPurifier_Injector_AutoParagraph();
+        
        for ($tokens_index = 0; isset($tokens[$tokens_index]); $tokens_index++) {
            
            // if all goes well, this token will be passed through unharmed
@ -60,7 +64,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                
                if ($token->type === 'text') {
                     if ($auto_paragraph && !$auto_paragraph_disabled) {
-                         $this->autoParagraphText($token, $config, $context);
+                         $injector->handleText($token, $config, $context);
                     }
                }
                
@ -113,7 +117,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                }
                
                if ($auto_paragraph && !$auto_paragraph_disabled) {
-                    $this->autoParagraphStart($token, $config, $context);
+                    $injector->handleStart($token, $config, $context);
                }
                
                $this->processToken($token, $config, $context);
@ -227,136 +231,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
        }
    }
    
-    /**
-     * Sub-function call for auto-paragraphing for any old text node.
-     * This will eventually
-     * be factored out into a generic Formatter class
-     * @note This function does not care at all about ending paragraph
-     *       tags: the rest of MakeWellFormed handles that!
-     */
-    function autoParagraphText(&$token, $config, &$context) {
-        $dnl = PHP_EOL . PHP_EOL; // double-newline
-        $current_nesting =& $context->get('CurrentNesting');
-        // paragraphing is on
-        if (empty($current_nesting)) {
-            // we're in root node, great time to start a paragraph
-            // since we're also dealing with a text node
-            $result =& $context->get('OutputTokens');
-            $result[] = new HTMLPurifier_Token_Start('p');
-            $current_nesting[] = new HTMLPurifier_Token_Start('p');
-            $this->autoParagraphSplitText($token, $config, $context);
-        } else {
-            // we're not in root node, so let's see whether or not
-            // we're in a paragraph
-            
-            // losslessly access the parent element
-            $parent = array_pop($current_nesting);
-            $current_nesting[] = $parent;
-            
-            if ($parent->name === 'p') {
-                $this->autoParagraphSplitText($token, $config, $context);
-            }
-        }
-    }
-    
-    /**
-     * Sub-function for auto-paragraphing that takes a token and splits it 
-     * up into paragraphs unconditionally. Requires that a paragraph was
-     * already started
-     */
-    function autoParagraphSplitText(&$token, $config, &$context) {
-        $dnl = PHP_EOL . PHP_EOL; // double-newline
-        $definition = $config->getHTMLDefinition();
-        $current_nesting =& $context->get('CurrentNesting');
-        
-        $raw_paragraphs = explode($dnl, $token->data);
-        
-        $token = false; // token has been completely dismantled
-        
-        // remove empty paragraphs
-        $paragraphs = array();
-        foreach ($raw_paragraphs as $par) {
-            if (trim($par) !== '') $paragraphs[] = $par;
-        }
-        
-        $result =& $context->get('OutputTokens');
-        
-        if (empty($paragraphs) && count($raw_paragraphs) > 1) {
-            $this->processToken(new HTMLPurifier_Token_End('p'), $config, $context);
-            return;
-        }
-        
-        // this could be rewritten to use processToken, but it would
-        // be slightly less efficient
-        
-        foreach ($paragraphs as $data) {
-            $result[] = new HTMLPurifier_Token_Text($data);
-            $result[] = new HTMLPurifier_Token_End('p');
-            $result[] = new HTMLPurifier_Token_Start('p');
-        }
-        array_pop($result); // remove trailing start token
-        
-        // check the outside to determine whether or not end
-        // paragraph tag is needed (it's already there)
-        $end_paragraph = $this->autoParagraphEndParagraph(
-            $context->get('InputTokens'),
-            $context->get('InputIndex'),
-            $definition
-        );
-        
-        if ($end_paragraph) {
-            // things are good as they stand, remove top-level parent
-            // that we deferred
-            array_pop($current_nesting);
-        } else {
-            // remove the ending tag, no nesting modifications necessary
-            array_pop($result);
-        }
-        
-    }
-    
-    /**
-     * Determines if up-coming code requires an end-paragraph tag,
-     * otherwise, keep the paragraph open (don't make another one)
-     * @protected
-     */
-    function autoParagraphEndParagraph($tokens, $k, $definition) {
-        $end_paragraph = false;
-        for ($j = $k + 1; isset($tokens[$j]); $j++) {
-            if ($tokens[$j]->type == 'start' || $tokens[$j]->type == 'empty') {
-                if ($tokens[$j]->name == 'p') {
-                    $end_paragraph = true;
-                } else {
-                    $end_paragraph = isset($definition->info['p']->auto_close[$tokens[$j]->name]);
-                }
-                break;
-            } elseif ($tokens[$j]->type == 'text') {
-                if (!$tokens[$j]->is_whitespace) {
-                    $end_paragraph = false;
-                    break;
-                }
-            } elseif ($tokens[$j]->type == 'end') {
-                // nonsensical case
-                $end_paragraph = false;
-                break;
-            }
-        }
-        return $end_paragraph;
-    }
-    
-    /**
-     * Sub-function for auto-paragraphing that processes element starts
-     */
-    function autoParagraphStart(&$token, $config, &$context) {
-        $current_nesting =& $context->get('CurrentNesting');
-        if (!empty($current_nesting)) return;
-        $definition = $config->getHTMLDefinition();
-        // to be replaced with new auto-auto_close algorithm
-        if (isset($definition->info['p']->auto_close[$token->name])) return;
-        $result =& $context->get('OutputTokens');
-        $this->processToken(new HTMLPurifier_Token_Start('p'), $config, $context);
-    }
-    
 }

 ?>