From 8c80b70c37df2021afeb16f64baba56a982ce592 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 16 Apr 2006 19:34:28 +0000 Subject: [PATCH] Implement automatic <p> and <li> closing. We trimmed down the closing P tags list to something more sane in the spec. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@41 48356398-32a2-884e-a903-53898d9a118a --- PureHTMLDefinition.php | 53 +++++++++++++++++++++++++++++++++++- docs/spec.txt | 8 +++--- tests/PureHTMLDefinition.php | 50 +++++++++++++++++++++++++++++++++- 3 files changed, 105 insertions(+), 6 deletions(-) diff --git a/PureHTMLDefinition.php b/PureHTMLDefinition.php index 4121d24f..e8e05b5c 100644 --- a/PureHTMLDefinition.php +++ b/PureHTMLDefinition.php @@ -5,6 +5,27 @@ class PureHTMLDefinition var $generator; var $info = array(); + var $info_closes_p = array( + 'address' => true, + 'blockquote' => true, + 'dd' => true, + 'dir' => true, + 'div' => true, + 'dl' => true, + 'dt' => true, + 'h1' => true, + 'h2' => true, + 'h3' => true, + 'h4' => true, + 'h5' => true, + 'h6' => true, + 'hr' => true, + 'ol' => true, + 'p' => true, + 'pre' => true, + 'table' => true, + 'ul' => true + ); function PureHTMLDefinition() { $this->generator = new HTML_Generator(); @@ -58,6 +79,7 @@ class PureHTMLDefinition $this->info['del'] = $this->info['blockquote'] = $this->info['dd'] = + $this->info['li'] = $this->info['div'] = new HTMLDTD_Element($e_Flow); $this->info['em'] = @@ -187,8 +209,37 @@ class PureHTMLDefinition continue; } - // we give start tags precedence, so automatically accept + // we give start tags precedence, so automatically accept unless... 
+ // it's one of those special cases if (is_a($token, 'MF_StartTag')) { + + // if there's a parent, check for special case + if (!empty($current_nesting)) { + $current_parent = array_pop($current_nesting); + + // check if we're closing a P tag + if ($current_parent->name == 'p' && + isset($this->info_closes_p[$token->name]) + ) { + $result[] = new MF_EndTag('p'); + $result[] = $token; + $current_nesting[] = $token; + continue; + } + + // check if we're closing a LI tag + if ($current_parent->name == 'li' && + $token->name == 'li' + ) { + $result[] = new MF_EndTag('li'); + $result[] = $token; + $current_nesting[] = $token; + continue; + } + + $current_nesting[] = $current_parent; // undo the pop + } + $result[] = $token; $current_nesting[] = $token; continue; diff --git a/docs/spec.txt b/docs/spec.txt index 213e22f2..2fb31ad9 100644 --- a/docs/spec.txt +++ b/docs/spec.txt @@ -49,11 +49,11 @@ time, it's making sure the tags match up, but there's some trickery going on for HTML's quirks. They are: * Set of tags that close P - 'address', 'blockquote', 'center', 'dd', 'dir', 'div', + 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', - 'h5', 'h6', 'hr', 'isindex', 'listing', 'marquee', - 'menu', 'multicol', 'ol', 'p', 'plaintext', 'pre', - 'table', 'ul', 'xmp', + 'h5', 'h6', 'hr', + 'ol', 'p', 'pre', + 'table', 'ul' * Li closes li * more? 
diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php index f63e1d20..ed420e86 100644 --- a/tests/PureHTMLDefinition.php +++ b/tests/PureHTMLDefinition.php @@ -119,7 +119,55 @@ class Test_PureHTMLDefinition extends UnitTestCase ,new MF_EndTag('div') ); - // demonstrates start tag precedence + // test automatic paragraph closing + + $inputs[7] = array( + new MF_StartTag('p') + ,new MF_Text('Paragraph 1') + ,new MF_StartTag('p') + ,new MF_Text('Paragraph 2') + ); + $expect[7] = array( + new MF_StartTag('p') + ,new MF_Text('Paragraph 1') + ,new MF_EndTag('p') + ,new MF_StartTag('p') + ,new MF_Text('Paragraph 2') + ,new MF_EndTag('p') + ); + + $inputs[8] = array( + new MF_StartTag('div') + ,new MF_StartTag('p') + ,new MF_Text('Paragraph 1 in a div') + ,new MF_EndTag('div') + ); + $expect[8] = array( + new MF_StartTag('div') + ,new MF_StartTag('p') + ,new MF_Text('Paragraph 1 in a div') + ,new MF_EndTag('p') + ,new MF_EndTag('div') + ); + + $inputs[9] = array( + new MF_StartTag('ol') + ,new MF_StartTag('li') + ,new MF_Text('Item 1') + ,new MF_StartTag('li') + ,new MF_Text('Item 2') + ,new MF_EndTag('ol') + ); + $expect[9] = array( + new MF_StartTag('ol') + ,new MF_StartTag('li') + ,new MF_Text('Item 1') + ,new MF_EndTag('li') + ,new MF_StartTag('li') + ,new MF_Text('Item 2') + ,new MF_EndTag('li') + ,new MF_EndTag('ol') + ); foreach ($inputs as $i => $input) { $result = $this->def->makeWellFormed($input);