diff --git a/PureHTMLDefinition.php b/PureHTMLDefinition.php
index 4121d24f..e8e05b5c 100644
--- a/PureHTMLDefinition.php
+++ b/PureHTMLDefinition.php
@@ -5,6 +5,27 @@ class PureHTMLDefinition
var $generator;
var $info = array();
+ var $info_closes_p = array(
+ 'address' => true,
+ 'blockquote' => true,
+ 'dd' => true,
+ 'dir' => true,
+ 'div' => true,
+ 'dl' => true,
+ 'dt' => true,
+ 'h1' => true,
+ 'h2' => true,
+ 'h3' => true,
+ 'h4' => true,
+ 'h5' => true,
+ 'h6' => true,
+ 'hr' => true,
+ 'ol' => true,
+ 'p' => true,
+ 'pre' => true,
+ 'table' => true,
+ 'ul' => true
+ );
function PureHTMLDefinition() {
$this->generator = new HTML_Generator();
@@ -58,6 +79,7 @@ class PureHTMLDefinition
$this->info['del'] =
$this->info['blockquote'] =
$this->info['dd'] =
+ $this->info['li'] =
$this->info['div'] = new HTMLDTD_Element($e_Flow);
$this->info['em'] =
@@ -187,8 +209,37 @@ class PureHTMLDefinition
continue;
}
- // we give start tags precedence, so automatically accept
+ // we give start tags precedence, so automatically accept them unless
+ // the tag hits a special case: it implicitly closes an open P or LI parent
if (is_a($token, 'MF_StartTag')) {
+
+ // if there's a parent, check for special case
+ if (!empty($current_nesting)) {
+ $current_parent = array_pop($current_nesting);
+
+ // check if we're closing a P tag
+ if ($current_parent->name == 'p' &&
+ isset($this->info_closes_p[$token->name])
+ ) {
+ $result[] = new MF_EndTag('p');
+ $result[] = $token;
+ $current_nesting[] = $token;
+ continue;
+ }
+
+ // check if we're closing an LI tag
+ if ($current_parent->name == 'li' &&
+ $token->name == 'li'
+ ) {
+ $result[] = new MF_EndTag('li');
+ $result[] = $token;
+ $current_nesting[] = $token;
+ continue;
+ }
+
+ $current_nesting[] = $current_parent; // undo the pop
+ }
+
$result[] = $token;
$current_nesting[] = $token;
continue;
diff --git a/docs/spec.txt b/docs/spec.txt
index 213e22f2..2fb31ad9 100644
--- a/docs/spec.txt
+++ b/docs/spec.txt
@@ -49,11 +49,11 @@ time, it's making sure the tags match up, but there's some trickery going on
for HTML's quirks. They are:
* Set of tags that close P
- 'address', 'blockquote', 'center', 'dd', 'dir', 'div',
+ 'address', 'blockquote', 'dd', 'dir', 'div',
'dl', 'dt', 'h1', 'h2', 'h3', 'h4',
- 'h5', 'h6', 'hr', 'isindex', 'listing', 'marquee',
- 'menu', 'multicol', 'ol', 'p', 'plaintext', 'pre',
- 'table', 'ul', 'xmp',
+ 'h5', 'h6', 'hr',
+ 'ol', 'p', 'pre',
+ 'table', 'ul'
* Li closes li
* more?
diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php
index f63e1d20..ed420e86 100644
--- a/tests/PureHTMLDefinition.php
+++ b/tests/PureHTMLDefinition.php
@@ -119,7 +119,55 @@ class Test_PureHTMLDefinition extends UnitTestCase
,new MF_EndTag('div')
);
- // demonstrates start tag precedence
+ // test automatic closing of P and LI tags
+
+ $inputs[7] = array(
+ new MF_StartTag('p')
+ ,new MF_Text('Paragraph 1')
+ ,new MF_StartTag('p')
+ ,new MF_Text('Paragraph 2')
+ );
+ $expect[7] = array(
+ new MF_StartTag('p')
+ ,new MF_Text('Paragraph 1')
+ ,new MF_EndTag('p')
+ ,new MF_StartTag('p')
+ ,new MF_Text('Paragraph 2')
+ ,new MF_EndTag('p')
+ );
+
+ $inputs[8] = array(
+ new MF_StartTag('div')
+ ,new MF_StartTag('p')
+ ,new MF_Text('Paragraph 1 in a div')
+ ,new MF_EndTag('div')
+ );
+ $expect[8] = array(
+ new MF_StartTag('div')
+ ,new MF_StartTag('p')
+ ,new MF_Text('Paragraph 1 in a div')
+ ,new MF_EndTag('p')
+ ,new MF_EndTag('div')
+ );
+
+ $inputs[9] = array(
+ new MF_StartTag('ol')
+ ,new MF_StartTag('li')
+ ,new MF_Text('Item 1')
+ ,new MF_StartTag('li')
+ ,new MF_Text('Item 2')
+ ,new MF_EndTag('ol')
+ );
+ $expect[9] = array(
+ new MF_StartTag('ol')
+ ,new MF_StartTag('li')
+ ,new MF_Text('Item 1')
+ ,new MF_EndTag('li')
+ ,new MF_StartTag('li')
+ ,new MF_Text('Item 2')
+ ,new MF_EndTag('li')
+ ,new MF_EndTag('ol')
+ );
foreach ($inputs as $i => $input) {
$result = $this->def->makeWellFormed($input);