htmlpurifier/tests/HTML_Lexer.php

<?php

/**
 * Unit tests for HTML_Lexer, plus a smoke run of HTML_Lexer_Sax on the same
 * HTML inputs.
 *
 * Only HTML_Lexer's output is compared against expected values; the SAX-based
 * lexer is merely executed (see test_tokenizeHTML()).
 */
class TestCase_HTML_Lexer extends UnitTestCase
{
    
    var $HTML_Lexer;
    var $HTML_Lexer_Sax;
    
    /**
     * Creates the two lexer instances used by the test methods.
     */
    function setUp() {
        // Plain assignment on purpose: "=& new" is deprecated in PHP 5 and is
        // a parse error from PHP 7.0 onwards.
        $this->HTML_Lexer     = new HTML_Lexer();
        $this->HTML_Lexer_Sax = new HTML_Lexer_Sax();
    }
    
    /**
     * nextWhiteSpace() is expected to return the offset of the first
     * whitespace character (space, newline, tab or carriage return) in the
     * string, or boolean false when the string contains none.
     */
    function test_nextWhiteSpace() {
        $HP =& $this->HTML_Lexer;
        $this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
        $this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
        $this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
        $this->assertIdentical(1, $HP->nextWhiteSpace("a\tsdf"));
        $this->assertIdentical(4, $HP->nextWhiteSpace("asdf\r"));
        $this->assertIdentical(2, $HP->nextWhiteSpace("as\t\r\nasdf as"));
    }
    
    /**
     * Feeds a table of HTML fragments to HTML_Lexer::tokenizeHTML() and
     * compares the returned token list with the expected one.
     *
     * $input[$i] and $expect[$i] belong together; both arrays are appended to
     * in lock-step so their indexes stay aligned.
     */
    function test_tokenizeHTML() {
        
        $input  = array();
        $expect = array();
        
        // empty input yields no tokens
        $input[] = '';
        $expect[] = array();
        
        // text without any markup is a single text token
        $input[] = 'This is regular text.';
        $expect[] = array(
            new MF_Text('This is regular text.')
        );
        
        $input[] = 'This is <b>bold</b> text';
        $expect[] = array(
            new MF_Text('This is '),
            new MF_StartTag('b', array()),
            new MF_Text('bold'),
            new MF_EndTag('b'),
            new MF_Text(' text')
        );
        
        // tag names are expected back with the case used in the input
        $input[] = '<DIV>Totally rad dude. <b>asdf</b></div>';
        $expect[] = array(
            new MF_StartTag('DIV', array()),
            new MF_Text('Totally rad dude. '),
            new MF_StartTag('b', array()),
            new MF_Text('asdf'),
            new MF_EndTag('b'),
            new MF_EndTag('div')
        );
        
        // unknown and unbalanced tags are tokenized as they appear
        $input[] = '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>';
        $expect[] = array(
            new MF_StartTag('asdf'),
            new MF_EndTag('asdf'),
            new MF_StartTag('d'),
            new MF_EndTag('d'),
            new MF_StartTag('poOloka'),
            new MF_StartTag('poolasdf'),
            new MF_StartTag('ds'),
            new MF_EndTag('asdf'),
            new MF_EndTag('ASDF')
        );
        
        // a tab or newline may separate the tag name and its attributes
        $input[] = '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>';
        $expect[] = array(
            new MF_StartTag('a', array('href'=>'foobar.php', 'title'=>'foo!')),
            new MF_Text('Link to '),
            new MF_StartTag('b', array('id'=>'asdf')),
            new MF_Text('foobar'),
            new MF_EndTag('b'),
            new MF_EndTag('a')
        );
        
        // self-closing syntax produces an empty-tag token
        $input[] = '<br />';
        $expect[] = array(
            new MF_EmptyTag('br')
        );
        
        // the surplus dash of "--->" is expected to end up in the comment text
        $input[] = '<!-- Comment --> <!-- not so well formed --->';
        $expect[] = array(
            new MF_Comment(' Comment '),
            new MF_Text(' '),
            new MF_Comment(' not so well formed -')
        );
        
        // a tag that is never closed is expected back as plain text
        $input[] = '<a href=""';
        $expect[] = array(
            new MF_Text('<a href=""')
        );
        
        foreach ($input as $i => $html) {
            $result = $this->HTML_Lexer->tokenizeHTML($html);
            $this->assertEqual($expect[$i], $result);
            paintIf($result, $expect[$i] != $result);
            
            // The SAX parser was not written by the test author, so its exact
            // output is not asserted here; it is only run on the same input
            // to make sure the class executes without errors.
            $this->HTML_Lexer_Sax->tokenizeHTML($html);
        }
        
    }
    
    /**
     * Feeds a table of raw attribute strings to
     * HTML_Lexer::tokenizeAttributeString() and compares the returned
     * name => value array with the expected one.
     */
    function test_tokenizeAttributeString() {
        
        $input  = array();
        $expect = array();
        
        $input[] = 'href="asdf" boom="assdf"';
        $expect[] = array('href'=>'asdf', 'boom'=>'assdf');
        
        // single-quoted value
        $input[] = "href='r'";
        $expect[] = array('href'=>'r');
        
        // single quotes nested inside a double-quoted value are kept
        $input[] = 'onclick="javascript:alert(\'asdf\');"';
        $expect[] = array('onclick' => "javascript:alert('asdf');");
        
        // an attribute without a value gets its own name as value
        $input[] = 'selected';
        $expect[] = array('selected'=>'selected');
        
        // a value without an attribute name is dropped
        $input[] = '="asdf"';
        $expect[] = array();
        
        foreach ($input as $i => $attribute_string) {
            $result = $this->HTML_Lexer->tokenizeAttributeString($attribute_string);
            $this->assertEqual($expect[$i], $result);
            paintIf($result, $expect[$i] != $result);
        }
        
    }
    
    
}

?>
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`<?php`

Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00			`class TestCase_HTML_Lexer extends UnitTestCase`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`{`

Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00			`var $HTML_Lexer;`
			`var $HTML_Lexer_Sax;`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00
			`function setUp() {`
Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00			`$this->HTML_Lexer =& new HTML_Lexer();`
			`$this->HTML_Lexer_Sax =& new HTML_Lexer_Sax();`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`}`

			`function test_nextWhiteSpace() {`
Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00			`$HP =& $this->HTML_Lexer;`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));`
			`$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));`
			`$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));`
			`$this->assertIdentical(1, $HP->nextWhiteSpace("a\tsdf"));`
			`$this->assertIdentical(4, $HP->nextWhiteSpace("asdf\r"));`
			`$this->assertIdentical(2, $HP->nextWhiteSpace("as\t\r\nasdf as"));`
			`}`

			`function test_tokenizeHTML() {`

			`$input[] = '';`
			`$expect[] = array();`

			`$input[] = 'This is regular text.';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_Text('This is regular text.')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = 'This is <b>bold</b> text';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_Text('This is ')`
			`,new MF_StartTag('b', array())`
			`,new MF_Text('bold')`
			`,new MF_EndTag('b')`
			`,new MF_Text(' text')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = '<DIV>Totally rad dude. <b>asdf</b></div>';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_StartTag('DIV', array())`
			`,new MF_Text('Totally rad dude. ')`
			`,new MF_StartTag('b', array())`
			`,new MF_Text('asdf')`
			`,new MF_EndTag('b')`
			`,new MF_EndTag('div')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_StartTag('asdf')`
			`,new MF_EndTag('asdf')`
			`,new MF_StartTag('d')`
			`,new MF_EndTag('d')`
			`,new MF_StartTag('poOloka')`
			`,new MF_StartTag('poolasdf')`
			`,new MF_StartTag('ds')`
			`,new MF_EndTag('asdf')`
			`,new MF_EndTag('ASDF')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_StartTag('a',array('href'=>'foobar.php','title'=>'foo!'))`
			`,new MF_Text('Link to ')`
			`,new MF_StartTag('b',array('id'=>'asdf'))`
			`,new MF_Text('foobar')`
			`,new MF_EndTag('b')`
			`,new MF_EndTag('a')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = '<br />';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_EmptyTag('br')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = '<!-- Comment --> <!-- not so well formed --->';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_Comment(' Comment ')`
			`,new MF_Text(' ')`
			`,new MF_Comment(' not so well formed -')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$input[] = '<a href=""';`
			`$expect[] = array(`
Kick naming changes into effect. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@20 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:30:26 +00:00			`new MF_Text('<a href=""')`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`);`

			`$size = count($input);`
			`for($i = 0; $i < $size; $i++) {`
Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00			`$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`$this->assertEqual($expect[$i], $result);`
			`paintIf($result, $expect[$i] != $result);`
Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00
			`// since I didn't write the parser, I can't define its behavior`
			`// however, make sure that the class runs without any errors`
			`$exp_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`}`

			`}`

			`function test_tokenizeAttributeString() {`

			`$input[] = 'href="asdf" boom="assdf"';`
			`$expect[] = array('href'=>'asdf', 'boom'=>'assdf');`

			`$input[] = "href='r'";`
			`$expect[] = array('href'=>'r');`

			`$input[] = 'onclick="javascript:alert(\'asdf\');"';`
			`$expect[] = array('onclick' => "javascript:alert('asdf');");`

			`$input[] = 'selected';`
			`$expect[] = array('selected'=>'selected');`

			`$input[] = '="asdf"';`
			`$expect[] = array();`

			`$size = count($input);`
			`for($i = 0; $i < $size; $i++) {`
Cleanup final renaming stuff (how could I forget to rename the class) and hook in the SAX parser. It has a bit different behavior, so you'll have to be careful. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@21 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:47:12 +00:00			`$result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]);`
Copy of tests for MarkupLexer git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@19 48356398-32a2-884e-a903-53898d9a118a 2006-04-15 01:17:13 +00:00			`$this->assertEqual($expect[$i], $result);`
			`paintIf($result, $expect[$i] != $result);`
			`}`

			`}`


			`}`

			`?>`