diff --git a/MarkupLexer.php b/MarkupLexer.php
new file mode 100644
index 00000000..c093bd8c
--- /dev/null
+++ b/MarkupLexer.php
@@ -0,0 +1,192 @@
+next($string, $quotes, $offset);
+ }
+
+ function nextWhiteSpace($string, $offset = 0) {
+ $spaces = array(chr(0x20), chr(0x9), chr(0xD), chr(0xA));
+ return $this->next($string, $spaces, $offset);
+ }
+
+ function next($haystack, $needles, $offset = 0) {
+ if (is_string($needles)) {
+ $string_needles = $needles;
+ $needles = array();
+ $size = strlen($string_needles);
+ for ($i = 0; $i < $size; $i++) {
+ $needles[] = $string_needles{$i};
+ }
+ }
+ $positions = array();
+ foreach ($needles as $needle) {
+ $position = strpos($haystack, $needle, $offset);
+ if ($position !== false) {
+ $positions[] = $position;
+ }
+ }
+ return empty($positions) ? false : min($positions);
+ }
+
+ function tokenizeHTML($string) {
+
+ // some quick checking (if empty, return empty)
+ $string = (string) $string;
+ if ($string == '') return array();
+
+ $cursor = 0; // our location in the text
+ $inside_tag = false; // whether or not we're parsing the inside of a tag
+ $array = array(); // result array
+
+ while(true) {
+
+ $position_next_lt = strpos($string, '<', $cursor);
+ $position_next_gt = strpos($string, '>', $cursor);
+
+ // triggers on "asdf" but not "asdf "
+ if ($position_next_lt === $cursor) {
+ $inside_tag = true;
+ $cursor++;
+ }
+
+ if (!$inside_tag && $position_next_lt !== false) {
+ // We are not inside tag and there still is another tag to parse
+ $array[] = new HTML_Text(substr($string, $cursor, $position_next_lt - $cursor));
+ $cursor = $position_next_lt + 1;
+ $inside_tag = true;
+ continue;
+ } elseif (!$inside_tag) {
+ // We are not inside tag but there are no more tags
+ // If we're already at the end, break
+ if ($cursor === strlen($string)) break;
+ // Create Text of rest of string
+ $array[] = new HTML_Text(substr($string, $cursor));
+ break;
+ } elseif ($inside_tag && $position_next_gt !== false) {
+ // We are in tag and it is well formed
+ // Grab the internals of the tag
+ $segment = substr($string, $cursor, $position_next_gt - $cursor);
+
+ // Check if it's a comment
+ if (substr($segment,0,3) == '!--' && substr($segment,strlen($segment)-2,2) == '--') {
+ $array[] = new HTML_Comment(substr($segment,3,strlen($segment)-5));
+ $inside_tag = false;
+ $cursor = $position_next_gt + 1;
+ continue;
+ }
+
+ // Check if it's an end tag
+ $is_end_tag = (strpos($segment,'/') === 0);
+ if ($is_end_tag) {
+ $type = substr($segment, 1);
+ $array[] = new HTML_EndTag($type);
+ $inside_tag = false;
+ $cursor = $position_next_gt + 1;
+ continue;
+ }
+
+ // Check if it is self closing, if so, remove trailing slash
+ $is_self_closing = (strpos($segment,'/') === strlen($segment) - 1);
+ if ($is_self_closing) {
+ $segment = substr($segment, 0, strlen($segment) - 1);
+ }
+
+ // Check if there are any attributes
+ $position_first_space = $this->nextWhiteSpace($segment);
+ if ($position_first_space === false) {
+ if ($is_self_closing) {
+ $array[] = new HTML_EmptyTag($segment);
+ } else {
+ $array[] = new HTML_StartTag($segment, array());
+ }
+ $inside_tag = false;
+ $cursor = $position_next_gt + 1;
+ continue;
+ }
+
+ // Grab out all the data
+ $type = substr($segment, 0, $position_first_space);
+ $attribute_string = trim(substr($segment, $position_first_space));
+ $attributes = $this->tokenizeAttributeString($attribute_string);
+ if ($is_self_closing) {
+ $array[] = new HTML_EmptyTag($type, $attributes);
+ } else {
+ $array[] = new HTML_StartTag($type, $attributes);
+ }
+ $cursor = $position_next_gt + 1;
+ $inside_tag = false;
+ continue;
+ } else {
+ $array[] = new HTML_Text('<' . substr($string, $cursor));
+ break;
+ }
+ break;
+ }
+ return $array;
+ }
+
+ function tokenizeAttributeString($string) {
+ $string = (string) $string;
+ if ($string == '') return array();
+
+ $array = array();
+ $cursor = 0;
+ $in_value = false;
+ $i = 0;
+ $size = strlen($string);
+ while(true) {
+ if ($cursor >= $size) {
+ break;
+ }
+ $position_next_space = $this->nextWhiteSpace($string, $cursor);
+ //scroll to the last whitespace before text
+ while ($position_next_space === $cursor) {
+ $cursor++;
+ $position_next_space = $this->nextWhiteSpace($string, $cursor);
+ }
+ $position_next_equal = strpos($string, '=', $cursor);
+ if ($position_next_equal !== false &&
+ ($position_next_equal < $position_next_space ||
+ $position_next_space === false)) {
+ //attr="asdf"
+ $key = trim(substr($string, $cursor, $position_next_equal - $cursor));
+ $position_next_quote = $this->nextQuote($string, $cursor);
+ $quote = $string{$position_next_quote};
+ $position_end_quote = strpos($string, $quote, $position_next_quote + 1);
+ $value = substr($string, $position_next_quote + 1,
+ $position_end_quote - $position_next_quote - 1);
+ if ($key) {
+ $array[$key] = $value;
+ }
+ $cursor = $position_end_quote + 1;
+ } else {
+ //boolattr
+ if ($position_next_space === false) {
+ $position_next_space = $size;
+ }
+ $key = substr($string, $cursor, $position_next_space - $cursor);
+ if ($key) {
+ $array[$key] = $key;
+ }
+ $cursor = $position_next_space + 1;
+ }
+ }
+ return $array;
+ }
+
+}
+
+?>
\ No newline at end of file