diff --git a/library/HTMLPurifier/EntityLookup.php b/library/HTMLPurifier/EntityLookup.php
new file mode 100644
index 00000000..ce3f4f01
--- /dev/null
+++ b/library/HTMLPurifier/EntityLookup.php
@@ -0,0 +1,25 @@
+<?php
+
+class HTMLPurifier_EntityLookup {
+    
+    /** Lookup table unserialized from the data file, indexed by entity name. */
+    var $table;
+    
+    /** Loads the table from $file, or from the bundled data.txt when $file is falsy. */
+    function HTMLPurifier_EntityLookup($file = false) {
+        $path = $file ? $file : dirname(__FILE__) . '/EntityLookup/data.txt';
+        $this->table = unserialize(file_get_contents($path));
+    }
+    
+    /** Returns the lookup held in a function-static; it is constructed on the first call only. */
+    function instance() {
+        // no references, since PHP doesn't copy unless modified
+        static $shared = null;
+        if ($shared) return $shared;
+        $shared = new HTMLPurifier_EntityLookup();
+        return $shared;
+    }
+    
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/EntityLookup/data.txt b/library/HTMLPurifier/EntityLookup/data.txt
new file mode 100644
index 00000000..f2b8b8f2
--- /dev/null
+++ b/library/HTMLPurifier/EntityLookup/data.txt
@@ -0,0 +1 @@
+a:246:{s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"­";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ
";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"‌";s:3:"zwj";s:3:"‍";s:3:"lrm";s:3:"‎";s:3:"rlm";s:3:"‏";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla
";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";}
\ No newline at end of file
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index 34c489ed..a079df82 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -85,6 +85,68 @@ class HTMLPurifier_Lexer
         return $lexer;
     }
     
+    
+    /**
+     * Callback regex string for parsing entities. Only the numeric forms
+     * take a "#"; a bare name following the "&" is captured as group 3.
+     * @protected
+     */
+    var $_substituteEntitiesRegex =
+        //      1. hex              2. dec      3. string
+        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z0-9]+));?/';
+    
+    /**
+     * Substitutes non-special entities with their parsed equivalents. Running
+     * this on each piece of parsed character data would be wasteful, so it
+     * is meant to run once, before everything else.
+     * 
+     * @protected
+     * @param $string String to have non-special entities parsed.
+     * @returns Parsed string.
+     */
+    function substituteNonSpecialEntities($string) {
+        // it will try to detect missing semicolons, but don't rely on it
+        return preg_replace_callback(
+            $this->_substituteEntitiesRegex,
+            array($this, 'nonSpecialEntityCallback'), // bound: it reads $this
+            $string);
+    }
+    
+    /**
+     * Callback function for substituteNonSpecialEntities() that does the work.
+     * 
+     * @warning Though this is public in order to let the callback happen,
+     *          calling it directly is not recommended.
+     * @param $matches  PCRE-style matches array, with 0 the entire match, and
+     *                  either index 1, 2 or 3 set with a hex value, dec value,
+     *                  or string (respectively).
+     * @returns Replacement string; the matched text itself when the entity
+     *          is special or has no entry in the lookup table.
+     */
+    function nonSpecialEntityCallback($matches) {
+        // replaces all but big five
+        $entity = $matches[0];
+        $is_num = ($entity[1] === '#');
+        if ($is_num) {
+            $is_hex = ($entity[2] === 'x');
+            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
+            if (isset($this->_special_dec2str[$int]))  return $entity;
+            // NOTE(review): chr() emits a single byte, so code points above 127
+            // are not UTF-8 encoded here, unlike the lookup table's values.
+            return chr($int);
+        }
+        $name = $matches[3];
+        if (isset($this->_special_ent2dec[$name])) return $entity;
+        if (!$this->_entity_lookup) {
+            require_once 'HTMLPurifier/EntityLookup.php';
+            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+        }
+        $table = $this->_entity_lookup->table;
+        return isset($table[$name]) ? $table[$name] : $entity;
+    }
+    
+    var $_entity_lookup;
+    
 }
 
 ?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index 91706370..587f1928 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -11,7 +11,6 @@ require_once 'HTMLPurifier/Lexer.php';
  * pales in comparison to HTMLPurifier_Lexer_DOMLex.  It will support UTF-8
  * completely eventually.
  * 
- * @todo Implement non-special string entity conversion.
  * @todo Reread XML spec and document differences.
  * @todo Add support for CDATA sections.
  * @todo Determine correct behavior in outputting comment data. (preserve dashes?)
@@ -99,56 +98,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
             '&#x27;' => "'",
         );
     
-    /**
-     * Callback regex string for parsing entities.
-     * @protected
-     */
-    var $_substituteEntitiesRegex =
-        //       1. hex          2. dec  3. string
-        '/&[#](?:x([a-fA-F0-9]+)|0*(\d+)|([A-Za-z]+));?/';
-    
-    /**
-     * Substitutes non-special entities with their parsed equivalents.
-     * 
-     * @protected
-     * @param $string String to have non-special entities parsed.
-     * @returns Parsed string.
-     */
-    function substituteNonSpecialEntities($string) {
-        // it will try to detect missing semicolons, but don't rely on it
-        return preg_replace_callback(
-            $this->_substituteEntitiesRegex,
-            array('HTMLPurifier_Lexer_DirectLex', 'nonSpecialEntityCallback'),
-            $string);
-    }
-    
-    /**
-     * Callback function for substituteNonSpecialEntities() that does the work.
-     * 
-     * @warning Though this is public in order to let the callback happen,
-     *          calling it directly is not recommended.
-     * @param $matches  PCRE-style matches array, with 0 the entire match, and
-     *                  either index 1, 2 or 3 set with a hex value, dec value,
-     *                  or string (respectively).
-     * @returns Replacement string.
-     * @todo Implement string translations
-     */
-    function nonSpecialEntityCallback($matches) {
-        // replaces all but big five
-        $entity = $matches[0];
-        $is_num = (@$matches[0][1] === '#');
-        if ($is_num) {
-            $is_hex = (@$entity[2] === 'x');
-            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
-            if (isset($this->_special_dec2str[$int]))  return $entity;
-            return chr($int);
-        } else {
-            if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
-            // translate $matches[3]
-            return '';
-        }
-    }
-    
     /**
      * Substitutes only special entities with their parsed equivalents.
      * 
diff --git a/maintenance/.htaccess b/maintenance/.htaccess
new file mode 100644
index 00000000..03688ee9
--- /dev/null
+++ b/maintenance/.htaccess
@@ -0,0 +1 @@
+Deny from all
diff --git a/maintenance/generate-entity-file.php b/maintenance/generate-entity-file.php
new file mode 100644
index 00000000..b46586b5
--- /dev/null
+++ b/maintenance/generate-entity-file.php
@@ -0,0 +1,68 @@
+#!/usr/bin/php
+<?php
+
+/**
+ * Parses *.ent files into an entity lookup table, and then serializes and
+ * writes the whole kaboodle to a file. The resulting file should be versioned.
+ */
+
+chdir( dirname(__FILE__) ); // relative paths below resolve from this script's directory
+
+// Directory holding the *.ent entity files, relative to the location of
+// this PHP file. Needs trailing slash, since file names are appended to it.
+$entity_dir = '../docs/entities/';
+
+// Output file for the serialized content; the script refuses to overwrite it.
+$output_file = '../library/HTMLPurifier/EntityLookup/data.txt';
+
+/**
+ * Encodes a Unicode code point, given as an integer or a string of decimal
+ * digits, as its UTF-8 byte sequence (one to four bytes).
+ */
+function unichr($dec) {
+    $dec  = (int) $dec;
+    $tail = chr(0x80 | ($dec & 0x3F)); // last continuation byte, unused for ASCII
+    if ($dec < 0x80)    return chr($dec);
+    if ($dec < 0x800)   return chr(0xC0 | ($dec >> 6)) . $tail;
+    $mid = chr(0x80 | (($dec >> 6) & 0x3F));
+    if ($dec < 0x10000) return chr(0xE0 | ($dec >> 12)) . $mid . $tail;
+    return chr(0xF0 | ($dec >> 18)) . chr(0x80 | (($dec >> 12) & 0x3F)) . $mid . $tail;
+}
+
+if ( !is_dir($entity_dir) ) exit("Fatal Error: Can't find entity directory.\n");
+if ( file_exists($output_file) ) exit("Fatal Error: $output_file already exists.\n");
+
+$dh = @opendir($entity_dir);
+if ( !$dh ) exit("Fatal Error: Cannot read entity directory.\n");
+
+$entity_files = array();
+while (($file = readdir($dh)) !== false) {
+    if (@$file[0] === '.') continue;
+    if (substr(strrchr($file, "."), 1) !== 'ent') continue;
+    $entity_files[] = $file;
+}
+closedir($dh);
+
+if ( !$entity_files ) exit("Fatal Error: No entity files to parse.\n");
+
+// entity names may contain digits (sup2, frac12, there4); the optional
+// "38;#" accepts values written in the escaped form "&#38;#60;"
+$entity_table = array();
+$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/';
+
+foreach ( $entity_files as $file ) {
+    $contents = file_get_contents($entity_dir . $file);
+    preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);
+    foreach ($matches as $match) {
+        $entity_table[$match[1]] = unichr($match[2]);
+    }
+}
+
+$fh = fopen($output_file, 'w');
+if ( !$fh ) exit("Fatal Error: Cannot write to $output_file.\n");
+fwrite($fh, serialize($entity_table));
+fclose($fh);
+
+echo "Completed successfully.";
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/EntityLookupTest.php b/tests/HTMLPurifier/EntityLookupTest.php
new file mode 100644
index 00000000..c490d55a
--- /dev/null
+++ b/tests/HTMLPurifier/EntityLookupTest.php
@@ -0,0 +1,29 @@
+<?php
+
+// this page is UTF-8 encoded!
+
+require_once 'HTMLPurifier/EntityLookup.php';
+
+class HTMLPurifier_EntityLookupTest extends UnitTestCase
+{
+    
+    /** Spot-checks a handful of entries in the shared lookup table. */
+    function test() {
+        
+        $lookup = HTMLPurifier_EntityLookup::instance();
+        
+        $expected = array(
+            'acirc' => 'â', // latin char
+            'quot'  => '"', // special char
+            'ldquo' => '“',
+            'lt'    => '<', //expressed strangely
+            'theta' => 'θ', // symbol char
+        );
+        foreach ($expected as $name => $char) {
+            $this->assertIdentical($char, $lookup->table[$name]);
+        }
+    }
+    
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index 2693b825..0f988a6b 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -7,6 +7,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
 {
     
     var $DirectLex, $PEARSax3, $DOMLex;
+    var $_entity_lookup;
     var $_has_dom;
     
     function setUp() {
@@ -14,12 +15,13 @@ class HTMLPurifier_LexerTest extends UnitTestCase
         $this->PEARSax3  = new HTMLPurifier_Lexer_PEARSax3();
         
         $this->_has_dom = version_compare(PHP_VERSION, '5', '>=');
-        
         if ($this->_has_dom) {
             require_once 'HTMLPurifier/Lexer/DOMLex.php';
             $this->DOMLex    = new HTMLPurifier_Lexer_DOMLex();
         }
         
+        $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+        
     }
     
     function test_tokenizeHTML() {
@@ -152,8 +154,12 @@ class HTMLPurifier_LexerTest extends UnitTestCase
         // compare with this valid one:
         $input[12] = '&quot;';
         $expect[12] = array( new HTMLPurifier_Token_Text('"') );
-        $sax_expect[12] = false;
-        // SAX chokes on this? We do have entity parsing on, so it should work!
+        $sax_expect[12] = false; // choked!
+        
+        // DOM and SAX choke on this
+        //$char_circ = $this->_entity_lookup->table['circ'];
+        //$input[13] = '&circ;';
+        //$expect[13] = array( new HTMLPurifier_Token_Text($char_circ) );
         
         foreach($input as $i => $discard) {
             $result = $this->DirectLex->tokenizeHTML($input[$i]);
diff --git a/tests/index.php b/tests/index.php
index 593023ec..2dcd7694 100644
--- a/tests/index.php
+++ b/tests/index.php
@@ -19,6 +19,7 @@ $test->addTestFile('HTMLPurifier/Lexer/DirectLexTest.php');
 $test->addTestFile('HTMLPurifier/DefinitionTest.php');
 $test->addTestFile('HTMLPurifier/ChildDefTest.php');
 $test->addTestFile('HTMLPurifier/GeneratorTest.php');
+$test->addTestFile('HTMLPurifier/EntityLookupTest.php');
 
 $test->run( new HtmlReporter() );