#!/usr/bin/php
<?php

/**
 * Builds a lookup table mapping entity names to their UTF-8 replacement text
 * from a set of entity definition files, then writes the serialized table to
 * $output_file.
 *
 * Inputs (all expected to be defined before this point):
 *   $entity_dir   - directory prefix that is concatenated with each file name
 *   $entity_files - list of file names to parse
 *   $output_file  - path the serialized table is written to
 *   unichr()      - converts an integer code point to a string
 *
 * NOTE(review): this script reached review collapsed onto a single line with
 * every angle-bracketed span stripped out. The PHP open tag, the preamble that
 * defined the inputs listed above, and the body of $regexp were lost. The open
 * tag and $regexp are reconstructed below; the preamble is NOT, because its
 * contents cannot be recovered from what survived. Restore it above the guard
 * before running.
 */

// Fail fast with a clear message instead of a cascade of undefined-variable
// notices when the preamble has not been restored.
if (!isset($entity_files, $entity_dir, $output_file) || !function_exists('unichr')) {
    fwrite(STDERR, "Fatal Error: \$entity_dir, \$entity_files, \$output_file and unichr() must be defined first.\n");
    exit(1);
}

// Keep an already-initialised table; otherwise start from an empty one so that
// an empty file list still serializes an array rather than an undefined value.
if (!isset($entity_table)) {
    $entity_table = array();
}

// Group 1 is used below as the entity name and group 2 as its definition string.
// NOTE(review): reconstructed -- only '//' survived extraction. Presumably the
// files contain declarations of the form <!ENTITY name "definition">; confirm
// against the actual entity files.
$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"([^"]+)">/';

// A numeric character reference, optionally prefixed by "&#38;#" (an escaped
// ampersand). Group 1 is the number: decimal digits, or "x" followed by hex.
$regexp_inner = '/&#(?:38;#)?(x?[A-Fa-f0-9]+);/';

foreach ($entity_files as $file) {
    $contents = file_get_contents($entity_dir . $file);
    if ($contents === false) {
        fwrite(STDERR, "Fatal Error: cannot read entity file {$file}.\n");
        exit(1);
    }

    // First we match each entity declaration.
    $matches = array();
    preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);

    foreach ($matches as $match) {
        // Then, for each declaration, we match and decode every character
        // reference in the definition string and concatenate the results.
        $matches_inner = array();
        preg_match_all($regexp_inner, $match[2], $matches_inner, PREG_SET_ORDER);

        $entity_table[$match[1]] = '';
        foreach ($matches_inner as $match_inner) {
            $reference = $match_inner[1];
            // Pick the base explicitly. Base 0 auto-detection would read a
            // decimal reference with a leading zero (e.g. "038") as octal.
            $codepoint = ($reference[0] === 'x')
                ? intval(substr($reference, 1), 16)
                : intval($reference, 10);
            $entity_table[$match[1]] .= unichr($codepoint);
        }
    }
}

$output = serialize($entity_table);
if (file_put_contents($output_file, $output) === false) {
    fwrite(STDERR, "Fatal Error: cannot write output file.\n");
    exit(1);
}

echo "Completed successfully.";

// vim: et sw=4 sts=4