From 31704c92f6421a958cb880c20ce75ee1892f3b7c Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 24 Jun 2007 02:45:38 +0000 Subject: [PATCH] Implement working linkification, now, the real challenge is to get it to play nice with auto-paragraphing. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1216 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/Injector/Linkify.php | 56 +++++++++++++++++++ .../HTMLPurifier/Strategy/MakeWellFormed.php | 48 ++++++++++++---- 2 files changed, 94 insertions(+), 10 deletions(-) create mode 100644 library/HTMLPurifier/Injector/Linkify.php diff --git a/library/HTMLPurifier/Injector/Linkify.php b/library/HTMLPurifier/Injector/Linkify.php new file mode 100644 index 00000000..19be2152 --- /dev/null +++ b/library/HTMLPurifier/Injector/Linkify.php @@ -0,0 +1,56 @@ +get('CurrentNesting'); + // this snippet could be factored out + $definition = $config->getHTMLDefinition(); + if (!empty($current_nesting)) { + $parent_token = array_pop($current_nesting); + $current_nesting[] = $parent_token; + $parent = $definition->info[$parent_token->name]; + } else { + $parent = $definition->info_parent_def; + } + if (!isset($parent->child->elements['a'])) { + // parent element does not allow link elements, don't bother + return; + } + if (strpos($token->data, '://') === false) { + // our really quick heuristic failed, abort + // this may not work so well if we want to match things like + // "google.com" + return; + } + // there is/are URL(s). Let's split the string: + + $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + + $token = array(); + + // $i = index + // $c = count + // $l = is link + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { + if (!$l) { + if ($bits[$i] === '') continue; + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + } else { + $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + $token[] = new HTMLPurifier_Token_End('a'); + } + } + + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index cba0c900..abd3cf67 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -5,6 +5,7 @@ require_once 'HTMLPurifier/HTMLDefinition.php'; require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Injector/AutoParagraph.php'; +require_once 'HTMLPurifier/Injector/Linkify.php'; HTMLPurifier_ConfigSchema::define( 'Core', 'AutoParagraph', false, 'bool', ' @@ -16,6 +17,16 @@ HTMLPurifier_ConfigSchema::define( ' ); +HTMLPurifier_ConfigSchema::define( + 'Core', 'AutoLinkify', false, 'bool', ' +

+ This directive will cause HTML Purifier to automatically linkify + text that looks like URLs. This directive has been available since + 2.0.1. +

+' +); + /** * Takes tokens makes them well-formed (balance end tags, etc.) */ @@ -49,6 +60,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $injector_disabled[] = false; } + if ($config->get('Core', 'AutoLinkify')) { + $injector[] = new HTMLPurifier_Injector_Linkify(); + $injector_skip[] = 0; + $injector_disabled[] = false; + } + $current_injector = 0; $context->register('Injector', $injector); @@ -60,13 +77,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // if all goes well, this token will be passed through unharmed $token = $tokens[$tokens_index]; - // this will be more complicated - if (isset($injector[$current_injector])) { - if ($injector_skip[$current_injector] > 0) { - $injector_skip[$current_injector]--; - $injector_disabled[$current_injector] = true; + foreach ($injector as $i => $x) { + if ($injector_skip[$i] > 0) { + $injector_skip[$i]--; + $injector_disabled[$i] = true; } else { - $injector_disabled[$current_injector] = false; + $injector_disabled[$i] = false; } } @@ -74,8 +90,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if (empty( $token->is_tag )) { if ($token->type === 'text') { - if (isset($injector[$current_injector]) && !$injector_disabled[$current_injector]) { - $injector[$current_injector]->handleText($token, $config, $context); + foreach ($injector as $i => $x) { + if (!$injector_disabled[$i]) { + $x->handleText($token, $config, $context); + } + if (is_array($token)) { + $current_injector = $i; + break; + } } } @@ -127,8 +149,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $current_nesting[] = $parent; // undo the pop } - if (isset($injector[$current_injector]) && !$injector_disabled[$current_injector]) { - $injector[$current_injector]->handleStart($token, $config, $context); + foreach ($injector as $i => $x) { + if (!$injector_disabled[$i]) { + $x->handleStart($token, $config, $context); + } + if (is_array($token)) { + $current_injector = $i; + break; + } } $this->processToken($token, $config, $context);