Finally stabilize the unit converter.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1750 48356398-32a2-884e-a903-53898d9a118a
2024-12-22 08:21:52 +00:00 · 2008-05-20 21:23:38 +00:00 · 2008-05-20 21:23:38 +00:00 · d8da5ff406
commit d8da5ff406
parent fda310f1e7
6 changed files with 160 additions and 41 deletions
--- a/2
+++ b/2
@ -17,6 +17,8 @@ afraid to cast your vote for the next feature to be implemented!
 - Maintain old attribute data in tokens (configurable?)
 - Allow URIFilters to run early?
 - Lazy update of token when validating attributes?
+- Investigate how early internal structures can be accessed; this would
+  prevent structures from being parsed and serialized multiple times.

 FUTURE VERSIONS
 ---------------
--- a/library/HTMLPurifier/AttrDef/CSS/Number.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Number.php
@ -18,6 +18,10 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
        $this->non_negative = $non_negative;
    }
    
+    /**
+     * @warning Some contexts do not pass $config, $context. These
+     *          variables should not be used without checking HTMLPurifier_Length
+     */
    public function validate($number, $config, $context) {
        
        $number = $this->parseCDATA($number);
--- a/library/HTMLPurifier/Length.php
+++ b/library/HTMLPurifier/Length.php
@ -2,7 +2,7 @@

 /**
 * Represents a measurable length, with a string numeric magnitude
- * and a unit.
+ * and a unit. This object is immutable.
 */
 class HTMLPurifier_Length
 {
@ -10,12 +10,17 @@ class HTMLPurifier_Length
    /**
     * String numeric magnitude.
     */
-    public $n;
+    protected $n;
    
    /**
     * String unit. False is permitted if $n = 0.
     */
-    public $unit;
+    protected $unit;
+    
+    /**
+     * Whether or not this length is valid. Null if not calculated yet.
+     */
+    protected $isValid;
    
    /**
     * Lookup array of units recognized by CSS 2.1
@ -30,8 +35,8 @@ class HTMLPurifier_Length
     * @param string $u Unit
     */
    public function __construct($n = '0', $u = false) {
-        $this->n = $n;
-        $this->unit = $u;
+        $this->n = (string) $n;
+        $this->unit = $u !== false ? (string) $u : false;
    }
    
    /**
@ -51,13 +56,15 @@ class HTMLPurifier_Length
     * @param bool $non_negative Whether or not to disable negative values.
     * @note Maybe should be put in another class.
     */
-    public function validate($non_negative = false, $config, $context) {
+    protected function validate($non_negative = false) {
        // Special case:
+        if ($this->n === '+0' || $this->n === '-0') $this->n = '0';
        if ($this->n === '0' && $this->unit === false) return true;
        if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit);
        if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false;
+        // Hack:
        $def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
-        $result = $def->validate($this->n, $config, $context);
+        $result = $def->validate($this->n, false, false);
        if ($result === false) return false;
        $this->n = $result;
        return true;
@ -67,7 +74,26 @@ class HTMLPurifier_Length
     * Returns string representation of number.
     */
    public function toString() {
+        if (!$this->isValid()) return false;
        return $this->n . $this->unit;
    }
    
+    /**
+     * Retrieves string numeric magnitude.
+     */
+    public function getN() {return $this->n;}
+    
+    /**
+     * Retrieves string unit.
+     */
+    public function getUnit() {return $this->unit;}
+    
+    /**
+     * Returns true if this length unit is valid.
+     */
+    public function isValid($non_negative = false) {
+        if ($this->isValid === null) $this->isValid = $this->validate($non_negative);
+        return $this->isValid;
+    }
+    
 }
--- a/library/HTMLPurifier/UnitConverter.php
+++ b/library/HTMLPurifier/UnitConverter.php
@ -50,6 +50,11 @@ class HTMLPurifier_UnitConverter
    
    /**
     * Converts a length object of one unit into another unit.
+     * @param HTMLPurifier_Length $length
+     *      Instance of HTMLPurifier_Length to convert. You must validate()
+     *      it before passing it here!
+     * @param string $to_unit
+     *      Unit to convert to.
     * @note
     *      About precision: This conversion function pays very special
     *      attention to the incoming precision of values and attempts
@ -60,39 +65,35 @@ class HTMLPurifier_UnitConverter
     *          - If a number contains less than four sigfigs ($outputPrecision)
     *            and this causes some decimals to be excluded, those
     *            decimals will be added on.
-     *          - Significant digits will be ignored for quantities greater
-     *            than one. This is a limitation of BCMath and I don't
-     *            feel like coding around it.
     */
    public function convert($length, $to_unit) {
-        if ($length->n === '0' || $length->unit === false) {
-            return new HTMLPurifier_Length('0', $unit);
+        
+        if (!$length->isValid()) return false;
+        
+        $n    = $length->getN();
+        $unit = $length->getUnit();
+        
+        if ($n === '0' || $unit === false) {
+            return new HTMLPurifier_Length('0', false);
        }
        
-        $state = $dest = false;
+        $state = $dest_state = false;
        foreach (self::$units as $k => $x) {
-            if (isset($x[$length->unit])) $state = $k;
+            if (isset($x[$unit])) $state = $k;
            if (isset($x[$to_unit])) $dest_state = $k;
        }
        if (!$state || !$dest_state) return false;
        
-        $n    = $length->n;
-        $unit = $length->unit;
-        
        // Some calculations about the initial precision of the number;
        // this will be useful when we need to do final rounding.
-        $log = (int) floor(log($n, 10));
-        if (strpos($n, '.') === false) {
-            $sigfigs = strlen(trim($n, '0+-'));
-        } else {
-            $sigfigs = strlen(ltrim($n, '0+-')) - 1; // eliminate extra decimal character
-        }
+        $sigfigs = $this->getSigFigs($n);
        if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision;
        
        // BCMath's internal precision deals only with decimals. Use
        // our default if the initial number has no decimals, or increase
        // it by how ever many decimals, thus, the number of guard digits
        // will always be greater than or equal to internalPrecision.
+        $log = (int) floor(log(abs($n), 10));
        $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision
        
        for ($i = 0; $i < 2; $i++) {
@ -113,7 +114,7 @@ class HTMLPurifier_UnitConverter
                $unit = $dest_unit;
            }
            
-            // Output was zero, so bail out early
+            // Output was zero, so bail out early. Shouldn't ever happen.
            if ($n === '') {
                $n = '0';
                $unit = $to_unit;
@ -148,17 +149,21 @@ class HTMLPurifier_UnitConverter
        // Calculate how many decimals we need ($rp)
        // Calculations will always be carried to the decimal; this is
        // a limitation with BC (we can't set the scale to be negative)
-        $new_log = (int) floor(log($n, 10));
+        $new_log = (int) floor(log(abs($n), 10));
        $rp = $sigfigs - $new_log - 1;
-        //echo "----\n";
-        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n";
+        $neg = $n < 0 ? '-' : '';
+        
+        // Useful for debugging:
+        //echo "<pre>n";
+        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n";
+        
        if ($rp >= 0) {
-            $n = bcadd($n, '0.' .  str_repeat('0', $rp) . '5', $rp + 1);
+            $n = bcadd($n, $neg . '0.' .  str_repeat('0', $rp) . '5', $rp + 1);
            $n = bcdiv($n, '1', $rp);
        } else {
            if ($new_log + 1 >= $sigfigs) {
-                $n = bcadd($n, '5' . str_repeat('0', $new_log - $sigfigs));
-                $n = substr($n, 0, $sigfigs) . str_repeat('0', $new_log + 1 - $sigfigs);
+                $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs));
+                $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log + 1 - $sigfigs);
            }
        }
        if (strpos($n, '.') !== false) $n = rtrim($n, '0');
@ -167,4 +172,21 @@ class HTMLPurifier_UnitConverter
        return new HTMLPurifier_Length($n, $unit);
    }
    
+    /**
+     * Returns the number of significant figures in a string number.
+     * @param string $n Decimal number
+     * @return int number of sigfigs
+     */
+    public function getSigFigs($n) {
+        $n = ltrim($n, '0+-');
+        $dp = strpos($n, '.'); // decimal position
+        if ($dp === false) {
+            $sigfigs = strlen(rtrim($n, '0'));
+        } else {
+            $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character
+            if ($dp !== 0) $sigfigs--;
+        }
+        return $sigfigs;
+    }
+    
 }
--- a/tests/HTMLPurifier/LengthTest.php
+++ b/tests/HTMLPurifier/LengthTest.php
@ -5,14 +5,14 @@ class HTMLPurifier_LengthTest extends HTMLPurifier_Harness
    
    function testConstruct() {
        $l = new HTMLPurifier_Length('23', 'in');
-        $this->assertIdentical($l->n, '23');
-        $this->assertIdentical($l->unit, 'in');
+        $this->assertIdentical($l->getN(), '23');
+        $this->assertIdentical($l->getUnit(), 'in');
    }
    
    function testMake() {
        $l = HTMLPurifier_Length::make('+23.4in');
-        $this->assertIdentical($l->n, '+23.4');
-        $this->assertIdentical($l->unit, 'in');
+        $this->assertIdentical($l->getN(), '+23.4');
+        $this->assertIdentical($l->getUnit(), 'in');
    }
    
    function testToString() {
@ -23,13 +23,15 @@ class HTMLPurifier_LengthTest extends HTMLPurifier_Harness
    protected function assertValidate($string, $expect = true, $disable_negative = false) {
        if ($expect === true) $expect = $string;
        $l = HTMLPurifier_Length::make($string);
-        $result = $l->validate($disable_negative, $this->config, $this->context);
+        $result = $l->isValid($disable_negative);
        if ($result === false) $this->assertIdentical($expect, false);
        else $this->assertIdentical($l->toString(), $expect);
    }
    
    function testValidate() {
        $this->assertValidate('0');
+        $this->assertValidate('+0', '0');
+        $this->assertValidate('-0', '0');
        $this->assertValidate('0px');
        $this->assertValidate('4.5px');
        $this->assertValidate('-4.5px');
--- a/tests/HTMLPurifier/UnitConverterTest.php
+++ b/tests/HTMLPurifier/UnitConverterTest.php
@ -3,12 +3,35 @@
 class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness
 {
    
-    protected function assertConversion($input, $expect) {
-        $input = HTMLPurifier_Length::make($input);
-        $expect = HTMLPurifier_Length::make($expect);
+    protected function assertConversion($input, $expect, $unit = null, $test_negative = true) {
+        $length = HTMLPurifier_Length::make($input);
+        if ($expect !== false) $expectl = HTMLPurifier_Length::make($expect);
+        else $expectl = false;
        $converter = new HTMLPurifier_UnitConverter();
-        $result = $converter->convert($input, $expect->unit);
-        $this->assertIdentical($result, $expect);
+        $result = $converter->convert($length, $unit !== null ? $unit : $expectl->getUnit());
+        $this->assertIdentical($result, $expectl);
+        if ($test_negative) {
+            $this->assertConversion(
+                "-$input",
+                $expect === false ? false : "-$expect",
+                $unit,
+                false
+            );
+        }
+    }
+    
+    function testFail() {
+        $this->assertConversion('1in', false, 'foo');
+        $this->assertConversion('1foo', false, 'in');
+    }
+    
+    function testZero() {
+        $this->assertConversion('0', '0', 'in', false);
+        $this->assertConversion('-0', '0', 'in', false);
+        $this->assertConversion('0in', '0', 'in', false);
+        $this->assertConversion('-0in', '0', 'in', false);
+        $this->assertConversion('0in', '0', 'pt', false);
+        $this->assertConversion('-0in', '0', 'pt', false);
    }
    
    function testEnglish() {
@ -39,7 +62,8 @@ class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness
        $this->assertConversion('0.3937in', '1cm');
    }
    
-    function testRounding() {
+    function testRoundingMinPrecision() {
+        // One sig-fig, modified to be four, conversion rounds up
        $this->assertConversion('100pt', '1.389in');
        $this->assertConversion('1000pt', '13.89in');
        $this->assertConversion('10000pt', '138.9in');
@ -47,4 +71,43 @@ class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness
        $this->assertConversion('1000000pt', '13890in');
    }
    
+    function testRoundingUserPrecision() {
+        // Five sig-figs, conversion rounds down
+        $this->assertConversion('11112000pt', '154330in');
+        $this->assertConversion('1111200pt', '15433in');
+        $this->assertConversion('111120pt', '1543.3in');
+        $this->assertConversion('11112pt', '154.33in');
+        $this->assertConversion('1111.2pt', '15.433in');
+        $this->assertConversion('111.12pt', '1.5433in');
+        $this->assertConversion('11.112pt', '0.15433in');
+    }
+    
+    protected function assertSigFig($n, $sigfigs) {
+        $converter = new HTMLPurifier_UnitConverter();
+        $result = $converter->getSigFigs($n);
+        $this->assertIdentical($result, $sigfigs);
+    }
+    
+    function test_getSigFigs() {
+        $this->assertSigFig('0', 0);
+        $this->assertSigFig('1', 1);
+        $this->assertSigFig('-1', 1);
+        $this->assertSigFig('+1', 1);
+        $this->assertSigFig('01', 1);
+        $this->assertSigFig('001', 1);
+        $this->assertSigFig('12', 2);
+        $this->assertSigFig('012', 2);
+        $this->assertSigFig('10', 1);
+        $this->assertSigFig('10.', 2);
+        $this->assertSigFig('100.', 3);
+        $this->assertSigFig('103', 3);
+        $this->assertSigFig('130', 2);
+        $this->assertSigFig('.1', 1);
+        $this->assertSigFig('0.1', 1);
+        $this->assertSigFig('00.1', 1);
+        $this->assertSigFig('0.01', 1);
+        $this->assertSigFig('0.010', 2);
+        $this->assertSigFig('0.012', 2);
+    }
+    
 }