summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Technosophos <[email protected]> 2013-04-10 17:06:22 -0500
committer Technosophos <[email protected]> 2013-04-10 17:06:22 -0500
commit 503a3d00cdf2358cc66ce63959ce6dd5f6abf953 (patch)
tree dd01a8aea9b7efa022bfec8d21dd4c7126e500ed /src
parent 36ff33f79ce45f9080ce88b9750a9009868a9a13 (diff)
Finished CharacterReference class.
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Parser/CharacterReference.php 21
1 files changed, 19 insertions, 2 deletions
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php
index 72e1815..cc71f8f 100644
--- a/src/HTML5/Parser/CharacterReference.php
+++ b/src/HTML5/Parser/CharacterReference.php
@@ -1,10 +1,15 @@
<?php
namespace HTML5\Parser;
+use \HTML5\Entities;
+
/**
* Manage entity references.
*/
class CharacterReference {
+
+ protected static $numeric_mask = array(0x0, 0x2FFFF, 0, 0xFFFF);
+
/**
* Given a name (e.g. 'amp'), lookup the UTF-8 character ('&')
*
@@ -14,24 +19,36 @@ class CharacterReference {
* The character sequence. In UTF-8 this may be more than one byte.
*/
public static function lookupName($name) {
- return '';
+ $char = Entities::$byName[$name];
+
+ return $char;
}
+
/**
* Given a Unicode codepoint, return the UTF-8 character.
+ *
+ * (NOT USED ANYWHERE)
*/
+ /*
public static function lookupCode($codePoint) {
- return '';
+ return 'POINT';
}
+ */
/**
* Given a decimal number, return the UTF-8 character.
*/
public static function lookupDecimal($int) {
+ $entity = '&#' . $int . ';';
+ // UNTESTED: This may fail on some Unicode planes. Couldn't find full
+ // documentation on the values of the mask array.
+ return mb_decode_numericentity($entity, self::$numeric_mask, 'utf-8');
}
/**
* Given a hexadecimal number, return the UTF-8 character.
*/
public static function lookupHex($hexdec) {
+ return self::lookupDecimal(hexdec($hexdec));
}
}