summaryrefslogtreecommitdiff
path: root/plugins/af_lang_detect/languagedetect/data/unicode_blocks.php
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/af_lang_detect/languagedetect/data/unicode_blocks.php')
-rw-r--r--plugins/af_lang_detect/languagedetect/data/unicode_blocks.php873
1 files changed, 873 insertions, 0 deletions
diff --git a/plugins/af_lang_detect/languagedetect/data/unicode_blocks.php b/plugins/af_lang_detect/languagedetect/data/unicode_blocks.php
new file mode 100644
index 000000000..04b6fabac
--- /dev/null
+++ b/plugins/af_lang_detect/languagedetect/data/unicode_blocks.php
@@ -0,0 +1,873 @@
+<?php
+return array (
+ 0 =>
+ array (
+ 0 => 0x0000,
+ 1 => 0x007F,
+ 2 => 'Basic Latin',
+ ),
+ 1 =>
+ array (
+ 0 => 0x0080,
+ 1 => 0x00FF,
+ 2 => 'Latin-1 Supplement',
+ ),
+ 2 =>
+ array (
+ 0 => 0x0100,
+ 1 => 0x017F,
+ 2 => 'Latin Extended-A',
+ ),
+ 3 =>
+ array (
+ 0 => 0x0180,
+ 1 => 0x024F,
+ 2 => 'Latin Extended-B',
+ ),
+ 4 =>
+ array (
+ 0 => 0x0250,
+ 1 => 0x02AF,
+ 2 => 'IPA Extensions',
+ ),
+ 5 =>
+ array (
+ 0 => 0x02B0,
+ 1 => 0x02FF,
+ 2 => 'Spacing Modifier Letters',
+ ),
+ 6 =>
+ array (
+ 0 => 0x0300,
+ 1 => 0x036F,
+ 2 => 'Combining Diacritical Marks',
+ ),
+ 7 =>
+ array (
+ 0 => 0x0370,
+ 1 => 0x03FF,
+ 2 => 'Greek and Coptic',
+ ),
+ 8 =>
+ array (
+ 0 => 0x0400,
+ 1 => 0x04FF,
+ 2 => 'Cyrillic',
+ ),
+ 9 =>
+ array (
+ 0 => 0x0500,
+ 1 => 0x052F,
+ 2 => 'Cyrillic Supplement',
+ ),
+ 10 =>
+ array (
+ 0 => 0x0530,
+ 1 => 0x058F,
+ 2 => 'Armenian',
+ ),
+ 11 =>
+ array (
+ 0 => 0x0590,
+ 1 => 0x05FF,
+ 2 => 'Hebrew',
+ ),
+ 12 =>
+ array (
+ 0 => 0x0600,
+ 1 => 0x06FF,
+ 2 => 'Arabic',
+ ),
+ 13 =>
+ array (
+ 0 => 0x0700,
+ 1 => 0x074F,
+ 2 => 'Syriac',
+ ),
+ 14 =>
+ array (
+ 0 => 0x0750,
+ 1 => 0x077F,
+ 2 => 'Arabic Supplement',
+ ),
+ 15 =>
+ array (
+ 0 => 0x0780,
+ 1 => 0x07BF,
+ 2 => 'Thaana',
+ ),
+ 16 =>
+ array (
+ 0 => 0x0900,
+ 1 => 0x097F,
+ 2 => 'Devanagari',
+ ),
+ 17 =>
+ array (
+ 0 => 0x0980,
+ 1 => 0x09FF,
+ 2 => 'Bengali',
+ ),
+ 18 =>
+ array (
+ 0 => 0x0A00,
+ 1 => 0x0A7F,
+ 2 => 'Gurmukhi',
+ ),
+ 19 =>
+ array (
+ 0 => 0x0A80,
+ 1 => 0x0AFF,
+ 2 => 'Gujarati',
+ ),
+ 20 =>
+ array (
+ 0 => 0x0B00,
+ 1 => 0x0B7F,
+ 2 => 'Oriya',
+ ),
+ 21 =>
+ array (
+ 0 => 0x0B80,
+ 1 => 0x0BFF,
+ 2 => 'Tamil',
+ ),
+ 22 =>
+ array (
+ 0 => 0x0C00,
+ 1 => 0x0C7F,
+ 2 => 'Telugu',
+ ),
+ 23 =>
+ array (
+ 0 => 0x0C80,
+ 1 => 0x0CFF,
+ 2 => 'Kannada',
+ ),
+ 24 =>
+ array (
+ 0 => 0x0D00,
+ 1 => 0x0D7F,
+ 2 => 'Malayalam',
+ ),
+ 25 =>
+ array (
+ 0 => 0x0D80,
+ 1 => 0x0DFF,
+ 2 => 'Sinhala',
+ ),
+ 26 =>
+ array (
+ 0 => 0x0E00,
+ 1 => 0x0E7F,
+ 2 => 'Thai',
+ ),
+ 27 =>
+ array (
+ 0 => 0x0E80,
+ 1 => 0x0EFF,
+ 2 => 'Lao',
+ ),
+ 28 =>
+ array (
+ 0 => 0x0F00,
+ 1 => 0x0FFF,
+ 2 => 'Tibetan',
+ ),
+ 29 =>
+ array (
+ 0 => 0x1000,
+ 1 => 0x109F,
+ 2 => 'Myanmar',
+ ),
+ 30 =>
+ array (
+ 0 => 0x10A0,
+ 1 => 0x10FF,
+ 2 => 'Georgian',
+ ),
+ 31 =>
+ array (
+ 0 => 0x1100,
+ 1 => 0x11FF,
+ 2 => 'Hangul Jamo',
+ ),
+ 32 =>
+ array (
+ 0 => 0x1200,
+ 1 => 0x137F,
+ 2 => 'Ethiopic',
+ ),
+ 33 =>
+ array (
+ 0 => 0x1380,
+ 1 => 0x139F,
+ 2 => 'Ethiopic Supplement',
+ ),
+ 34 =>
+ array (
+ 0 => 0x13A0,
+ 1 => 0x13FF,
+ 2 => 'Cherokee',
+ ),
+ 35 =>
+ array (
+ 0 => 0x1400,
+ 1 => 0x167F,
+ 2 => 'Unified Canadian Aboriginal Syllabics',
+ ),
+ 36 =>
+ array (
+ 0 => 0x1680,
+ 1 => 0x169F,
+ 2 => 'Ogham',
+ ),
+ 37 =>
+ array (
+ 0 => 0x16A0,
+ 1 => 0x16FF,
+ 2 => 'Runic',
+ ),
+ 38 =>
+ array (
+ 0 => 0x1700,
+ 1 => 0x171F,
+ 2 => 'Tagalog',
+ ),
+ 39 =>
+ array (
+ 0 => 0x1720,
+ 1 => 0x173F,
+ 2 => 'Hanunoo',
+ ),
+ 40 =>
+ array (
+ 0 => 0x1740,
+ 1 => 0x175F,
+ 2 => 'Buhid',
+ ),
+ 41 =>
+ array (
+ 0 => 0x1760,
+ 1 => 0x177F,
+ 2 => 'Tagbanwa',
+ ),
+ 42 =>
+ array (
+ 0 => 0x1780,
+ 1 => 0x17FF,
+ 2 => 'Khmer',
+ ),
+ 43 =>
+ array (
+ 0 => 0x1800,
+ 1 => 0x18AF,
+ 2 => 'Mongolian',
+ ),
+ 44 =>
+ array (
+ 0 => 0x1900,
+ 1 => 0x194F,
+ 2 => 'Limbu',
+ ),
+ 45 =>
+ array (
+ 0 => 0x1950,
+ 1 => 0x197F,
+ 2 => 'Tai Le',
+ ),
+ 46 =>
+ array (
+ 0 => 0x1980,
+ 1 => 0x19DF,
+ 2 => 'New Tai Lue',
+ ),
+ 47 =>
+ array (
+ 0 => 0x19E0,
+ 1 => 0x19FF,
+ 2 => 'Khmer Symbols',
+ ),
+ 48 =>
+ array (
+ 0 => 0x1A00,
+ 1 => 0x1A1F,
+ 2 => 'Buginese',
+ ),
+ 49 =>
+ array (
+ 0 => 0x1D00,
+ 1 => 0x1D7F,
+ 2 => 'Phonetic Extensions',
+ ),
+ 50 =>
+ array (
+ 0 => 0x1D80,
+ 1 => 0x1DBF,
+ 2 => 'Phonetic Extensions Supplement',
+ ),
+ 51 =>
+ array (
+ 0 => 0x1DC0,
+ 1 => 0x1DFF,
+ 2 => 'Combining Diacritical Marks Supplement',
+ ),
+ 52 =>
+ array (
+ 0 => 0x1E00,
+ 1 => 0x1EFF,
+ 2 => 'Latin Extended Additional',
+ ),
+ 53 =>
+ array (
+ 0 => 0x1F00,
+ 1 => 0x1FFF,
+ 2 => 'Greek Extended',
+ ),
+ 54 =>
+ array (
+ 0 => 0x2000,
+ 1 => 0x206F,
+ 2 => 'General Punctuation',
+ ),
+ 55 =>
+ array (
+ 0 => 0x2070,
+ 1 => 0x209F,
+ 2 => 'Superscripts and Subscripts',
+ ),
+ 56 =>
+ array (
+ 0 => 0x20A0,
+ 1 => 0x20CF,
+ 2 => 'Currency Symbols',
+ ),
+ 57 =>
+ array (
+ 0 => 0x20D0,
+ 1 => 0x20FF,
+ 2 => 'Combining Diacritical Marks for Symbols',
+ ),
+ 58 =>
+ array (
+ 0 => 0x2100,
+ 1 => 0x214F,
+ 2 => 'Letterlike Symbols',
+ ),
+ 59 =>
+ array (
+ 0 => 0x2150,
+ 1 => 0x218F,
+ 2 => 'Number Forms',
+ ),
+ 60 =>
+ array (
+ 0 => 0x2190,
+ 1 => 0x21FF,
+ 2 => 'Arrows',
+ ),
+ 61 =>
+ array (
+ 0 => 0x2200,
+ 1 => 0x22FF,
+ 2 => 'Mathematical Operators',
+ ),
+ 62 =>
+ array (
+ 0 => 0x2300,
+ 1 => 0x23FF,
+ 2 => 'Miscellaneous Technical',
+ ),
+ 63 =>
+ array (
+ 0 => 0x2400,
+ 1 => 0x243F,
+ 2 => 'Control Pictures',
+ ),
+ 64 =>
+ array (
+ 0 => 0x2440,
+ 1 => 0x245F,
+ 2 => 'Optical Character Recognition',
+ ),
+ 65 =>
+ array (
+ 0 => 0x2460,
+ 1 => 0x24FF,
+ 2 => 'Enclosed Alphanumerics',
+ ),
+ 66 =>
+ array (
+ 0 => 0x2500,
+ 1 => 0x257F,
+ 2 => 'Box Drawing',
+ ),
+ 67 =>
+ array (
+ 0 => 0x2580,
+ 1 => 0x259F,
+ 2 => 'Block Elements',
+ ),
+ 68 =>
+ array (
+ 0 => 0x25A0,
+ 1 => 0x25FF,
+ 2 => 'Geometric Shapes',
+ ),
+ 69 =>
+ array (
+ 0 => 0x2600,
+ 1 => 0x26FF,
+ 2 => 'Miscellaneous Symbols',
+ ),
+ 70 =>
+ array (
+ 0 => 0x2700,
+ 1 => 0x27BF,
+ 2 => 'Dingbats',
+ ),
+ 71 =>
+ array (
+ 0 => 0x27C0,
+ 1 => 0x27EF,
+ 2 => 'Miscellaneous Mathematical Symbols-A',
+ ),
+ 72 =>
+ array (
+ 0 => 0x27F0,
+ 1 => 0x27FF,
+ 2 => 'Supplemental Arrows-A',
+ ),
+ 73 =>
+ array (
+ 0 => 0x2800,
+ 1 => 0x28FF,
+ 2 => 'Braille Patterns',
+ ),
+ 74 =>
+ array (
+ 0 => 0x2900,
+ 1 => 0x297F,
+ 2 => 'Supplemental Arrows-B',
+ ),
+ 75 =>
+ array (
+ 0 => 0x2980,
+ 1 => 0x29FF,
+ 2 => 'Miscellaneous Mathematical Symbols-B',
+ ),
+ 76 =>
+ array (
+ 0 => 0x2A00,
+ 1 => 0x2AFF,
+ 2 => 'Supplemental Mathematical Operators',
+ ),
+ 77 =>
+ array (
+ 0 => 0x2B00,
+ 1 => 0x2BFF,
+ 2 => 'Miscellaneous Symbols and Arrows',
+ ),
+ 78 =>
+ array (
+ 0 => 0x2C00,
+ 1 => 0x2C5F,
+ 2 => 'Glagolitic',
+ ),
+ 79 =>
+ array (
+ 0 => 0x2C80,
+ 1 => 0x2CFF,
+ 2 => 'Coptic',
+ ),
+ 80 =>
+ array (
+ 0 => 0x2D00,
+ 1 => 0x2D2F,
+ 2 => 'Georgian Supplement',
+ ),
+ 81 =>
+ array (
+ 0 => 0x2D30,
+ 1 => 0x2D7F,
+ 2 => 'Tifinagh',
+ ),
+ 82 =>
+ array (
+ 0 => 0x2D80,
+ 1 => 0x2DDF,
+ 2 => 'Ethiopic Extended',
+ ),
+ 83 =>
+ array (
+ 0 => 0x2E00,
+ 1 => 0x2E7F,
+ 2 => 'Supplemental Punctuation',
+ ),
+ 84 =>
+ array (
+ 0 => 0x2E80,
+ 1 => 0x2EFF,
+ 2 => 'CJK Radicals Supplement',
+ ),
+ 85 =>
+ array (
+ 0 => 0x2F00,
+ 1 => 0x2FDF,
+ 2 => 'Kangxi Radicals',
+ ),
+ 86 =>
+ array (
+ 0 => 0x2FF0,
+ 1 => 0x2FFF,
+ 2 => 'Ideographic Description Characters',
+ ),
+ 87 =>
+ array (
+ 0 => 0x3000,
+ 1 => 0x303F,
+ 2 => 'CJK Symbols and Punctuation',
+ ),
+ 88 =>
+ array (
+ 0 => 0x3040,
+ 1 => 0x309F,
+ 2 => 'Hiragana',
+ ),
+ 89 =>
+ array (
+ 0 => 0x30A0,
+ 1 => 0x30FF,
+ 2 => 'Katakana',
+ ),
+ 90 =>
+ array (
+ 0 => 0x3100,
+ 1 => 0x312F,
+ 2 => 'Bopomofo',
+ ),
+ 91 =>
+ array (
+ 0 => 0x3130,
+ 1 => 0x318F,
+ 2 => 'Hangul Compatibility Jamo',
+ ),
+ 92 =>
+ array (
+ 0 => 0x3190,
+ 1 => 0x319F,
+ 2 => 'Kanbun',
+ ),
+ 93 =>
+ array (
+ 0 => 0x31A0,
+ 1 => 0x31BF,
+ 2 => 'Bopomofo Extended',
+ ),
+ 94 =>
+ array (
+ 0 => 0x31C0,
+ 1 => 0x31EF,
+ 2 => 'CJK Strokes',
+ ),
+ 95 =>
+ array (
+ 0 => 0x31F0,
+ 1 => 0x31FF,
+ 2 => 'Katakana Phonetic Extensions',
+ ),
+ 96 =>
+ array (
+ 0 => 0x3200,
+ 1 => 0x32FF,
+ 2 => 'Enclosed CJK Letters and Months',
+ ),
+ 97 =>
+ array (
+ 0 => 0x3300,
+ 1 => 0x33FF,
+ 2 => 'CJK Compatibility',
+ ),
+ 98 =>
+ array (
+ 0 => 0x3400,
+ 1 => 0x4DBF,
+ 2 => 'CJK Unified Ideographs Extension A',
+ ),
+ 99 =>
+ array (
+ 0 => 0x4DC0,
+ 1 => 0x4DFF,
+ 2 => 'Yijing Hexagram Symbols',
+ ),
+ 100 =>
+ array (
+ 0 => 0x4E00,
+ 1 => 0x9FFF,
+ 2 => 'CJK Unified Ideographs',
+ ),
+ 101 =>
+ array (
+ 0 => 0xA000,
+ 1 => 0xA48F,
+ 2 => 'Yi Syllables',
+ ),
+ 102 =>
+ array (
+ 0 => 0xA490,
+ 1 => 0xA4CF,
+ 2 => 'Yi Radicals',
+ ),
+ 103 =>
+ array (
+ 0 => 0xA700,
+ 1 => 0xA71F,
+ 2 => 'Modifier Tone Letters',
+ ),
+ 104 =>
+ array (
+ 0 => 0xA800,
+ 1 => 0xA82F,
+ 2 => 'Syloti Nagri',
+ ),
+ 105 =>
+ array (
+ 0 => 0xAC00,
+ 1 => 0xD7AF,
+ 2 => 'Hangul Syllables',
+ ),
+ 106 =>
+ array (
+ 0 => 0xD800,
+ 1 => 0xDB7F,
+ 2 => 'High Surrogates',
+ ),
+ 107 =>
+ array (
+ 0 => 0xDB80,
+ 1 => 0xDBFF,
+ 2 => 'High Private Use Surrogates',
+ ),
+ 108 =>
+ array (
+ 0 => 0xDC00,
+ 1 => 0xDFFF,
+ 2 => 'Low Surrogates',
+ ),
+ 109 =>
+ array (
+ 0 => 0xE000,
+ 1 => 0xF8FF,
+ 2 => 'Private Use Area',
+ ),
+ 110 =>
+ array (
+ 0 => 0xF900,
+ 1 => 0xFAFF,
+ 2 => 'CJK Compatibility Ideographs',
+ ),
+ 111 =>
+ array (
+ 0 => 0xFB00,
+ 1 => 0xFB4F,
+ 2 => 'Alphabetic Presentation Forms',
+ ),
+ 112 =>
+ array (
+ 0 => 0xFB50,
+ 1 => 0xFDFF,
+ 2 => 'Arabic Presentation Forms-A',
+ ),
+ 113 =>
+ array (
+ 0 => 0xFE00,
+ 1 => 0xFE0F,
+ 2 => 'Variation Selectors',
+ ),
+ 114 =>
+ array (
+ 0 => 0xFE10,
+ 1 => 0xFE1F,
+ 2 => 'Vertical Forms',
+ ),
+ 115 =>
+ array (
+ 0 => 0xFE20,
+ 1 => 0xFE2F,
+ 2 => 'Combining Half Marks',
+ ),
+ 116 =>
+ array (
+ 0 => 0xFE30,
+ 1 => 0xFE4F,
+ 2 => 'CJK Compatibility Forms',
+ ),
+ 117 =>
+ array (
+ 0 => 0xFE50,
+ 1 => 0xFE6F,
+ 2 => 'Small Form Variants',
+ ),
+ 118 =>
+ array (
+ 0 => 0xFE70,
+ 1 => 0xFEFF,
+ 2 => 'Arabic Presentation Forms-B',
+ ),
+ 119 =>
+ array (
+ 0 => 0xFF00,
+ 1 => 0xFFEF,
+ 2 => 'Halfwidth and Fullwidth Forms',
+ ),
+ 120 =>
+ array (
+ 0 => 0xFFF0,
+ 1 => 0xFFFF,
+ 2 => 'Specials',
+ ),
+ 121 =>
+ array (
+ 0 => 0x10000,
+ 1 => 0x1007F,
+ 2 => 'Linear B Syllabary',
+ ),
+ 122 =>
+ array (
+ 0 => 0x10080,
+ 1 => 0x100FF,
+ 2 => 'Linear B Ideograms',
+ ),
+ 123 =>
+ array (
+ 0 => 0x10100,
+ 1 => 0x1013F,
+ 2 => 'Aegean Numbers',
+ ),
+ 124 =>
+ array (
+ 0 => 0x10140,
+ 1 => 0x1018F,
+ 2 => 'Ancient Greek Numbers',
+ ),
+ 125 =>
+ array (
+ 0 => 0x10300,
+ 1 => 0x1032F,
+ 2 => 'Old Italic',
+ ),
+ 126 =>
+ array (
+ 0 => 0x10330,
+ 1 => 0x1034F,
+ 2 => 'Gothic',
+ ),
+ 127 =>
+ array (
+ 0 => 0x10380,
+ 1 => 0x1039F,
+ 2 => 'Ugaritic',
+ ),
+ 128 =>
+ array (
+ 0 => 0x103A0,
+ 1 => 0x103DF,
+ 2 => 'Old Persian',
+ ),
+ 129 =>
+ array (
+ 0 => 0x10400,
+ 1 => 0x1044F,
+ 2 => 'Deseret',
+ ),
+ 130 =>
+ array (
+ 0 => 0x10450,
+ 1 => 0x1047F,
+ 2 => 'Shavian',
+ ),
+ 131 =>
+ array (
+ 0 => 0x10480,
+ 1 => 0x104AF,
+ 2 => 'Osmanya',
+ ),
+ 132 =>
+ array (
+ 0 => 0x10800,
+ 1 => 0x1083F,
+ 2 => 'Cypriot Syllabary',
+ ),
+ 133 =>
+ array (
+ 0 => 0x10A00,
+ 1 => 0x10A5F,
+ 2 => 'Kharoshthi',
+ ),
+ 134 =>
+ array (
+ 0 => 0x1D000,
+ 1 => 0x1D0FF,
+ 2 => 'Byzantine Musical Symbols',
+ ),
+ 135 =>
+ array (
+ 0 => 0x1D100,
+ 1 => 0x1D1FF,
+ 2 => 'Musical Symbols',
+ ),
+ 136 =>
+ array (
+ 0 => 0x1D200,
+ 1 => 0x1D24F,
+ 2 => 'Ancient Greek Musical Notation',
+ ),
+ 137 =>
+ array (
+ 0 => 0x1D300,
+ 1 => 0x1D35F,
+ 2 => 'Tai Xuan Jing Symbols',
+ ),
+ 138 =>
+ array (
+ 0 => 0x1D400,
+ 1 => 0x1D7FF,
+ 2 => 'Mathematical Alphanumeric Symbols',
+ ),
+ 139 =>
+ array (
+ 0 => 0x20000,
+ 1 => 0x2A6DF,
+ 2 => 'CJK Unified Ideographs Extension B',
+ ),
+ 140 =>
+ array (
+ 0 => 0x2F800,
+ 1 => 0x2FA1F,
+ 2 => 'CJK Compatibility Ideographs Supplement',
+ ),
+ 141 =>
+ array (
+ 0 => 0xE0000,
+ 1 => 0xE007F,
+ 2 => 'Tags',
+ ),
+ 142 =>
+ array (
+ 0 => 0xE0100,
+ 1 => 0xE01EF,
+ 2 => 'Variation Selectors Supplement',
+ ),
+ 143 =>
+ array (
+ 0 => 0xF0000,
+ 1 => 0xFFFFF,
+ 2 => 'Supplementary Private Use Area-A',
+ ),
+ 144 =>
+ array (
+ 0 => 0x100000,
+ 1 => 0x10FFFF,
+ 2 => 'Supplementary Private Use Area-B',
+ ),
+);