ISO639.php 9.3 KB


  1. <?php
  2. /**
  3. * Part of Text_LanguageDetect
  4. *
  5. * PHP version 5
  6. *
  7. * @category Text
  8. * @package Text_LanguageDetect
  9. * @author Christian Weiske <[email protected]>
  10. * @copyright 2011 Christian Weiske <[email protected]>
  11. * @license http://www.debian.org/misc/bsd.license BSD
  12. * @link http://pear.php.net/package/Text_LanguageDetect/
  13. */
  14. /**
  15. * Provides a mapping between the languages from lang.dat and the
  16. * ISO 639-1 and ISO 639-2 codes.
  17. *
  18. * Note that this class contains only languages that exist in lang.dat.
  19. *
  20. * @category Text
  21. * @package Text_LanguageDetect
  22. * @author Christian Weiske <[email protected]>
  23. * @copyright 2011 Christian Weiske <[email protected]>
  24. * @license BSD http://www.opensource.org/licenses/bsd-license.php
  25. * @link http://www.loc.gov/standards/iso639-2/php/code_list.php
  26. *
  27. * @SuppressWarnings(PHPMD)
  28. */
  class Text_LanguageDetect_ISO639
  {
      /**
       * Maps all language names from the language database to the
       * ISO 639-1 2-letter language code.
       *
       * NULL indicates that there is no 2-letter code.
       *
       * @var array
       */
      public static $nameToCode2 = array(
          'albanian' => 'sq',
          'arabic' => 'ar',
          'azeri' => 'az',
          'bengali' => 'bn',
          'bulgarian' => 'bg',
          'cebuano' => null,
          'croatian' => 'hr',
          'czech' => 'cs',
          'danish' => 'da',
          'dutch' => 'nl',
          'english' => 'en',
          'estonian' => 'et',
          'farsi' => 'fa',
          'finnish' => 'fi',
          'french' => 'fr',
          'german' => 'de',
          'hausa' => 'ha',
          'hawaiian' => null,
          'hindi' => 'hi',
          'hungarian' => 'hu',
          'icelandic' => 'is',
          'indonesian' => 'id',
          'italian' => 'it',
          'kazakh' => 'kk',
          'kyrgyz' => 'ky',
          'latin' => 'la',
          'latvian' => 'lv',
          'lithuanian' => 'lt',
          'macedonian' => 'mk',
          'mongolian' => 'mn',
          'nepali' => 'ne',
          'norwegian' => 'no',
          'pashto' => 'ps',
          'pidgin' => null,
          'polish' => 'pl',
          'portuguese' => 'pt',
          'romanian' => 'ro',
          'russian' => 'ru',
          'serbian' => 'sr',
          'slovak' => 'sk',
          'slovene' => 'sl',
          'somali' => 'so',
          'spanish' => 'es',
          'swahili' => 'sw',
          'swedish' => 'sv',
          'tagalog' => 'tl',
          'turkish' => 'tr',
          'ukrainian' => 'uk',
          'urdu' => 'ur',
          'uzbek' => 'uz',
          'vietnamese' => 'vi',
          'welsh' => 'cy',
      );

      /**
       * Maps all language names from the language database to the
       * ISO 639-2 3-letter language code.
       *
       * @var array
       */
      public static $nameToCode3 = array(
          'albanian' => 'sqi',
          'arabic' => 'ara',
          'azeri' => 'aze',
          'bengali' => 'ben',
          'bulgarian' => 'bul',
          'cebuano' => 'ceb',
          'croatian' => 'hrv',
          'czech' => 'ces',
          'danish' => 'dan',
          'dutch' => 'nld',
          'english' => 'eng',
          'estonian' => 'est',
          'farsi' => 'fas',
          'finnish' => 'fin',
          'french' => 'fra',
          'german' => 'deu',
          'hausa' => 'hau',
          'hawaiian' => 'haw',
          'hindi' => 'hin',
          'hungarian' => 'hun',
          'icelandic' => 'isl',
          'indonesian' => 'ind',
          'italian' => 'ita',
          'kazakh' => 'kaz',
          'kyrgyz' => 'kir',
          'latin' => 'lat',
          'latvian' => 'lav',
          'lithuanian' => 'lit',
          'macedonian' => 'mkd',
          'mongolian' => 'mon',
          'nepali' => 'nep',
          'norwegian' => 'nor',
          'pashto' => 'pus',
          'pidgin' => 'crp',
          'polish' => 'pol',
          'portuguese' => 'por',
          'romanian' => 'ron',
          'russian' => 'rus',
          'serbian' => 'srp',
          'slovak' => 'slk',
          'slovene' => 'slv',
          'somali' => 'som',
          'spanish' => 'spa',
          'swahili' => 'swa',
          'swedish' => 'swe',
          'tagalog' => 'tgl',
          'turkish' => 'tur',
          'ukrainian' => 'ukr',
          'urdu' => 'urd',
          'uzbek' => 'uzb',
          'vietnamese' => 'vie',
          'welsh' => 'cym',
      );

      /**
       * Maps ISO 639-1 2-letter language codes to the language names
       * in the language database.
       *
       * Not all languages have a 2-letter code, so some are missing.
       *
       * @var array
       */
      public static $code2ToName = array(
          'ar' => 'arabic',
          'az' => 'azeri',
          'bg' => 'bulgarian',
          'bn' => 'bengali',
          'cs' => 'czech',
          'cy' => 'welsh',
          'da' => 'danish',
          'de' => 'german',
          'en' => 'english',
          'es' => 'spanish',
          'et' => 'estonian',
          'fa' => 'farsi',
          'fi' => 'finnish',
          'fr' => 'french',
          'ha' => 'hausa',
          'hi' => 'hindi',
          'hr' => 'croatian',
          'hu' => 'hungarian',
          'id' => 'indonesian',
          'is' => 'icelandic',
          'it' => 'italian',
          'kk' => 'kazakh',
          'ky' => 'kyrgyz',
          'la' => 'latin',
          'lt' => 'lithuanian',
          'lv' => 'latvian',
          'mk' => 'macedonian',
          'mn' => 'mongolian',
          'ne' => 'nepali',
          'nl' => 'dutch',
          'no' => 'norwegian',
          'pl' => 'polish',
          'ps' => 'pashto',
          'pt' => 'portuguese',
          'ro' => 'romanian',
          'ru' => 'russian',
          'sk' => 'slovak',
          'sl' => 'slovene',
          'so' => 'somali',
          'sq' => 'albanian',
          'sr' => 'serbian',
          'sv' => 'swedish',
          'sw' => 'swahili',
          'tl' => 'tagalog',
          'tr' => 'turkish',
          'uk' => 'ukrainian',
          'ur' => 'urdu',
          'uz' => 'uzbek',
          'vi' => 'vietnamese',
      );

      /**
       * Maps ISO 639-2 3-letter language codes to the language names
       * in the language database.
       *
       * Must stay the exact inverse of $nameToCode3.
       *
       * @var array
       */
      public static $code3ToName = array(
          'ara' => 'arabic',
          'aze' => 'azeri',
          'ben' => 'bengali',
          'bul' => 'bulgarian',
          'ceb' => 'cebuano',
          'ces' => 'czech',
          'crp' => 'pidgin',
          'cym' => 'welsh',
          'dan' => 'danish',
          'deu' => 'german',
          'eng' => 'english',
          'est' => 'estonian',
          'fas' => 'farsi',
          'fin' => 'finnish',
          'fra' => 'french',
          'hau' => 'hausa',
          'haw' => 'hawaiian',
          'hin' => 'hindi',
          'hrv' => 'croatian',
          'hun' => 'hungarian',
          'ind' => 'indonesian',
          'isl' => 'icelandic',
          'ita' => 'italian',
          'kaz' => 'kazakh',
          'kir' => 'kyrgyz',
          'lat' => 'latin',
          'lav' => 'latvian',
          'lit' => 'lithuanian',
          'mkd' => 'macedonian',
          'mon' => 'mongolian',
          'nep' => 'nepali',
          'nld' => 'dutch',
          'nor' => 'norwegian',
          'pol' => 'polish',
          'por' => 'portuguese',
          'pus' => 'pashto',
          // "ron" matches $nameToCode3; the former key "rom" is the
          // ISO 639-2 code for Romany, not Romanian.
          'ron' => 'romanian',
          'rus' => 'russian',
          'slk' => 'slovak',
          'slv' => 'slovene',
          'som' => 'somali',
          'spa' => 'spanish',
          'sqi' => 'albanian',
          'srp' => 'serbian',
          'swa' => 'swahili',
          'swe' => 'swedish',
          'tgl' => 'tagalog',
          'tur' => 'turkish',
          'ukr' => 'ukrainian',
          'urd' => 'urdu',
          'uzb' => 'uzbek',
          'vie' => 'vietnamese',
      );

      /**
       * Returns the 2-letter ISO 639-1 code for the given language name.
       *
       * The lookup is case-insensitive.
       *
       * @param string $lang English language name like "swedish"
       *
       * @return string|null Two-letter language code (e.g. "sv"), or NULL if
       *                     the name is unknown or has no 2-letter code
       */
      public static function nameToCode2($lang)
      {
          $lang = strtolower($lang);
          // isset() is also false for entries whose value is NULL, so
          // languages without a 2-letter code take this branch as well.
          if (!isset(self::$nameToCode2[$lang])) {
              return null;
          }
          return self::$nameToCode2[$lang];
      }

      /**
       * Returns the 3-letter ISO 639-2 code for the given language name.
       *
       * The lookup is case-insensitive.
       *
       * @param string $lang English language name like "swedish"
       *
       * @return string|null Three-letter language code (e.g. "swe"), or NULL
       *                     if not found
       */
      public static function nameToCode3($lang)
      {
          $lang = strtolower($lang);
          if (!isset(self::$nameToCode3[$lang])) {
              return null;
          }
          return self::$nameToCode3[$lang];
      }

      /**
       * Returns the language name for the given 2-letter ISO 639-1 code.
       *
       * The lookup is case-insensitive.
       *
       * @param string $code Two-letter language code (e.g. "sv")
       *
       * @return string|null English language name like "swedish", or NULL if
       *                     the code is not in the mapping
       */
      public static function code2ToName($code)
      {
          // Normalize the code itself; the lowercased value was previously
          // stored in an unused variable, so "SV" never matched "sv".
          $code = strtolower($code);
          if (!isset(self::$code2ToName[$code])) {
              return null;
          }
          return self::$code2ToName[$code];
      }

      /**
       * Returns the language name for the given 3-letter ISO 639-2 code.
       *
       * The lookup is case-insensitive.
       *
       * @param string $code Three-letter language code (e.g. "swe")
       *
       * @return string|null English language name like "swedish", or NULL if
       *                     the code is not in the mapping
       */
      public static function code3ToName($code)
      {
          // Normalize the code itself; the lowercased value was previously
          // stored in an unused variable, so "SWE" never matched "swe".
          $code = strtolower($code);
          if (!isset(self::$code3ToName[$code])) {
              return null;
          }
          return self::$code3ToName[$code];
      }
  }