ECICharset.php 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. <?php
  2. /**
  3. * Class ECICharset
  4. *
  5. * @created 21.01.2021
  6. * @author smiley <smiley@chillerlan.net>
  7. * @copyright 2021 smiley
  8. * @license MIT
  9. */
  10. namespace chillerlan\QRCode\Common;
  11. use chillerlan\QRCode\QRCodeException;
  12. use function array_key_exists;
  13. /**
  14. * ISO/IEC 18004:2000 - 8.4.1 Extended Channel Interpretation (ECI) Mode
  15. */
  16. final class ECICharset{
  17. public const CP437 = 0; // Code page 437, DOS Latin US
  18. public const ISO_IEC_8859_1_GLI = 1; // GLI encoding with characters 0 to 127 identical to ISO/IEC 646 and characters 128 to 255 identical to ISO 8859-1
  19. public const CP437_WO_GLI = 2; // An equivalent code table to CP437, without the return-to-GLI 0 logic
  20. public const ISO_IEC_8859_1 = 3; // Latin-1 (Default)
  21. public const ISO_IEC_8859_2 = 4; // Latin-2
  22. public const ISO_IEC_8859_3 = 5; // Latin-3
  23. public const ISO_IEC_8859_4 = 6; // Latin-4
  24. public const ISO_IEC_8859_5 = 7; // Latin/Cyrillic
  25. public const ISO_IEC_8859_6 = 8; // Latin/Arabic
  26. public const ISO_IEC_8859_7 = 9; // Latin/Greek
  27. public const ISO_IEC_8859_8 = 10; // Latin/Hebrew
  28. public const ISO_IEC_8859_9 = 11; // Latin-5
  29. public const ISO_IEC_8859_10 = 12; // Latin-6
  30. public const ISO_IEC_8859_11 = 13; // Latin/Thai
  31. // 14 reserved
  32. public const ISO_IEC_8859_13 = 15; // Latin-7 (Baltic Rim)
  33. public const ISO_IEC_8859_14 = 16; // Latin-8 (Celtic)
  34. public const ISO_IEC_8859_15 = 17; // Latin-9
  35. public const ISO_IEC_8859_16 = 18; // Latin-10
  36. // 19 reserved
  37. public const SHIFT_JIS = 20; // JIS X 0208 Annex 1 + JIS X 0201
  38. public const WINDOWS_1250_LATIN_2 = 21; // Superset of Latin-2, Central Europe
  39. public const WINDOWS_1251_CYRILLIC = 22; // Latin/Cyrillic
  40. public const WINDOWS_1252_LATIN_1 = 23; // Superset of Latin-1
  41. public const WINDOWS_1256_ARABIC = 24;
  42. public const ISO_IEC_10646_UCS_2 = 25; // High order byte first (UTF-16BE)
  43. public const ISO_IEC_10646_UTF_8 = 26; // UTF-8
  44. public const ISO_IEC_646_1991 = 27; // International Reference Version of ISO 7-bit coded character set (US-ASCII)
  45. public const BIG5 = 28; // Big 5 (Taiwan) Chinese Character Set
  46. public const GB18030 = 29; // GB (PRC) Chinese Character Set
  47. public const EUC_KR = 30; // Korean Character Set
  48. /**
  49. * map of charset id -> name
  50. *
  51. * @see \mb_list_encodings()
  52. */
  53. public const MB_ENCODINGS = [
  54. self::CP437 => null,
  55. self::ISO_IEC_8859_1_GLI => null,
  56. self::CP437_WO_GLI => null,
  57. self::ISO_IEC_8859_1 => 'ISO-8859-1',
  58. self::ISO_IEC_8859_2 => 'ISO-8859-2',
  59. self::ISO_IEC_8859_3 => 'ISO-8859-3',
  60. self::ISO_IEC_8859_4 => 'ISO-8859-4',
  61. self::ISO_IEC_8859_5 => 'ISO-8859-5',
  62. self::ISO_IEC_8859_6 => 'ISO-8859-6',
  63. self::ISO_IEC_8859_7 => 'ISO-8859-7',
  64. self::ISO_IEC_8859_8 => 'ISO-8859-8',
  65. self::ISO_IEC_8859_9 => 'ISO-8859-9',
  66. self::ISO_IEC_8859_10 => 'ISO-8859-10',
  67. self::ISO_IEC_8859_11 => null,
  68. self::ISO_IEC_8859_13 => 'ISO-8859-13',
  69. self::ISO_IEC_8859_14 => 'ISO-8859-14',
  70. self::ISO_IEC_8859_15 => 'ISO-8859-15',
  71. self::ISO_IEC_8859_16 => 'ISO-8859-16',
  72. self::SHIFT_JIS => 'SJIS',
  73. self::WINDOWS_1250_LATIN_2 => null, // @see https://www.php.net/manual/en/function.mb-convert-encoding.php#112547
  74. self::WINDOWS_1251_CYRILLIC => 'Windows-1251',
  75. self::WINDOWS_1252_LATIN_1 => 'Windows-1252',
  76. self::WINDOWS_1256_ARABIC => null, // @see https://stackoverflow.com/a/8592995
  77. self::ISO_IEC_10646_UCS_2 => 'UTF-16BE',
  78. self::ISO_IEC_10646_UTF_8 => 'UTF-8',
  79. self::ISO_IEC_646_1991 => 'ASCII',
  80. self::BIG5 => 'BIG-5',
  81. self::GB18030 => 'GB18030',
  82. self::EUC_KR => 'EUC-KR',
  83. ];
  84. /**
  85. * The current ECI character set ID
  86. */
  87. private int $charsetID;
  88. /**
  89. * @throws \chillerlan\QRCode\QRCodeException
  90. */
  91. public function __construct(int $charsetID){
  92. if(!array_key_exists($charsetID, self::MB_ENCODINGS)){
  93. throw new QRCodeException('invalid charset id: '.$charsetID);
  94. }
  95. $this->charsetID = $charsetID;
  96. }
  97. /**
  98. * Returns the current character set ID
  99. */
  100. public function getID():int{
  101. return $this->charsetID;
  102. }
  103. /**
  104. * Returns the name of the current character set or null if no name is available
  105. *
  106. * @see \mb_convert_encoding()
  107. * @see \iconv()
  108. */
  109. public function getName():?string{
  110. return self::MB_ENCODINGS[$this->charsetID];
  111. }
  112. }