ECICharset.php 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. <?php
  2. /**
  3. * Class ECICharset
  4. *
  5. * @created 21.01.2021
  6. * @author ZXing Authors
  7. * @author smiley <smiley@chillerlan.net>
  8. * @copyright 2021 smiley
  9. * @license Apache-2.0
  10. */
  11. namespace chillerlan\QRCode\Common;
  12. use chillerlan\QRCode\QRCodeException;
  13. use function sprintf;
  14. /**
  15. * ISO/IEC 18004:2000 - 8.4.1 Extended Channel Interpretation (ECI) Mode
  16. */
  final class ECICharset{

      // ECI assignment numbers, one constant per supported designator

      public const CP437              = 0; // Code page 437, DOS Latin US
      public const ISO_IEC_8859_1_GLI = 1; // GLI encoding: characters 0-127 as in ISO/IEC 646, characters 128-255 as in ISO 8859-1
      public const CP437_WO_GLI       = 2; // Code table equivalent to CP437, minus the return-to-GLI 0 logic
      public const ISO_IEC_8859_1     = 3; // Latin-1 (Default)
      public const ISO_IEC_8859_2     = 4; // Latin-2
      public const ISO_IEC_8859_3     = 5; // Latin-3
      public const ISO_IEC_8859_4     = 6; // Latin-4
      public const ISO_IEC_8859_5     = 7; // Latin/Cyrillic
      public const ISO_IEC_8859_6     = 8; // Latin/Arabic
      public const ISO_IEC_8859_7     = 9; // Latin/Greek
      public const ISO_IEC_8859_8     = 10; // Latin/Hebrew
      public const ISO_IEC_8859_9     = 11; // Latin-5
      public const ISO_IEC_8859_10    = 12; // Latin-6
      public const ISO_IEC_8859_11    = 13; // Latin/Thai
      // 14 reserved
      public const ISO_IEC_8859_13    = 15; // Latin-7 (Baltic Rim)
      public const ISO_IEC_8859_14    = 16; // Latin-8 (Celtic)
      public const ISO_IEC_8859_15    = 17; // Latin-9
      public const ISO_IEC_8859_16    = 18; // Latin-10
      // 19 reserved
      public const SHIFT_JIS             = 20; // JIS X 0208 Annex 1 + JIS X 0201
      public const WINDOWS_1250_LATIN_2  = 21; // Superset of Latin-2, Central Europe
      public const WINDOWS_1251_CYRILLIC = 22; // Latin/Cyrillic
      public const WINDOWS_1252_LATIN_1  = 23; // Superset of Latin-1
      public const WINDOWS_1256_ARABIC   = 24;
      public const ISO_IEC_10646_UCS_2   = 25; // High order byte first (UTF-16BE)
      public const ISO_IEC_10646_UTF_8   = 26; // UTF-8
      public const ISO_IEC_646_1991      = 27; // International Reference Version of ISO 7-bit coded character set (US-ASCII)
      public const BIG5                  = 28; // Big 5 (Taiwan) Chinese Character Set
      public const GB18030               = 29; // GB (PRC) Chinese Character Set
      public const EUC_KR                = 30; // Korean Character Set

      /**
       * Lookup table: charset id -> encoding name, or null where this map holds no name for the id
       *
       * @see \mb_list_encodings()
       */
      public const MB_ENCODINGS = [
          self::CP437                 => null,
          self::ISO_IEC_8859_1_GLI    => null,
          self::CP437_WO_GLI          => null,
          self::ISO_IEC_8859_1        => 'ISO-8859-1',
          self::ISO_IEC_8859_2        => 'ISO-8859-2',
          self::ISO_IEC_8859_3        => 'ISO-8859-3',
          self::ISO_IEC_8859_4        => 'ISO-8859-4',
          self::ISO_IEC_8859_5        => 'ISO-8859-5',
          self::ISO_IEC_8859_6        => 'ISO-8859-6',
          self::ISO_IEC_8859_7        => 'ISO-8859-7',
          self::ISO_IEC_8859_8        => 'ISO-8859-8',
          self::ISO_IEC_8859_9        => 'ISO-8859-9',
          self::ISO_IEC_8859_10       => 'ISO-8859-10',
          self::ISO_IEC_8859_11       => null,
          self::ISO_IEC_8859_13       => 'ISO-8859-13',
          self::ISO_IEC_8859_14       => 'ISO-8859-14',
          self::ISO_IEC_8859_15       => 'ISO-8859-15',
          self::ISO_IEC_8859_16       => 'ISO-8859-16',
          self::SHIFT_JIS             => 'SJIS',
          self::WINDOWS_1250_LATIN_2  => null, // @see https://www.php.net/manual/en/function.mb-convert-encoding.php#112547
          self::WINDOWS_1251_CYRILLIC => 'Windows-1251',
          self::WINDOWS_1252_LATIN_1  => 'Windows-1252',
          self::WINDOWS_1256_ARABIC   => null, // @see https://stackoverflow.com/a/8592995
          self::ISO_IEC_10646_UCS_2   => 'UTF-16BE',
          self::ISO_IEC_10646_UTF_8   => 'UTF-8',
          self::ISO_IEC_646_1991      => 'ASCII',
          self::BIG5                  => 'BIG-5',
          self::GB18030               => 'GB18030',
          self::EUC_KR                => 'EUC-KR',
      ];

      /**
       * The ECI character set ID held by this instance
       */
      private int $charsetID;

      /**
       * Stores the given charset ID after checking that it lies within 0..999999 (inclusive)
       *
       * @throws \chillerlan\QRCode\QRCodeException if the ID is negative or greater than 999999
       */
      public function __construct(int $charsetID){
          $isWithinRange = ($charsetID >= 0 && $charsetID <= 999999);

          if(!$isWithinRange){
              throw new QRCodeException(sprintf('invalid charset id: "%s"', $charsetID));
          }

          $this->charsetID = $charsetID;
      }

      /**
       * Returns the character set ID this instance was created with
       */
      public function getID():int{
          return $this->charsetID;
      }

      /**
       * Returns the encoding name mapped to the current charset ID in MB_ENCODINGS;
       * null when the ID has no entry there or the entry itself is null
       *
       * @see \mb_convert_encoding()
       * @see \iconv()
       */
      public function getName():string|null{
          $encodingNames = self::MB_ENCODINGS;

          return ($encodingNames[$this->charsetID] ?? null);
      }

  }