Kanji.php 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. <?php
  2. /**
  3. * Class Kanji
  4. *
  5. * @created 25.11.2015
  6. * @author Smiley <smiley@chillerlan.net>
  7. * @copyright 2015 Smiley
  8. * @license MIT
  9. */
  10. namespace chillerlan\QRCode\Data;
  11. use chillerlan\QRCode\Common\{BitBuffer, Mode};
  12. use function chr, implode, is_string, mb_convert_encoding, mb_detect_encoding,
  13. mb_detect_order, mb_internal_encoding, mb_strlen, ord, sprintf, strlen;
  14. /**
  15. * Kanji mode: double-byte characters from the Shift-JIS character set
  16. *
  17. * ISO/IEC 18004:2000 Section 8.3.5
  18. * ISO/IEC 18004:2000 Section 8.4.5
  19. *
  20. * @see https://en.wikipedia.org/wiki/Shift_JIS#As_defined_in_JIS_X_0208:1997
  21. * @see http://www.rikai.com/library/kanjitables/kanji_codes.sjis.shtml
  22. * @see https://gist.github.com/codemasher/d07d3e6e9346c08e7a41b8b978784952
  23. */
  24. final class Kanji extends QRDataModeAbstract{
  25. // SJIS, SJIS-2004
  26. // SJIS-2004 may produce errors in PHP < 8
  27. public const ENCODING = 'SJIS';
  28. /**
  29. * @inheritDoc
  30. */
  31. protected static int $datamode = Mode::KANJI;
  32. /**
  33. * @inheritDoc
  34. */
  35. protected function getCharCount():int{
  36. return mb_strlen($this->data, self::ENCODING);
  37. }
  38. /**
  39. * @inheritDoc
  40. */
  41. public function getLengthInBits():int{
  42. return $this->getCharCount() * 13;
  43. }
  44. /**
  45. * @inheritDoc
  46. */
  47. public static function convertEncoding(string $string):string{
  48. mb_detect_order([mb_internal_encoding(), 'UTF-8', 'SJIS', 'SJIS-2004']);
  49. $detected = mb_detect_encoding($string, null, true);
  50. if($detected === false){
  51. throw new QRCodeDataException('mb_detect_encoding error');
  52. }
  53. if($detected === self::ENCODING){
  54. return $string;
  55. }
  56. $string = mb_convert_encoding($string, self::ENCODING, $detected);
  57. if(!is_string($string)){
  58. throw new QRCodeDataException(sprintf('invalid encoding: %s', $detected));
  59. }
  60. return $string;
  61. }
  62. /**
  63. * checks if a string qualifies as SJIS Kanji
  64. */
  65. public static function validateString(string $string):bool{
  66. $string = self::convertEncoding($string);
  67. $len = strlen($string);
  68. if($len < 2 || $len % 2 !== 0){
  69. return false;
  70. }
  71. for($i = 0; $i < $len; $i += 2){
  72. $byte1 = ord($string[$i]);
  73. $byte2 = ord($string[$i + 1]);
  74. // byte 1 unused and vendor ranges
  75. if($byte1 < 0x81 || ($byte1 > 0x84 && $byte1 < 0x88) || ($byte1 > 0x9f && $byte1 < 0xe0) || $byte1 > 0xea){
  76. return false;
  77. }
  78. // byte 2 unused ranges
  79. if($byte2 < 0x40 || $byte2 === 0x7f || $byte2 > 0xfc){
  80. return false;
  81. }
  82. // byte 1 is even, second byte in range 0x9f - 0xfc
  83. if(($byte1 % 2) === 0){
  84. if($byte2 < 0x9f){
  85. return false;
  86. }
  87. }
  88. // byte 1 is odd, second byte in range 0x40 - 0x9e (technically)
  89. // now this is weird: according to spec, the second byte should be lower than 0x9e.
  90. // however, converting encodings back and forth seems to mess with the string somehow.
  91. // someone please riddle me this
  92. # else{
  93. # if($byte2 > 0x9e){
  94. # return false;
  95. # }
  96. # }
  97. }
  98. return true;
  99. }
  100. /**
  101. * @inheritDoc
  102. *
  103. * @throws \chillerlan\QRCode\Data\QRCodeDataException on an illegal character occurence
  104. */
  105. public function write(BitBuffer $bitBuffer, int $versionNumber):void{
  106. $bitBuffer
  107. ->put($this::$datamode, 4)
  108. ->put($this->getCharCount(), $this::getLengthBits($versionNumber))
  109. ;
  110. $len = strlen($this->data);
  111. for($i = 0; $i + 1 < $len; $i += 2){
  112. $c = ((0xff & ord($this->data[$i])) << 8) | (0xff & ord($this->data[$i + 1]));
  113. if($c >= 0x8140 && $c <= 0x9ffc){
  114. $c -= 0x8140;
  115. }
  116. elseif($c >= 0xe040 && $c <= 0xebbf){
  117. $c -= 0xc140;
  118. }
  119. else{
  120. throw new QRCodeDataException(sprintf('illegal char at %d [%d]', $i + 1, $c));
  121. }
  122. $bitBuffer->put(((($c >> 8) & 0xff) * 0xc0) + ($c & 0xff), 13);
  123. }
  124. if($i < $len){
  125. throw new QRCodeDataException(sprintf('illegal char at %d', $i + 1));
  126. }
  127. }
  128. /**
  129. * @inheritDoc
  130. *
  131. * @throws \chillerlan\QRCode\Data\QRCodeDataException
  132. */
  133. public static function decodeSegment(BitBuffer $bitBuffer, int $versionNumber):string{
  134. $length = $bitBuffer->read(self::getLengthBits($versionNumber));
  135. if($bitBuffer->available() < $length * 13){
  136. throw new QRCodeDataException('not enough bits available'); // @codeCoverageIgnore
  137. }
  138. // Each character will require 2 bytes. Read the characters as 2-byte pairs and decode as SJIS afterwards
  139. $buffer = [];
  140. $offset = 0;
  141. while($length > 0){
  142. // Each 13 bits encodes a 2-byte character
  143. $twoBytes = $bitBuffer->read(13);
  144. $assembledTwoBytes = ((int)($twoBytes / 0x0c0) << 8) | ($twoBytes % 0x0c0);
  145. $assembledTwoBytes += ($assembledTwoBytes < 0x01f00)
  146. ? 0x08140 // In the 0x8140 to 0x9FFC range
  147. : 0x0c140; // In the 0xE040 to 0xEBBF range
  148. $buffer[$offset] = chr(0xff & ($assembledTwoBytes >> 8));
  149. $buffer[$offset + 1] = chr(0xff & $assembledTwoBytes);
  150. $offset += 2;
  151. $length--;
  152. }
  153. return mb_convert_encoding(implode($buffer), mb_internal_encoding(), self::ENCODING);
  154. }
  155. }