KanjiTest.php 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. <?php
  2. /**
  3. * Class KanjiTest
  4. *
  5. * @created 24.11.2017
  6. * @author Smiley <smiley@chillerlan.net>
  7. * @copyright 2017 Smiley
  8. * @license MIT
  9. */
  10. declare(strict_types=1);
  11. namespace chillerlan\QRCodeTest\Data;
  12. use chillerlan\QRCode\Data\Kanji;
  13. use chillerlan\QRCode\Data\QRDataModeInterface;
  14. use Generator, Throwable;
  15. use PHPUnit\Framework\Attributes\DataProvider;
  16. use PHPUnit\Framework\Attributes\Group;
  17. use function bin2hex, chr, defined, sprintf;
  18. /**
  19. * Tests the Kanji class
  20. */
  final class KanjiTest extends DataInterfaceTestAbstract{

      /** Sample payload for this data mode. */
      protected const testData = '漂う花の香り';

      /**
       * Creates the Kanji data mode instance for the given payload.
       */
      protected static function getDataModeInterface(string $data):QRDataModeInterface{
          return new Kanji($data);
      }

      /**
       * isKanji() should pass on Kanji/SJIS characters and fail on everything else
       *
       * Each entry is a pair of [input string, expected validation result].
       *
       * @phpstan-return array<int, array{0: string, 1: bool}>
       */
      public static function stringValidateProvider():array{
          return [
              ['茗荷', true],
              ['Ã', false], // this will fail in SJIS-2004
              ['ABC', false],
              ['123', false],
              ['漂う花の香り', true], // https://genshin-impact.fandom.com/wiki/Floral_Incense
              ['꽃잎 향초의 기도', false], // same as above in korean
          ];
      }

      /**
       * lists the valid SJIS kanji
       *
       * Walks the lead bytes 0x81-0xea (minus the skipped ranges below) and combines each of them with
       * the complete trail byte range 0x40-0xfc (0x7f excluded).
       *
       * In Shift JIS the trail byte sub-range tells which JIS X 0208 row of the pair mapped to a lead byte
       * is meant: 0x40-0x9e is the odd row, 0x9f-0xfc is the even row. This is independent of the parity
       * of the lead byte itself, so both sub-ranges are generated for every lead byte.
       *
       * The data set key is the code point in upper case hex (e.g. "0x889F"),
       * the data set holds the character converted to UTF-8.
       *
       * @phpstan-return Generator<string, array{0: string}>
       */
      public static function kanjiProvider():Generator{
          // data set name: both bytes combined into one 16 bit value, printed as hex
          $key = static fn(int $byte1, int $byte2):string => sprintf('0x%X', (($byte1 << 8) | $byte2));
          // data set value: the byte pair converted from Kanji::ENCODING to UTF-8
          $val = static fn(int $byte1, int $byte2):string => mb_convert_encoding(chr($byte1).chr($byte2), 'UTF-8', Kanji::ENCODING);

          for($byte1 = 0x81; $byte1 < 0xeb; $byte1++){

              // skip invalid/vendor ranges
              if(($byte1 > 0x84 && $byte1 < 0x88) || ($byte1 > 0x9f && $byte1 < 0xe0)){
                  continue;
              }

              // 0x40-0x9e: odd JIS row, 0x9f-0xfc: even JIS row - both belong to the same lead byte
              for($byte2 = 0x40; $byte2 < 0xfd; $byte2++){

                  // 0x7f is excluded from the trail byte range
                  if($byte2 === 0x7f){
                      continue;
                  }

                  $chr = $val($byte1, $byte2);

                  // skip unknown glyphs ("?" is the default mbstring substitute character)
                  if($chr === '?'){
                      continue;
                  }

                  yield $key($byte1, $byte2) => [$chr];
              }
          }
      }

      /**
       * Runs Kanji::validateString() against every single character from kanjiProvider().
       *
       * Anything thrown during validation - a failed assertion included - ends in a skipped test
       * instead of a failure: silently when the TEST_IS_CI constant is defined and true,
       * otherwise with a message that names the offending byte sequence and glyph.
       */
      #[Group('slow')]
      #[DataProvider('kanjiProvider')]
      public function testValidateSJIS(string $chr):void{
          // we may run into several issues due to encoding detection failures
          try{
              $this::assertTrue(Kanji::validateString($chr));
          }
          catch(Throwable){
              /** @noinspection PhpUndefinedConstantInspection - see phpunit.xml.dist */
              if(defined('TEST_IS_CI') && TEST_IS_CI === true){
                  // markTestSkipped() throws, the call below is not reached in this case
                  $this::markTestSkipped();
              }

              $this::markTestSkipped(sprintf(
                  'invalid glyph: %s => %s',
                  bin2hex(mb_convert_encoding($chr, Kanji::ENCODING, 'UTF-8')),
                  $chr,
              ));
          }
      }

  }