KanjiTest.php 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. <?php
  2. /**
  3. * Class KanjiTest
  4. *
  5. * @created 24.11.2017
  6. * @author Smiley <smiley@chillerlan.net>
  7. * @copyright 2017 Smiley
  8. * @license MIT
  9. */
  10. namespace chillerlan\QRCodeTest\Data;
  11. use chillerlan\QRCode\Data\Kanji;
  12. use Throwable;
  13. use function array_map, bin2hex, chr, defined, mb_internal_encoding, sprintf;
  14. /**
  15. * Tests the Kanji class
  16. */
  final class KanjiTest extends DataInterfaceTestAbstract{

      /** Class name of the data mode under test (presumably consumed by the parent test case). */
      protected string $FQN = Kanji::class;

      /** Sample payload for this data mode (presumably consumed by the parent test case). */
      protected string $testdata = '漂う花の香り';

      /**
       * isKanji() should pass on Kanji/SJIS characters and fail on everything else
       *
       * @return array<int, array{0: string, 1: bool}> datasets of [input string, expected validation result]
       */
      public function stringValidateProvider():array{
          return [
              ['茗荷', true],
              ['Ã', false], // this will fail in SJIS-2004
              ['ABC', false],
              ['123', false],
              ['漂う花の香り', true], // https://genshin-impact.fandom.com/wiki/Floral_Incense
              ['꽃잎 향초의 기도', false], // same as above in korean
          ];
      }

      /**
       * Lists double-byte sequences from the SJIS kanji ranges, one sequence per dataset.
       *
       * Lead bytes run from 0x81 to 0xea, leaving out 0x85-0x87 and 0xa0-0xdf. An odd lead byte is
       * paired with the trail bytes 0x40-0x9e (except 0x7f), an even lead byte with 0x9f-0xfc.
       *
       * @return array<int, array{0: string}> datasets holding one 2-byte string in Kanji::ENCODING each
       */
      public function kanjiProvider():array{
          $list = [];

          for($byte1 = 0x81; $byte1 < 0xeb; $byte1++){

              // skip invalid/vendor ranges
              if(($byte1 > 0x84 && $byte1 < 0x88) || ($byte1 > 0x9f && $byte1 < 0xe0)){
                  continue;
              }

              // second byte of a double-byte JIS X 0208 character whose first half of the JIS sequence was odd
              if(($byte1 % 2) !== 0){

                  for($byte2 = 0x40; $byte2 < 0x9f; $byte2++){

                      // 0x7f is left out of the trail byte range
                      if($byte2 === 0x7f){
                          continue;
                      }

                      $list[] = [chr($byte1).chr($byte2)];
                  }

              }
              // second byte if the first half of the JIS sequence was even
              else{

                  for($byte2 = 0x9f; $byte2 < 0xfd; $byte2++){
                      $list[] = [chr($byte1).chr($byte2)];
                  }

              }

          }

          // we need to put the joined byte sequence in a proper encoding;
          // each element is a one-item dataset array, which mb_convert_encoding() converts element-wise
          return array_map(
              static fn(array $dataset) => mb_convert_encoding($dataset, Kanji::ENCODING, Kanji::ENCODING),
              $list
          );
      }

      /**
       * Checks that Kanji::validateString() accepts each sequence supplied by kanjiProvider().
       *
       * Any Throwable raised in the process is turned into a skipped test instead of a failure.
       * NOTE(review): this includes PHPUnit's own assertion failure exceptions, so a sequence that
       * validates to false is reported as skipped, too - presumably intentional, see the comment below.
       *
       * @dataProvider kanjiProvider
       */
      public function testValidateSJIS(string $chr):void{
          // we may run into several issues due to encoding detection failures
          try{
              $this::assertTrue(Kanji::validateString($chr));
          }
          catch(Throwable $e){
              // on CI the test is skipped without a message; markTestSkipped() throws, so nothing below runs
              /** @noinspection PhpUndefinedConstantInspection - see phpunit.xml.dist */
              if(defined('TEST_IS_CI') && TEST_IS_CI === true){
                  $this::markTestSkipped();
              }

              // $chr arrives in Kanji::ENCODING (see kanjiProvider()), so it is converted *from* that
              // encoding *to* the internal one in order to get a readable glyph into the skip message
              $this::markTestSkipped(sprintf(
                  'invalid glyph: %s => %s',
                  bin2hex($chr),
                  mb_convert_encoding($chr, mb_internal_encoding(), Kanji::ENCODING)
              ));
          }
      }

  }