Sfoglia il codice sorgente

:octocat: ECI mode rework

smiley 2 anni fa
parent
commit
05eaf4eca0

+ 4 - 4
src/Common/ECICharset.php

@@ -12,7 +12,7 @@
 namespace chillerlan\QRCode\Common;
 
 use chillerlan\QRCode\QRCodeException;
-use function array_key_exists;
+use function sprintf;
 
 /**
  * ISO/IEC 18004:2000 - 8.4.1 Extended Channel Interpretation (ECI) Mode
@@ -98,8 +98,8 @@ final class ECICharset{
 	 */
 	public function __construct(int $charsetID){
 
-		if(!array_key_exists($charsetID, self::MB_ENCODINGS)){
-			throw new QRCodeException('invalid charset id: '.$charsetID);
+		if($charsetID < 0 || $charsetID > 999999){
+			throw new QRCodeException(sprintf('invalid charset id: "%s"', $charsetID));
 		}
 
 		$this->charsetID = $charsetID;
@@ -119,7 +119,7 @@ final class ECICharset{
 	 * @see \iconv()
 	 */
 	public function getName():?string{
-		return self::MB_ENCODINGS[$this->charsetID];
+		return (self::MB_ENCODINGS[$this->charsetID] ?? null);
 	}
 
 }

+ 6 - 5
src/Common/Mode.php

@@ -10,7 +10,7 @@
 
 namespace chillerlan\QRCode\Common;
 
-use chillerlan\QRCode\Data\{AlphaNum, Byte, Hanzi, Kanji, Number};
+use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Hanzi, Kanji, Number};
 use chillerlan\QRCode\QRCodeException;
 
 /**
@@ -48,10 +48,11 @@ final class Mode{
 	 */
 	public const LENGTH_BITS = [
 		self::NUMBER   => [10, 12, 14],
-		self::ALPHANUM => [9, 11, 13],
-		self::BYTE     => [8, 16, 16],
-		self::KANJI    => [8, 10, 12],
-		self::HANZI    => [8, 10, 12],
+		self::ALPHANUM => [ 9, 11, 13],
+		self::BYTE     => [ 8, 16, 16],
+		self::KANJI    => [ 8, 10, 12],
+		self::HANZI    => [ 8, 10, 12],
+		self::ECI      => [ 0,  0,  0],
 	];
 
 	/**

+ 76 - 20
src/Data/ECI.php

@@ -11,10 +11,13 @@
 namespace chillerlan\QRCode\Data;
 
 use chillerlan\QRCode\Common\{BitBuffer, ECICharset, Mode};
+use function mb_convert_encoding, mb_detect_encoding, mb_internal_encoding, sprintf;
 
 /**
  * Adds an ECI Designator
  *
+ * ISO/IEC 18004:2000 8.4.1.1
+ *
  * Please note that you have to take care for the correct data encoding when adding with QRCode::add*Segment()
  */
 final class ECI extends QRDataModeAbstract{
@@ -34,6 +37,11 @@ final class ECI extends QRDataModeAbstract{
 	 * @noinspection PhpMissingParentConstructorInspection
 	 */
 	public function __construct(int $encoding){
+
+		if($encoding < 0 || $encoding > 999999){
+			throw new QRCodeDataException(sprintf('invalid encoding id: "%s"', $encoding));
+		}
+
 		$this->encoding = $encoding;
 	}
 
@@ -41,41 +49,64 @@ final class ECI extends QRDataModeAbstract{
 	 * @inheritDoc
 	 */
 	public function getLengthInBits():int{
-		return 8;
+
+		if($this->encoding < 128){
+			return 8;
+		}
+
+		if($this->encoding < 16384){
+			return 16;
+		}
+
+		return 24;
 	}
 
 	/**
+	 * Writes an ECI designator to the bitbuffer
+	 *
 	 * @inheritDoc
 	 */
-	public function write(BitBuffer $bitBuffer, int $versionNumber):void{
-		$bitBuffer
-			->put($this::$datamode, 4)
-			->put($this->encoding, 8)
-		;
+	public function write(BitBuffer $bitBuffer, int $versionNumber):QRDataModeInterface{
+		$bitBuffer->put($this::$datamode, 4);
+
+		if($this->encoding < 128){
+			$bitBuffer->put($this->encoding, 8);
+		}
+		elseif($this->encoding < 16384){
+			$bitBuffer->put(($this->encoding | 0x8000), 16);
+		}
+		elseif($this->encoding < 1000000){
+			$bitBuffer->put(($this->encoding | 0xC00000), 24);
+		}
+
+		return $this;
 	}
 
 	/**
+	 * Reads and parses the value of an ECI designator
+	 *
 	 * @throws \chillerlan\QRCode\Data\QRCodeDataException
 	 */
 	public static function parseValue(BitBuffer $bitBuffer):ECICharset{
 		$firstByte = $bitBuffer->read(8);
 
-		if(($firstByte & 0x80) === 0){
-			// just one byte
-			return new ECICharset($firstByte & 0x7f);
+		// just one byte
+		if(($firstByte & 0b10000000) === 0){
+			$id = ($firstByte & 0b01111111);
 		}
-
-		if(($firstByte & 0xc0) === 0x80){
-			// two bytes
-			return new ECICharset((($firstByte & 0x3f) << 8) | $bitBuffer->read(8));
+		// two bytes
+		elseif(($firstByte & 0b11000000) === 0b10000000){
+			$id = ((($firstByte & 0b00111111) << 8) | $bitBuffer->read(8));
 		}
-
-		if(($firstByte & 0xe0) === 0xC0){
-			// three bytes
-			return new ECICharset((($firstByte & 0x1f) << 16) | $bitBuffer->read(16));
+		// three bytes
+		elseif(($firstByte & 0b11100000) === 0b11000000){
+			$id = ((($firstByte & 0b00011111) << 16) | $bitBuffer->read(16));
+		}
+		else{
+			throw new QRCodeDataException(sprintf('error decoding ECI value first byte: %08b', $firstByte)); // @codeCoverageIgnore
 		}
 
-		throw new QRCodeDataException('error decoding ECI value');
+		return new ECICharset($id);
 	}
 
 	/**
@@ -86,10 +117,35 @@ final class ECI extends QRDataModeAbstract{
 	}
 
 	/**
-	 * @codeCoverageIgnore Unused, but required as per interface
+	 * Reads and decodes the ECI designator including the following byte sequence
+	 *
+	 * @throws \chillerlan\QRCode\Data\QRCodeDataException
 	 */
 	public static function decodeSegment(BitBuffer $bitBuffer, int $versionNumber):string{
-		return '';
+		$eciCharset = self::parseValue($bitBuffer);
+		$nextMode   = $bitBuffer->read(4);
+
+		if($nextMode !== Mode::BYTE){
+			throw new QRCodeDataException(sprintf('ECI designator followed by invalid mode: "%04b"', $nextMode));
+		}
+
+		$data     = Byte::decodeSegment($bitBuffer, $versionNumber);
+		$encoding = $eciCharset->getName();
+
+		if($encoding === null){
+			// The spec isn't clear on this mode; see
+			// section 6.4.5: it does not say which encoding to assume
+			// upon decoding. I have seen ISO-8859-1 used as well as
+			// Shift_JIS -- without anything like an ECI designator to
+			// give a hint.
+			$encoding = mb_detect_encoding($data, ['ISO-8859-1', 'Windows-1252', 'SJIS', 'UTF-8'], true);
+
+			if($encoding === false){
+				throw new QRCodeDataException('could not determine encoding in ECI mode'); // @codeCoverageIgnore
+			}
+		}
+
+		return mb_convert_encoding($data, mb_internal_encoding(), $encoding);
 	}
 
 }

+ 5 - 37
src/Decoder/Decoder.php

@@ -11,11 +11,11 @@
 
 namespace chillerlan\QRCode\Decoder;
 
-use chillerlan\QRCode\Common\{BitBuffer, EccLevel, ECICharset, MaskPattern, Mode, ReedSolomonDecoder, Version};
+use chillerlan\QRCode\Common\{BitBuffer, EccLevel, MaskPattern, Mode, ReedSolomonDecoder, Version};
 use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Hanzi, Kanji, Number};
 use chillerlan\QRCode\Detector\Detector;
 use Throwable;
-use function chr, mb_convert_encoding, mb_detect_encoding, mb_internal_encoding, str_replace;
+use function chr, str_replace;
 
 /**
  * The main class which implements QR Code decoding -- as opposed to locating and extracting
@@ -29,7 +29,6 @@ final class Decoder{
 	private ?EccLevel    $eccLevel = null;
 	private ?MaskPattern $maskPattern = null;
 	private BitBuffer    $bitBuffer;
-	private ?ECICharset  $eciCharset = null;
 
 	/**
 	 * Decodes a QR Code represented as a BitMatrix.
@@ -92,7 +91,6 @@ final class Decoder{
 	 */
 	private function decodeBitStream(BitBuffer $bitBuffer):DecoderResult{
 		$this->bitBuffer  = $bitBuffer;
-		$this->eciCharset = null;
 		$versionNumber    = $this->version->getVersionNumber();
 		$symbolSequence   = -1;
 		$parityData       = -1;
@@ -103,12 +101,12 @@ final class Decoder{
 		while($this->bitBuffer->available() >= 4){
 			$datamode = $this->bitBuffer->read(4); // mode is encoded by 4 bits
 
-			// OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
+			// OK, assume we're done
 			if($datamode === Mode::TERMINATOR){
 				break;
 			}
 			elseif($datamode === Mode::ECI){
-				$this->eciCharset = ECI::parseValue($this->bitBuffer);
+				$result .= ECI::decodeSegment($this->bitBuffer, $versionNumber);
 			}
 			elseif($datamode === Mode::FNC1_FIRST || $datamode === Mode::FNC1_SECOND){
 				// We do little with FNC1 except alter the parsed result a bit according to the spec
@@ -131,7 +129,7 @@ final class Decoder{
 				$result .= $this->decodeAlphanumSegment($versionNumber, $fc1InEffect);
 			}
 			elseif($datamode === Mode::BYTE){
-				$result .= $this->decodeByteSegment($versionNumber);
+				$result .= Byte::decodeSegment($this->bitBuffer, $versionNumber);
 			}
 			elseif($datamode === Mode::KANJI){
 				$result .= Kanji::decodeSegment($this->bitBuffer, $versionNumber);
@@ -172,34 +170,4 @@ final class Decoder{
 		return $str;
 	}
 
-	/**
-	 * @throws \chillerlan\QRCode\Decoder\QRCodeDecoderException
-	 */
-	private function decodeByteSegment(int $versionNumber):string{
-		$str = Byte::decodeSegment($this->bitBuffer, $versionNumber);
-
-		if($this->eciCharset === null){
-			return $str;
-		}
-
-		$encoding = $this->eciCharset->getName();
-
-		if($encoding === null){
-			// The spec isn't clear on this mode; see
-			// section 6.4.5: t does not say which encoding to assuming
-			// upon decoding. I have seen ISO-8859-1 used as well as
-			// Shift_JIS -- without anything like an ECI designator to
-			// give a hint.
-			$encoding = mb_detect_encoding($str, ['ISO-8859-1', 'Windows-1252', 'SJIS', 'UTF-8'], true);
-
-			if($encoding === false){
-				throw new QRCodeDecoderException('could not determine encoding in ECI mode');
-			}
-		}
-
-		$this->eciCharset = null;
-
-		return mb_convert_encoding($str, mb_internal_encoding(), $encoding);
-	}
-
 }

+ 53 - 0
tests/Common/ECICharsetTest.php

@@ -0,0 +1,53 @@
+<?php
+/**
+ * ECICharsetTest.php
+ *
+ * @created      13.03.2023
+ * @author       smiley <smiley@chillerlan.net>
+ * @copyright    2023 smiley
+ * @license      MIT
+ */
+
+namespace chillerlan\QRCodeTest\Common;
+
+use chillerlan\QRCode\Common\ECICharset;
+use chillerlan\QRCode\QRCodeException;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Tests the ECICharset class: validation of the charset ID in the constructor
+ * and the lookup of the encoding name for each ID listed in ECICharset::MB_ENCODINGS
+ */
+class ECICharsetTest extends TestCase{
+
+	/**
+	 * Charset IDs that the test expects the ECICharset constructor to reject
+	 *
+	 * @return int[][]
+	 */
+	public static function invalidIdProvider():array{
+		return [[-1], [1000000]];
+	}
+
+	/**
+	 * Constructing an ECICharset with an invalid ID must throw a QRCodeException
+	 *
+	 * @dataProvider invalidIdProvider
+	 */
+	public function testInvalidDataException(int $id):void{
+		$this->expectException(QRCodeException::class);
+		$this->expectExceptionMessage('invalid charset id:');
+		/** @phan-suppress-next-line PhanNoopNew */
+		new ECICharset($id);
+	}
+
+	/**
+	 * Provides one [id, name] pair for every entry in ECICharset::MB_ENCODINGS
+	 *
+	 * Declared static like invalidIdProvider(): the method does not use $this,
+	 * and PHPUnit 10 deprecates non-static data providers.
+	 *
+	 * @return array[]
+	 */
+	public static function encodingProvider():array{
+		$params = [];
+
+		foreach(ECICharset::MB_ENCODINGS as $id => $name){
+			$params[] = [$id, $name];
+		}
+
+		return $params;
+	}
+
+	/**
+	 * The ID and the encoding name returned by an ECICharset instance must match the table entry
+	 *
+	 * The name is explicitly nullable (implicit nullability via a null default is deprecated as of PHP 8.4).
+	 *
+	 * @dataProvider encodingProvider
+	 */
+	public function testGetName(int $id, ?string $name = null):void{
+		$eciCharset = new ECICharset($id);
+
+		$this::assertSame($id, $eciCharset->getID());
+		$this::assertSame($name, $eciCharset->getName());
+	}
+
+}

+ 193 - 0
tests/Data/ECITest.php

@@ -0,0 +1,193 @@
+<?php
+/**
+ * ECITest.php
+ *
+ * @created      12.03.2023
+ * @author       smiley <smiley@chillerlan.net>
+ * @copyright    2023 smiley
+ * @license      MIT
+ */
+
+namespace chillerlan\QRCodeTest\Data;
+
+use chillerlan\QRCode\QROptions;
+use chillerlan\QRCode\Common\{BitBuffer, ECICharset, MaskPattern, Mode};
+use chillerlan\QRCode\Data\{Byte, ECI, Number, QRCodeDataException, QRData, QRDataModeInterface, QRMatrix};
+
+/**
+ * Tests the ECI class
+ */
+final class ECITest extends DataInterfaceTestAbstract{
+
+	protected string $FQN         = ECI::class;
+	protected string $testdata    = '无可奈何燃花作香';
+	private int      $testCharset = ECICharset::GB18030;
+
+	/**
+	 * Builds the segment pair used by most tests: an ECI designator for the test charset,
+	 * followed by a Byte segment holding the test data converted from the internal
+	 * encoding into that charset
+	 *
+	 * @return \chillerlan\QRCode\Data\QRDataModeInterface[]
+	 */
+	private function getDataSegments():array{
+		return [
+			new $this->FQN($this->testCharset),
+			new Byte(mb_convert_encoding($this->testdata, ECICharset::MB_ENCODINGS[$this->testCharset], mb_internal_encoding())),
+		];
+	}
+
+	/**
+	 * Returns no datasets for this data mode
+	 */
+	public static function stringValidateProvider():array{
+		return [];
+	}
+
+	/** @inheritDoc */
+	public function testDataModeInstance():void{
+		$datamode = new $this->FQN($this->testCharset);
+
+		$this::assertInstanceOf(QRDataModeInterface::class, $datamode);
+	}
+
+	/**
+	 * @inheritDoc
+	 * @dataProvider maskPatternProvider
+	 */
+	public function testInitMatrix(int $maskPattern):void{
+		$segments = $this->getDataSegments();
+
+		$this->QRData->setData($segments);
+
+		$matrix = $this->QRData->writeMatrix(new MaskPattern($maskPattern));
+
+		$this::assertInstanceOf(QRMatrix::class, $matrix);
+		$this::assertSame($maskPattern, $matrix->maskPattern()->getPattern());
+	}
+
+	/** @inheritDoc */
+	public function testGetMinimumVersion():void{
+		/** @noinspection PhpUnitTestFailedLineInspection */
+		$this::markTestSkipped('N/A (ECI mode)');
+	}
+
+	/** @inheritDoc */
+	public function testBinaryStringInvalid():void{
+		/** @noinspection PhpUnitTestFailedLineInspection */
+		$this::markTestSkipped('N/A (ECI mode)');
+	}
+
+	/**
+	 * @inheritDoc
+	 * @dataProvider versionBreakpointProvider
+	 */
+	public function testDecodeSegment(int $version):void{
+		$options = new QROptions;
+		$options->version = $version;
+
+		/** @var \chillerlan\QRCode\Data\QRDataModeInterface[] $segments */
+		$segments = $this->getDataSegments();
+
+		// invoke a QRData instance and write data
+		$this->QRData = new QRData($options, $segments);
+		// get the filled bitbuffer
+		$bitBuffer = $this->QRData->getBitBuffer();
+		// read the first 4 bits
+		$this::assertSame($segments[0]->getDataMode(), $bitBuffer->read(4));
+		// decode the data
+		/** @noinspection PhpUndefinedMethodInspection */
+		$this::assertSame($this->testdata, $this->FQN::decodeSegment($bitBuffer, $options->version));
+	}
+
+	/** @inheritDoc */
+	public function testGetMinimumVersionException():void{
+		/** @noinspection PhpUnitTestFailedLineInspection */
+		$this::markTestSkipped('N/A (ECI mode)');
+	}
+
+	/** @inheritDoc */
+	public function testCodeLengthOverflowException():void{
+		/** @noinspection PhpUnitTestFailedLineInspection */
+		$this::markTestSkipped('N/A (ECI mode)');
+	}
+
+	/** @inheritDoc */
+	public function testInvalidDataException():void{
+		$this->expectException(QRCodeDataException::class);
+		$this->expectExceptionMessage('invalid encoding id:');
+		/** @phan-suppress-next-line PhanNoopNew */
+		new $this->FQN(-1);
+	}
+
+	/**
+	 * since the ECI class only accepts integer values,
+	 * we'll use this test to check for the upper end of the accepted input range
+	 *
+	 * @inheritDoc
+	 */
+	public function testInvalidDataOnEmptyException():void{
+		$this->expectException(QRCodeDataException::class);
+		$this->expectExceptionMessage('invalid encoding id:');
+		/** @phan-suppress-next-line PhanNoopNew */
+		new $this->FQN(1000000);
+	}
+
+	/**
+	 * Charset IDs paired with the designator length in bits that is expected for them
+	 *
+	 * @return int[][]
+	 */
+	public static function eciCharsetIdProvider():array{
+		return [
+			[     0,  8],
+			[   127,  8],
+			[   128, 16],
+			[ 16383, 16],
+			[ 16384, 24],
+			[999999, 24],
+		];
+	}
+
+	/**
+	 * Writes an ECI designator to a fresh bitbuffer and reads it back:
+	 * checks the reported length, the mode indicator and the parsed charset ID
+	 *
+	 * @dataProvider eciCharsetIdProvider
+	 */
+	public function testReadWrite(int $id, int $lengthInBits):void{
+		$bitBuffer = new BitBuffer;
+		$eci       = (new $this->FQN($id))->write($bitBuffer, 1);
+
+		$this::assertSame($lengthInBits, $eci->getLengthInBits());
+		$this::assertSame(Mode::ECI, $bitBuffer->read(4));
+		/** @noinspection PhpUndefinedMethodInspection */
+		$this::assertSame($id, $this->FQN::parseValue($bitBuffer)->getID());
+	}
+
+	/**
+	 * Tests if an exception is thrown when the ECI segment is followed by a mode that is not 8-bit byte
+	 */
+	public function testDecodeECISegmentFollowedByInvalidModeException():void{
+		$this->expectException(QRCodeDataException::class);
+		$this->expectExceptionMessage('ECI designator followed by invalid mode:');
+
+		$options          = new QROptions;
+		$options->version = 5;
+
+		/** @var \chillerlan\QRCode\Data\QRDataModeInterface[] $segments */
+		$segments    = $this->getDataSegments();
+		// follow the ECI segment by a non-8bit-byte segment
+		$segments[1] = new Number('1');
+		$bitBuffer   = (new QRData($options, $segments))->getBitBuffer();
+		$this::assertSame(Mode::ECI, $bitBuffer->read(4));
+		/** @noinspection PhpUndefinedMethodInspection */
+		$this->FQN::decodeSegment($bitBuffer, $options->version);
+	}
+
+	/**
+	 * Charset IDs paired with plain ASCII data ("ABC"), keyed by charset label
+	 *
+	 * Declared static like the other data providers in this class: the method does not
+	 * use $this, and PHPUnit 10 deprecates non-static data providers.
+	 *
+	 * NOTE(review): presumably these IDs have no encoding name mapped in ECICharset::MB_ENCODINGS,
+	 * so that decoding falls back to encoding detection - confirm against the table.
+	 *
+	 * @return array[]
+	 */
+	public static function unknownEncodingDataProvider():array{
+		return [
+			'CP437'              => [0, "\x41\x42\x43"],
+			'ISO_IEC_8859_1_GLI' => [1, "\x41\x42\x43"],
+		];
+	}
+
+	/**
+	 * Tests detection of an unknown character set
+	 *
+	 * @dataProvider unknownEncodingDataProvider
+	 */
+	public function testConvertUnknownEncoding(int $id, string $data):void{
+		$options          = new QROptions;
+		$options->version = 5;
+
+		$segments  = [new $this->FQN($id), new Byte($data)];
+		$bitBuffer = (new QRData($options, $segments))->getBitBuffer();
+		$this::assertSame(Mode::ECI, $bitBuffer->read(4));
+		/** @noinspection PhpUndefinedMethodInspection */
+		$this::assertSame($data, $this->FQN::decodeSegment($bitBuffer, $options->version));
+	}
+
+}