Binarizer.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. <?php
  2. /**
  3. * Class Binarizer
  4. *
  5. * @created 17.01.2021
  6. * @author ZXing Authors
  7. * @author Smiley <smiley@chillerlan.net>
  8. * @copyright 2021 Smiley
  9. * @license Apache-2.0
  10. */
  11. namespace chillerlan\QRCode\Decoder;
  12. use RuntimeException;
  13. use function array_fill, count, max;
  14. /**
  15. * This class implements a local thresholding algorithm, which while slower than the
  16. * GlobalHistogramBinarizer, is fairly efficient for what it does. It is designed for
  17. * high frequency images of barcodes with black data on white backgrounds. For this application,
  18. * it does a much better job than a global blackpoint with severe shadows and gradients.
  19. * However it tends to produce artifacts on lower frequency images and is therefore not
  20. * a good general purpose binarizer for uses outside ZXing.
  21. *
  22. * This class extends GlobalHistogramBinarizer, using the older histogram approach for 1D readers,
  23. * and the newer local approach for 2D readers. 1D decoding using a per-row histogram is already
  24. * inherently local, and only fails for horizontal gradients. We can revisit that problem later,
  25. * but for now it was not a win to use local blocks for 1D.
  26. *
  27. * This Binarizer is the default for the unit tests and the recommended class for library users.
  28. *
  29. * @author dswitkin@google.com (Daniel Switkin)
  30. */
  31. final class Binarizer{
  32. // This class uses 5x5 blocks to compute local luminance, where each block is 8x8 pixels.
  33. // So this is the smallest dimension in each axis we can accept.
  34. private const BLOCK_SIZE_POWER = 3;
  35. private const BLOCK_SIZE = 8; // ...0100...00
  36. private const BLOCK_SIZE_MASK = 7; // ...0011...11
  37. private const MINIMUM_DIMENSION = 40;
  38. private const MIN_DYNAMIC_RANGE = 24;
  39. # private const LUMINANCE_BITS = 5;
  40. private const LUMINANCE_SHIFT = 3;
  41. private const LUMINANCE_BUCKETS = 32;
  42. private LuminanceSourceInterface $source;
  43. /**
  44. *
  45. */
  46. public function __construct(LuminanceSourceInterface $source){
  47. $this->source = $source;
  48. }
  49. /**
  50. * @throws \RuntimeException
  51. */
  52. private function estimateBlackPoint(array $buckets):int{
  53. // Find the tallest peak in the histogram.
  54. $numBuckets = count($buckets);
  55. $maxBucketCount = 0;
  56. $firstPeak = 0;
  57. $firstPeakSize = 0;
  58. for($x = 0; $x < $numBuckets; $x++){
  59. if($buckets[$x] > $firstPeakSize){
  60. $firstPeak = $x;
  61. $firstPeakSize = $buckets[$x];
  62. }
  63. if($buckets[$x] > $maxBucketCount){
  64. $maxBucketCount = $buckets[$x];
  65. }
  66. }
  67. // Find the second-tallest peak which is somewhat far from the tallest peak.
  68. $secondPeak = 0;
  69. $secondPeakScore = 0;
  70. for($x = 0; $x < $numBuckets; $x++){
  71. $distanceToBiggest = $x - $firstPeak;
  72. // Encourage more distant second peaks by multiplying by square of distance.
  73. $score = $buckets[$x] * $distanceToBiggest * $distanceToBiggest;
  74. if($score > $secondPeakScore){
  75. $secondPeak = $x;
  76. $secondPeakScore = $score;
  77. }
  78. }
  79. // Make sure firstPeak corresponds to the black peak.
  80. if($firstPeak > $secondPeak){
  81. $temp = $firstPeak;
  82. $firstPeak = $secondPeak;
  83. $secondPeak = $temp;
  84. }
  85. // If there is too little contrast in the image to pick a meaningful black point, throw rather
  86. // than waste time trying to decode the image, and risk false positives.
  87. if($secondPeak - $firstPeak <= $numBuckets / 16){
  88. throw new RuntimeException('no meaningful dark point found');
  89. }
  90. // Find a valley between them that is low and closer to the white peak.
  91. $bestValley = $secondPeak - 1;
  92. $bestValleyScore = -1;
  93. for($x = $secondPeak - 1; $x > $firstPeak; $x--){
  94. $fromFirst = $x - $firstPeak;
  95. $score = $fromFirst * $fromFirst * ($secondPeak - $x) * ($maxBucketCount - $buckets[$x]);
  96. if($score > $bestValleyScore){
  97. $bestValley = $x;
  98. $bestValleyScore = $score;
  99. }
  100. }
  101. return $bestValley << self::LUMINANCE_SHIFT;
  102. }
  103. /**
  104. * Calculates the final BitMatrix once for all requests. This could be called once from the
  105. * constructor instead, but there are some advantages to doing it lazily, such as making
  106. * profiling easier, and not doing heavy lifting when callers don't expect it.
  107. *
  108. * Converts a 2D array of luminance data to 1 bit data. As above, assume this method is expensive
  109. * and do not call it repeatedly. This method is intended for decoding 2D barcodes and may or
  110. * may not apply sharpening. Therefore, a row from this matrix may not be identical to one
  111. * fetched using getBlackRow(), so don't mix and match between them.
  112. *
  113. * @return \chillerlan\QRCode\Decoder\BitMatrix The 2D array of bits for the image (true means black).
  114. */
  115. public function getBlackMatrix():BitMatrix{
  116. $width = $this->source->getWidth();
  117. $height = $this->source->getHeight();
  118. if($width >= self::MINIMUM_DIMENSION && $height >= self::MINIMUM_DIMENSION){
  119. $subWidth = $width >> self::BLOCK_SIZE_POWER;
  120. if(($width & self::BLOCK_SIZE_MASK) !== 0){
  121. $subWidth++;
  122. }
  123. $subHeight = $height >> self::BLOCK_SIZE_POWER;
  124. if(($height & self::BLOCK_SIZE_MASK) !== 0){
  125. $subHeight++;
  126. }
  127. return $this->calculateThresholdForBlock($subWidth, $subHeight, $width, $height);
  128. }
  129. // If the image is too small, fall back to the global histogram approach.
  130. return $this->getHistogramBlackMatrix($width, $height);
  131. }
  132. /**
  133. *
  134. */
  135. public function getHistogramBlackMatrix(int $width, int $height):BitMatrix{
  136. $matrix = new BitMatrix(max($width, $height));
  137. // Quickly calculates the histogram by sampling four rows from the image. This proved to be
  138. // more robust on the blackbox tests than sampling a diagonal as we used to do.
  139. $buckets = array_fill(0, self::LUMINANCE_BUCKETS, 0);
  140. for($y = 1; $y < 5; $y++){
  141. $row = (int)($height * $y / 5);
  142. $localLuminances = $this->source->getRow($row);
  143. $right = (int)(($width * 4) / 5);
  144. for($x = (int)($width / 5); $x < $right; $x++){
  145. $pixel = $localLuminances[(int)$x] & 0xff;
  146. $buckets[$pixel >> self::LUMINANCE_SHIFT]++;
  147. }
  148. }
  149. $blackPoint = $this->estimateBlackPoint($buckets);
  150. // We delay reading the entire image luminance until the black point estimation succeeds.
  151. // Although we end up reading four rows twice, it is consistent with our motto of
  152. // "fail quickly" which is necessary for continuous scanning.
  153. $localLuminances = $this->source->getMatrix();
  154. for($y = 0; $y < $height; $y++){
  155. $offset = $y * $width;
  156. for($x = 0; $x < $width; $x++){
  157. $pixel = (int)($localLuminances[$offset + $x] & 0xff);
  158. if($pixel < $blackPoint){
  159. $matrix->set($x, $y);
  160. }
  161. }
  162. }
  163. return $matrix;
  164. }
  165. /**
  166. * Calculates a single black point for each block of pixels and saves it away.
  167. * See the following thread for a discussion of this algorithm:
  168. *
  169. * @see http://groups.google.com/group/zxing/browse_thread/thread/d06efa2c35a7ddc0
  170. */
  171. private function calculateBlackPoints(array $luminances, int $subWidth, int $subHeight, int $width, int $height):array{
  172. $blackPoints = array_fill(0, $subHeight, 0);
  173. foreach($blackPoints as $key => $point){
  174. $blackPoints[$key] = array_fill(0, $subWidth, 0);
  175. }
  176. for($y = 0; $y < $subHeight; $y++){
  177. $yoffset = ($y << self::BLOCK_SIZE_POWER);
  178. $maxYOffset = $height - self::BLOCK_SIZE;
  179. if($yoffset > $maxYOffset){
  180. $yoffset = $maxYOffset;
  181. }
  182. for($x = 0; $x < $subWidth; $x++){
  183. $xoffset = ($x << self::BLOCK_SIZE_POWER);
  184. $maxXOffset = $width - self::BLOCK_SIZE;
  185. if($xoffset > $maxXOffset){
  186. $xoffset = $maxXOffset;
  187. }
  188. $sum = 0;
  189. $min = 255;
  190. $max = 0;
  191. for($yy = 0, $offset = $yoffset * $width + $xoffset; $yy < self::BLOCK_SIZE; $yy++, $offset += $width){
  192. for($xx = 0; $xx < self::BLOCK_SIZE; $xx++){
  193. $pixel = (int)($luminances[(int)($offset + $xx)]) & 0xff;
  194. $sum += $pixel;
  195. // still looking for good contrast
  196. if($pixel < $min){
  197. $min = $pixel;
  198. }
  199. if($pixel > $max){
  200. $max = $pixel;
  201. }
  202. }
  203. // short-circuit min/max tests once dynamic range is met
  204. if($max - $min > self::MIN_DYNAMIC_RANGE){
  205. // finish the rest of the rows quickly
  206. for($yy++, $offset += $width; $yy < self::BLOCK_SIZE; $yy++, $offset += $width){
  207. for($xx = 0; $xx < self::BLOCK_SIZE; $xx++){
  208. $sum += $luminances[$offset + $xx] & 0xff;
  209. }
  210. }
  211. }
  212. }
  213. // The default estimate is the average of the values in the block.
  214. $average = $sum >> (self::BLOCK_SIZE_POWER * 2);
  215. if($max - $min <= self::MIN_DYNAMIC_RANGE){
  216. // If variation within the block is low, assume this is a block with only light or only
  217. // dark pixels. In that case we do not want to use the average, as it would divide this
  218. // low contrast area into black and white pixels, essentially creating data out of noise.
  219. //
  220. // The default assumption is that the block is light/background. Since no estimate for
  221. // the level of dark pixels exists locally, use half the min for the block.
  222. $average = (int)($min / 2);
  223. if($y > 0 && $x > 0){
  224. // Correct the "white background" assumption for blocks that have neighbors by comparing
  225. // the pixels in this block to the previously calculated black points. This is based on
  226. // the fact that dark barcode symbology is always surrounded by some amount of light
  227. // background for which reasonable black point estimates were made. The bp estimated at
  228. // the boundaries is used for the interior.
  229. // The (min < bp) is arbitrary but works better than other heuristics that were tried.
  230. $averageNeighborBlackPoint = (int)(($blackPoints[$y - 1][$x] + (2 * $blackPoints[$y][$x - 1]) + $blackPoints[$y - 1][$x - 1]) / 4);
  231. if($min < $averageNeighborBlackPoint){
  232. $average = $averageNeighborBlackPoint;
  233. }
  234. }
  235. }
  236. $blackPoints[$y][$x] = (int)($average);
  237. }
  238. }
  239. return $blackPoints;
  240. }
  241. /**
  242. * For each block in the image, calculate the average black point using a 5x5 grid
  243. * of the blocks around it. Also handles the corner cases (fractional blocks are computed based
  244. * on the last pixels in the row/column which are also used in the previous block).
  245. */
  246. private function calculateThresholdForBlock(
  247. int $subWidth,
  248. int $subHeight,
  249. int $width,
  250. int $height
  251. ):BitMatrix{
  252. $matrix = new BitMatrix(max($width, $height));
  253. $luminances = $this->source->getMatrix();
  254. $blackPoints = $this->calculateBlackPoints($luminances, $subWidth, $subHeight, $width, $height);
  255. for($y = 0; $y < $subHeight; $y++){
  256. $yoffset = ($y << self::BLOCK_SIZE_POWER);
  257. $maxYOffset = $height - self::BLOCK_SIZE;
  258. if($yoffset > $maxYOffset){
  259. $yoffset = $maxYOffset;
  260. }
  261. for($x = 0; $x < $subWidth; $x++){
  262. $xoffset = ($x << self::BLOCK_SIZE_POWER);
  263. $maxXOffset = $width - self::BLOCK_SIZE;
  264. if($xoffset > $maxXOffset){
  265. $xoffset = $maxXOffset;
  266. }
  267. $left = $this->cap($x, 2, $subWidth - 3);
  268. $top = $this->cap($y, 2, $subHeight - 3);
  269. $sum = 0;
  270. for($z = -2; $z <= 2; $z++){
  271. $blackRow = $blackPoints[$top + $z];
  272. $sum += $blackRow[$left - 2] + $blackRow[$left - 1] + $blackRow[$left] + $blackRow[$left + 1] + $blackRow[$left + 2];
  273. }
  274. $average = (int)($sum / 25);
  275. // Applies a single threshold to a block of pixels.
  276. for($j = 0, $o = $yoffset * $width + $xoffset; $j < self::BLOCK_SIZE; $j++, $o += $width){
  277. for($i = 0; $i < self::BLOCK_SIZE; $i++){
  278. // Comparison needs to be <= so that black == 0 pixels are black even if the threshold is 0.
  279. if(($luminances[$o + $i] & 0xff) <= $average){
  280. $matrix->set($xoffset + $i, $yoffset + $j);
  281. }
  282. }
  283. }
  284. }
  285. }
  286. return $matrix;
  287. }
  288. private function cap(int $value, int $min, int $max):int{
  289. if($value < $min){
  290. return $min;
  291. }
  292. if($value > $max){
  293. return $max;
  294. }
  295. return $value;
  296. }
  297. }