bit_cost.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. package brotli
  2. /* Copyright 2013 Google Inc. All Rights Reserved.
  3. Distributed under MIT license.
  4. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  5. */
  6. /* Functions to estimate the bit cost of Huffman trees. */
  7. func shannonEntropy(population []uint32, size uint, total *uint) float64 {
  8. var sum uint = 0
  9. var retval float64 = 0
  10. var population_end []uint32 = population[size:]
  11. var p uint
  12. for -cap(population) < -cap(population_end) {
  13. p = uint(population[0])
  14. population = population[1:]
  15. sum += p
  16. retval -= float64(p) * fastLog2(p)
  17. }
  18. if sum != 0 {
  19. retval += float64(sum) * fastLog2(sum)
  20. }
  21. *total = sum
  22. return retval
  23. }
  24. func bitsEntropy(population []uint32, size uint) float64 {
  25. var sum uint
  26. var retval float64 = shannonEntropy(population, size, &sum)
  27. if retval < float64(sum) {
  28. /* At least one bit per literal is needed. */
  29. retval = float64(sum)
  30. }
  31. return retval
  32. }
  33. const kOneSymbolHistogramCost float64 = 12
  34. const kTwoSymbolHistogramCost float64 = 20
  35. const kThreeSymbolHistogramCost float64 = 28
  36. const kFourSymbolHistogramCost float64 = 37
  37. func populationCostLiteral(histogram *histogramLiteral) float64 {
  38. var data_size uint = histogramDataSizeLiteral()
  39. var count int = 0
  40. var s [5]uint
  41. var bits float64 = 0.0
  42. var i uint
  43. if histogram.total_count_ == 0 {
  44. return kOneSymbolHistogramCost
  45. }
  46. for i = 0; i < data_size; i++ {
  47. if histogram.data_[i] > 0 {
  48. s[count] = i
  49. count++
  50. if count > 4 {
  51. break
  52. }
  53. }
  54. }
  55. if count == 1 {
  56. return kOneSymbolHistogramCost
  57. }
  58. if count == 2 {
  59. return kTwoSymbolHistogramCost + float64(histogram.total_count_)
  60. }
  61. if count == 3 {
  62. var histo0 uint32 = histogram.data_[s[0]]
  63. var histo1 uint32 = histogram.data_[s[1]]
  64. var histo2 uint32 = histogram.data_[s[2]]
  65. var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
  66. return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
  67. }
  68. if count == 4 {
  69. var histo [4]uint32
  70. var h23 uint32
  71. var histomax uint32
  72. for i = 0; i < 4; i++ {
  73. histo[i] = histogram.data_[s[i]]
  74. }
  75. /* Sort */
  76. for i = 0; i < 4; i++ {
  77. var j uint
  78. for j = i + 1; j < 4; j++ {
  79. if histo[j] > histo[i] {
  80. var tmp uint32 = histo[j]
  81. histo[j] = histo[i]
  82. histo[i] = tmp
  83. }
  84. }
  85. }
  86. h23 = histo[2] + histo[3]
  87. histomax = brotli_max_uint32_t(h23, histo[0])
  88. return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
  89. }
  90. {
  91. var max_depth uint = 1
  92. var depth_histo = [codeLengthCodes]uint32{0}
  93. /* In this loop we compute the entropy of the histogram and simultaneously
  94. build a simplified histogram of the code length codes where we use the
  95. zero repeat code 17, but we don't use the non-zero repeat code 16. */
  96. var log2total float64 = fastLog2(histogram.total_count_)
  97. for i = 0; i < data_size; {
  98. if histogram.data_[i] > 0 {
  99. var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
  100. /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
  101. = log2(total_count) - log2(count(symbol)) */
  102. var depth uint = uint(log2p + 0.5)
  103. /* Approximate the bit depth by round(-log2(P(symbol))) */
  104. bits += float64(histogram.data_[i]) * log2p
  105. if depth > 15 {
  106. depth = 15
  107. }
  108. if depth > max_depth {
  109. max_depth = depth
  110. }
  111. depth_histo[depth]++
  112. i++
  113. } else {
  114. var reps uint32 = 1
  115. /* Compute the run length of zeros and add the appropriate number of 0
  116. and 17 code length codes to the code length code histogram. */
  117. var k uint
  118. for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
  119. reps++
  120. }
  121. i += uint(reps)
  122. if i == data_size {
  123. /* Don't add any cost for the last zero run, since these are encoded
  124. only implicitly. */
  125. break
  126. }
  127. if reps < 3 {
  128. depth_histo[0] += reps
  129. } else {
  130. reps -= 2
  131. for reps > 0 {
  132. depth_histo[repeatZeroCodeLength]++
  133. /* Add the 3 extra bits for the 17 code length code. */
  134. bits += 3
  135. reps >>= 3
  136. }
  137. }
  138. }
  139. }
  140. /* Add the estimated encoding cost of the code length code histogram. */
  141. bits += float64(18 + 2*max_depth)
  142. /* Add the entropy of the code length code histogram. */
  143. bits += bitsEntropy(depth_histo[:], codeLengthCodes)
  144. }
  145. return bits
  146. }
  147. func populationCostCommand(histogram *histogramCommand) float64 {
  148. var data_size uint = histogramDataSizeCommand()
  149. var count int = 0
  150. var s [5]uint
  151. var bits float64 = 0.0
  152. var i uint
  153. if histogram.total_count_ == 0 {
  154. return kOneSymbolHistogramCost
  155. }
  156. for i = 0; i < data_size; i++ {
  157. if histogram.data_[i] > 0 {
  158. s[count] = i
  159. count++
  160. if count > 4 {
  161. break
  162. }
  163. }
  164. }
  165. if count == 1 {
  166. return kOneSymbolHistogramCost
  167. }
  168. if count == 2 {
  169. return kTwoSymbolHistogramCost + float64(histogram.total_count_)
  170. }
  171. if count == 3 {
  172. var histo0 uint32 = histogram.data_[s[0]]
  173. var histo1 uint32 = histogram.data_[s[1]]
  174. var histo2 uint32 = histogram.data_[s[2]]
  175. var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
  176. return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
  177. }
  178. if count == 4 {
  179. var histo [4]uint32
  180. var h23 uint32
  181. var histomax uint32
  182. for i = 0; i < 4; i++ {
  183. histo[i] = histogram.data_[s[i]]
  184. }
  185. /* Sort */
  186. for i = 0; i < 4; i++ {
  187. var j uint
  188. for j = i + 1; j < 4; j++ {
  189. if histo[j] > histo[i] {
  190. var tmp uint32 = histo[j]
  191. histo[j] = histo[i]
  192. histo[i] = tmp
  193. }
  194. }
  195. }
  196. h23 = histo[2] + histo[3]
  197. histomax = brotli_max_uint32_t(h23, histo[0])
  198. return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
  199. }
  200. {
  201. var max_depth uint = 1
  202. var depth_histo = [codeLengthCodes]uint32{0}
  203. /* In this loop we compute the entropy of the histogram and simultaneously
  204. build a simplified histogram of the code length codes where we use the
  205. zero repeat code 17, but we don't use the non-zero repeat code 16. */
  206. var log2total float64 = fastLog2(histogram.total_count_)
  207. for i = 0; i < data_size; {
  208. if histogram.data_[i] > 0 {
  209. var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
  210. /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
  211. = log2(total_count) - log2(count(symbol)) */
  212. var depth uint = uint(log2p + 0.5)
  213. /* Approximate the bit depth by round(-log2(P(symbol))) */
  214. bits += float64(histogram.data_[i]) * log2p
  215. if depth > 15 {
  216. depth = 15
  217. }
  218. if depth > max_depth {
  219. max_depth = depth
  220. }
  221. depth_histo[depth]++
  222. i++
  223. } else {
  224. var reps uint32 = 1
  225. /* Compute the run length of zeros and add the appropriate number of 0
  226. and 17 code length codes to the code length code histogram. */
  227. var k uint
  228. for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
  229. reps++
  230. }
  231. i += uint(reps)
  232. if i == data_size {
  233. /* Don't add any cost for the last zero run, since these are encoded
  234. only implicitly. */
  235. break
  236. }
  237. if reps < 3 {
  238. depth_histo[0] += reps
  239. } else {
  240. reps -= 2
  241. for reps > 0 {
  242. depth_histo[repeatZeroCodeLength]++
  243. /* Add the 3 extra bits for the 17 code length code. */
  244. bits += 3
  245. reps >>= 3
  246. }
  247. }
  248. }
  249. }
  250. /* Add the estimated encoding cost of the code length code histogram. */
  251. bits += float64(18 + 2*max_depth)
  252. /* Add the entropy of the code length code histogram. */
  253. bits += bitsEntropy(depth_histo[:], codeLengthCodes)
  254. }
  255. return bits
  256. }
  257. func populationCostDistance(histogram *histogramDistance) float64 {
  258. var data_size uint = histogramDataSizeDistance()
  259. var count int = 0
  260. var s [5]uint
  261. var bits float64 = 0.0
  262. var i uint
  263. if histogram.total_count_ == 0 {
  264. return kOneSymbolHistogramCost
  265. }
  266. for i = 0; i < data_size; i++ {
  267. if histogram.data_[i] > 0 {
  268. s[count] = i
  269. count++
  270. if count > 4 {
  271. break
  272. }
  273. }
  274. }
  275. if count == 1 {
  276. return kOneSymbolHistogramCost
  277. }
  278. if count == 2 {
  279. return kTwoSymbolHistogramCost + float64(histogram.total_count_)
  280. }
  281. if count == 3 {
  282. var histo0 uint32 = histogram.data_[s[0]]
  283. var histo1 uint32 = histogram.data_[s[1]]
  284. var histo2 uint32 = histogram.data_[s[2]]
  285. var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
  286. return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
  287. }
  288. if count == 4 {
  289. var histo [4]uint32
  290. var h23 uint32
  291. var histomax uint32
  292. for i = 0; i < 4; i++ {
  293. histo[i] = histogram.data_[s[i]]
  294. }
  295. /* Sort */
  296. for i = 0; i < 4; i++ {
  297. var j uint
  298. for j = i + 1; j < 4; j++ {
  299. if histo[j] > histo[i] {
  300. var tmp uint32 = histo[j]
  301. histo[j] = histo[i]
  302. histo[i] = tmp
  303. }
  304. }
  305. }
  306. h23 = histo[2] + histo[3]
  307. histomax = brotli_max_uint32_t(h23, histo[0])
  308. return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
  309. }
  310. {
  311. var max_depth uint = 1
  312. var depth_histo = [codeLengthCodes]uint32{0}
  313. /* In this loop we compute the entropy of the histogram and simultaneously
  314. build a simplified histogram of the code length codes where we use the
  315. zero repeat code 17, but we don't use the non-zero repeat code 16. */
  316. var log2total float64 = fastLog2(histogram.total_count_)
  317. for i = 0; i < data_size; {
  318. if histogram.data_[i] > 0 {
  319. var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
  320. /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
  321. = log2(total_count) - log2(count(symbol)) */
  322. var depth uint = uint(log2p + 0.5)
  323. /* Approximate the bit depth by round(-log2(P(symbol))) */
  324. bits += float64(histogram.data_[i]) * log2p
  325. if depth > 15 {
  326. depth = 15
  327. }
  328. if depth > max_depth {
  329. max_depth = depth
  330. }
  331. depth_histo[depth]++
  332. i++
  333. } else {
  334. var reps uint32 = 1
  335. /* Compute the run length of zeros and add the appropriate number of 0
  336. and 17 code length codes to the code length code histogram. */
  337. var k uint
  338. for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
  339. reps++
  340. }
  341. i += uint(reps)
  342. if i == data_size {
  343. /* Don't add any cost for the last zero run, since these are encoded
  344. only implicitly. */
  345. break
  346. }
  347. if reps < 3 {
  348. depth_histo[0] += reps
  349. } else {
  350. reps -= 2
  351. for reps > 0 {
  352. depth_histo[repeatZeroCodeLength]++
  353. /* Add the 3 extra bits for the 17 code length code. */
  354. bits += 3
  355. reps >>= 3
  356. }
  357. }
  358. }
  359. }
  360. /* Add the estimated encoding cost of the code length code histogram. */
  361. bits += float64(18 + 2*max_depth)
  362. /* Add the entropy of the code length code histogram. */
  363. bits += bitsEntropy(depth_histo[:], codeLengthCodes)
  364. }
  365. return bits
  366. }