123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436 |
- package brotli
- /* Copyright 2013 Google Inc. All Rights Reserved.
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
- /* Functions to estimate the bit cost of Huffman trees. */
- func shannonEntropy(population []uint32, size uint, total *uint) float64 {
- var sum uint = 0
- var retval float64 = 0
- var population_end []uint32 = population[size:]
- var p uint
- for -cap(population) < -cap(population_end) {
- p = uint(population[0])
- population = population[1:]
- sum += p
- retval -= float64(p) * fastLog2(p)
- }
- if sum != 0 {
- retval += float64(sum) * fastLog2(sum)
- }
- *total = sum
- return retval
- }
- func bitsEntropy(population []uint32, size uint) float64 {
- var sum uint
- var retval float64 = shannonEntropy(population, size, &sum)
- if retval < float64(sum) {
- /* At least one bit per literal is needed. */
- retval = float64(sum)
- }
- return retval
- }
- const kOneSymbolHistogramCost float64 = 12
- const kTwoSymbolHistogramCost float64 = 20
- const kThreeSymbolHistogramCost float64 = 28
- const kFourSymbolHistogramCost float64 = 37
- func populationCostLiteral(histogram *histogramLiteral) float64 {
- var data_size uint = histogramDataSizeLiteral()
- var count int = 0
- var s [5]uint
- var bits float64 = 0.0
- var i uint
- if histogram.total_count_ == 0 {
- return kOneSymbolHistogramCost
- }
- for i = 0; i < data_size; i++ {
- if histogram.data_[i] > 0 {
- s[count] = i
- count++
- if count > 4 {
- break
- }
- }
- }
- if count == 1 {
- return kOneSymbolHistogramCost
- }
- if count == 2 {
- return kTwoSymbolHistogramCost + float64(histogram.total_count_)
- }
- if count == 3 {
- var histo0 uint32 = histogram.data_[s[0]]
- var histo1 uint32 = histogram.data_[s[1]]
- var histo2 uint32 = histogram.data_[s[2]]
- var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
- return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
- }
- if count == 4 {
- var histo [4]uint32
- var h23 uint32
- var histomax uint32
- for i = 0; i < 4; i++ {
- histo[i] = histogram.data_[s[i]]
- }
- /* Sort */
- for i = 0; i < 4; i++ {
- var j uint
- for j = i + 1; j < 4; j++ {
- if histo[j] > histo[i] {
- var tmp uint32 = histo[j]
- histo[j] = histo[i]
- histo[i] = tmp
- }
- }
- }
- h23 = histo[2] + histo[3]
- histomax = brotli_max_uint32_t(h23, histo[0])
- return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
- }
- {
- var max_depth uint = 1
- var depth_histo = [codeLengthCodes]uint32{0}
- /* In this loop we compute the entropy of the histogram and simultaneously
- build a simplified histogram of the code length codes where we use the
- zero repeat code 17, but we don't use the non-zero repeat code 16. */
- var log2total float64 = fastLog2(histogram.total_count_)
- for i = 0; i < data_size; {
- if histogram.data_[i] > 0 {
- var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
- /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
- = log2(total_count) - log2(count(symbol)) */
- var depth uint = uint(log2p + 0.5)
- /* Approximate the bit depth by round(-log2(P(symbol))) */
- bits += float64(histogram.data_[i]) * log2p
- if depth > 15 {
- depth = 15
- }
- if depth > max_depth {
- max_depth = depth
- }
- depth_histo[depth]++
- i++
- } else {
- var reps uint32 = 1
- /* Compute the run length of zeros and add the appropriate number of 0
- and 17 code length codes to the code length code histogram. */
- var k uint
- for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
- reps++
- }
- i += uint(reps)
- if i == data_size {
- /* Don't add any cost for the last zero run, since these are encoded
- only implicitly. */
- break
- }
- if reps < 3 {
- depth_histo[0] += reps
- } else {
- reps -= 2
- for reps > 0 {
- depth_histo[repeatZeroCodeLength]++
- /* Add the 3 extra bits for the 17 code length code. */
- bits += 3
- reps >>= 3
- }
- }
- }
- }
- /* Add the estimated encoding cost of the code length code histogram. */
- bits += float64(18 + 2*max_depth)
- /* Add the entropy of the code length code histogram. */
- bits += bitsEntropy(depth_histo[:], codeLengthCodes)
- }
- return bits
- }
- func populationCostCommand(histogram *histogramCommand) float64 {
- var data_size uint = histogramDataSizeCommand()
- var count int = 0
- var s [5]uint
- var bits float64 = 0.0
- var i uint
- if histogram.total_count_ == 0 {
- return kOneSymbolHistogramCost
- }
- for i = 0; i < data_size; i++ {
- if histogram.data_[i] > 0 {
- s[count] = i
- count++
- if count > 4 {
- break
- }
- }
- }
- if count == 1 {
- return kOneSymbolHistogramCost
- }
- if count == 2 {
- return kTwoSymbolHistogramCost + float64(histogram.total_count_)
- }
- if count == 3 {
- var histo0 uint32 = histogram.data_[s[0]]
- var histo1 uint32 = histogram.data_[s[1]]
- var histo2 uint32 = histogram.data_[s[2]]
- var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
- return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
- }
- if count == 4 {
- var histo [4]uint32
- var h23 uint32
- var histomax uint32
- for i = 0; i < 4; i++ {
- histo[i] = histogram.data_[s[i]]
- }
- /* Sort */
- for i = 0; i < 4; i++ {
- var j uint
- for j = i + 1; j < 4; j++ {
- if histo[j] > histo[i] {
- var tmp uint32 = histo[j]
- histo[j] = histo[i]
- histo[i] = tmp
- }
- }
- }
- h23 = histo[2] + histo[3]
- histomax = brotli_max_uint32_t(h23, histo[0])
- return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
- }
- {
- var max_depth uint = 1
- var depth_histo = [codeLengthCodes]uint32{0}
- /* In this loop we compute the entropy of the histogram and simultaneously
- build a simplified histogram of the code length codes where we use the
- zero repeat code 17, but we don't use the non-zero repeat code 16. */
- var log2total float64 = fastLog2(histogram.total_count_)
- for i = 0; i < data_size; {
- if histogram.data_[i] > 0 {
- var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
- /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
- = log2(total_count) - log2(count(symbol)) */
- var depth uint = uint(log2p + 0.5)
- /* Approximate the bit depth by round(-log2(P(symbol))) */
- bits += float64(histogram.data_[i]) * log2p
- if depth > 15 {
- depth = 15
- }
- if depth > max_depth {
- max_depth = depth
- }
- depth_histo[depth]++
- i++
- } else {
- var reps uint32 = 1
- /* Compute the run length of zeros and add the appropriate number of 0
- and 17 code length codes to the code length code histogram. */
- var k uint
- for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
- reps++
- }
- i += uint(reps)
- if i == data_size {
- /* Don't add any cost for the last zero run, since these are encoded
- only implicitly. */
- break
- }
- if reps < 3 {
- depth_histo[0] += reps
- } else {
- reps -= 2
- for reps > 0 {
- depth_histo[repeatZeroCodeLength]++
- /* Add the 3 extra bits for the 17 code length code. */
- bits += 3
- reps >>= 3
- }
- }
- }
- }
- /* Add the estimated encoding cost of the code length code histogram. */
- bits += float64(18 + 2*max_depth)
- /* Add the entropy of the code length code histogram. */
- bits += bitsEntropy(depth_histo[:], codeLengthCodes)
- }
- return bits
- }
- func populationCostDistance(histogram *histogramDistance) float64 {
- var data_size uint = histogramDataSizeDistance()
- var count int = 0
- var s [5]uint
- var bits float64 = 0.0
- var i uint
- if histogram.total_count_ == 0 {
- return kOneSymbolHistogramCost
- }
- for i = 0; i < data_size; i++ {
- if histogram.data_[i] > 0 {
- s[count] = i
- count++
- if count > 4 {
- break
- }
- }
- }
- if count == 1 {
- return kOneSymbolHistogramCost
- }
- if count == 2 {
- return kTwoSymbolHistogramCost + float64(histogram.total_count_)
- }
- if count == 3 {
- var histo0 uint32 = histogram.data_[s[0]]
- var histo1 uint32 = histogram.data_[s[1]]
- var histo2 uint32 = histogram.data_[s[2]]
- var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
- return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
- }
- if count == 4 {
- var histo [4]uint32
- var h23 uint32
- var histomax uint32
- for i = 0; i < 4; i++ {
- histo[i] = histogram.data_[s[i]]
- }
- /* Sort */
- for i = 0; i < 4; i++ {
- var j uint
- for j = i + 1; j < 4; j++ {
- if histo[j] > histo[i] {
- var tmp uint32 = histo[j]
- histo[j] = histo[i]
- histo[i] = tmp
- }
- }
- }
- h23 = histo[2] + histo[3]
- histomax = brotli_max_uint32_t(h23, histo[0])
- return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
- }
- {
- var max_depth uint = 1
- var depth_histo = [codeLengthCodes]uint32{0}
- /* In this loop we compute the entropy of the histogram and simultaneously
- build a simplified histogram of the code length codes where we use the
- zero repeat code 17, but we don't use the non-zero repeat code 16. */
- var log2total float64 = fastLog2(histogram.total_count_)
- for i = 0; i < data_size; {
- if histogram.data_[i] > 0 {
- var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
- /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
- = log2(total_count) - log2(count(symbol)) */
- var depth uint = uint(log2p + 0.5)
- /* Approximate the bit depth by round(-log2(P(symbol))) */
- bits += float64(histogram.data_[i]) * log2p
- if depth > 15 {
- depth = 15
- }
- if depth > max_depth {
- max_depth = depth
- }
- depth_histo[depth]++
- i++
- } else {
- var reps uint32 = 1
- /* Compute the run length of zeros and add the appropriate number of 0
- and 17 code length codes to the code length code histogram. */
- var k uint
- for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
- reps++
- }
- i += uint(reps)
- if i == data_size {
- /* Don't add any cost for the last zero run, since these are encoded
- only implicitly. */
- break
- }
- if reps < 3 {
- depth_histo[0] += reps
- } else {
- reps -= 2
- for reps > 0 {
- depth_histo[repeatZeroCodeLength]++
- /* Add the 3 extra bits for the 17 code length code. */
- bits += 3
- reps >>= 3
- }
- }
- }
- }
- /* Add the estimated encoding cost of the code length code histogram. */
- bits += float64(18 + 2*max_depth)
- /* Add the entropy of the code length code histogram. */
- bits += bitsEntropy(depth_histo[:], codeLengthCodes)
- }
- return bits
- }
|