package brotli

import (
	"io"
	"math"
)

/* Copyright 2016 Google Inc. All Rights Reserved.
   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/** Minimal value for ::BROTLI_PARAM_LGWIN parameter. */
const minWindowBits = 10

/**
 * Maximal value for ::BROTLI_PARAM_LGWIN parameter.
 *
 * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
 */
const maxWindowBits = 24

/**
 * Maximal value for ::BROTLI_PARAM_LGWIN parameter
 * in "Large Window Brotli" (32-bit).
 */
const largeMaxWindowBits = 30

/** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */
const minInputBlockBits = 16

/** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */
const maxInputBlockBits = 24

/** Minimal value for ::BROTLI_PARAM_QUALITY parameter. */
const minQuality = 0

/** Maximal value for ::BROTLI_PARAM_QUALITY parameter. */
const maxQuality = 11

/** Options for ::BROTLI_PARAM_MODE parameter. */
const (
	modeGeneric = 0
	modeText    = 1
	modeFont    = 2
)

/** Default value for ::BROTLI_PARAM_QUALITY parameter. */
const defaultQuality = 11

/** Default value for ::BROTLI_PARAM_LGWIN parameter. */
const defaultWindow = 22

/** Default value for ::BROTLI_PARAM_MODE parameter. */
const defaultMode = modeGeneric

/** Operations that can be performed by streaming encoder. */
const (
	operationProcess      = 0
	operationFlush        = 1
	operationFinish       = 2
	operationEmitMetadata = 3
)

const (
	streamProcessing     = 0
	streamFlushRequested = 1
	streamFinished       = 2
	streamMetadataHead   = 3
	streamMetadataBody   = 4
)
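
/* Stream state machine, as driven by encoderCompressStream and
   processMetadata below: streamProcessing accepts input; a flush moves the
   stream to streamFlushRequested until the byte-padding block is emitted;
   metadata emission walks streamMetadataHead -> streamMetadataBody and back
   to streamProcessing; streamFinished is terminal once the last block has
   been written. */
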
type Writer struct {
	dst     io.Writer
	options WriterOptions
	err     error

	params              encoderParams
	hasher_             hasherHandle
	input_pos_          uint64
	ringbuffer_         ringBuffer
	commands            []command
	num_literals_       uint
	last_insert_len_    uint
	last_flush_pos_     uint64
	last_processed_pos_ uint64
	dist_cache_         [numDistanceShortCodes]int
	saved_dist_cache_   [4]int
	last_bytes_         uint16
	last_bytes_bits_    byte
	prev_byte_          byte
	prev_byte2_         byte
	storage             []byte
	small_table_        [1 << 10]int
	large_table_        []int
	large_table_size_   uint
	cmd_depths_         [128]byte
	cmd_bits_           [128]uint16
	cmd_code_           [512]byte
	cmd_code_numbits_   uint
	command_buf_        []uint32
	literal_buf_        []byte
	tiny_buf_           struct {
		u64 [2]uint64
		u8  [16]byte
	}
	remaining_metadata_bytes_ uint32
	stream_state_             int
	is_last_block_emitted_    bool
	is_initialized_           bool
}

func inputBlockSize(s *Writer) uint {
	return uint(1) << uint(s.params.lgblock)
}

func unprocessedInputSize(s *Writer) uint64 {
	return s.input_pos_ - s.last_processed_pos_
}

func remainingInputBlockSize(s *Writer) uint {
	var delta uint64 = unprocessedInputSize(s)
	var block_size uint = inputBlockSize(s)
	if delta >= uint64(block_size) {
		return 0
	}
	return block_size - uint(delta)
}

/* Wraps the 64-bit input position to a 32-bit ring-buffer position, preserving
   the "not-a-first-lap" feature. */
func wrapPosition(position uint64) uint32 {
	var result uint32 = uint32(position)
	var gb uint64 = position >> 30
	if gb > 2 {
		/* Wrap every 2GiB; the first 3GiB are continuous. */
		result = result&((1<<30)-1) | (uint32((gb-1)&1)+1)<<30
	}
	return result
}
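
/* Worked example: positions below 3GiB map to themselves. Beyond that, the
   result alternates between the second and third GiB windows, so it can never
   be mistaken for a first-lap position: wrapPosition(3GiB+5) == 1GiB+5,
   wrapPosition(4GiB+5) == 2GiB+5, wrapPosition(5GiB+5) == 1GiB+5, and so on. */
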
func (s *Writer) getStorage(size int) []byte {
	if len(s.storage) < size {
		s.storage = make([]byte, size)
	}

	return s.storage
}

func hashTableSize(max_table_size uint, input_size uint) uint {
	var htsize uint = 256
	for htsize < max_table_size && htsize < input_size {
		htsize <<= 1
	}

	return htsize
}
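
/* Example: hashTableSize(1<<15, 1000) doubles 256 -> 512 -> 1024 and stops,
   since 1024 is no longer smaller than the input size; short inputs therefore
   get proportionally small tables. */
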
func getHashTable(s *Writer, quality int, input_size uint, table_size *uint) []int {
	var max_table_size uint = maxHashTableSize(quality)
	var htsize uint = hashTableSize(max_table_size, input_size)
	/* Use smaller hash table when input.size() is smaller, since we
	   fill the table, incurring O(hash table size) overhead for
	   compression, and if the input is short, we won't need that
	   many hash table entries anyway. */
	var table []int
	assert(max_table_size >= 256)
	if quality == fastOnePassCompressionQuality {
		/* Only odd shifts are supported by fast-one-pass. */
		if htsize&0xAAAAA == 0 {
			htsize <<= 1
		}
	}

	if htsize <= uint(len(s.small_table_)) {
		table = s.small_table_[:]
	} else {
		if htsize > s.large_table_size_ {
			s.large_table_size_ = htsize
			s.large_table_ = nil
			s.large_table_ = make([]int, htsize)
		}

		table = s.large_table_
	}

	*table_size = htsize
	for i := 0; i < int(htsize); i++ {
		table[i] = 0
	}
	return table
}

func encodeWindowBits(lgwin int, large_window bool, last_bytes *uint16, last_bytes_bits *byte) {
	if large_window {
		*last_bytes = uint16((lgwin&0x3F)<<8 | 0x11)
		*last_bytes_bits = 14
	} else {
		if lgwin == 16 {
			*last_bytes = 0
			*last_bytes_bits = 1
		} else if lgwin == 17 {
			*last_bytes = 1
			*last_bytes_bits = 7
		} else if lgwin > 17 {
			*last_bytes = uint16((lgwin-17)<<1 | 0x01)
			*last_bytes_bits = 4
		} else {
			*last_bytes = uint16((lgwin-8)<<4 | 0x01)
			*last_bytes_bits = 7
		}
	}
}
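
/* These are the WBITS codes of the brotli stream header (RFC 7932): a single
   0 bit means lgwin 16, the 4-bit codes cover lgwin 18..24, and the 7-bit
   codes cover lgwin 17 and 10..15; large-window streams use a distinct 14-bit
   prefix instead. */
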
/* Decides about the context map based on how well the UTF8 prefix of the
   previous byte predicts the next byte. The prediction ability is calculated
   as Shannon entropy. Here we need Shannon entropy instead of 'BitsEntropy'
   since the prefix will be encoded with the remaining 6 bits of the following
   byte, and BitsEntropy will assume that symbol to be stored alone using
   Huffman coding. */
var kStaticContextMapContinuation = [64]uint32{
	1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}

var kStaticContextMapSimpleUTF8 = [64]uint32{
	0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}

func chooseContextMap(quality int, bigram_histo []uint32, num_literal_contexts *uint, literal_context_map *[]uint32) {
	var monogram_histo = [3]uint32{0}
	var two_prefix_histo = [6]uint32{0}
	var total uint
	var i uint
	var dummy uint
	var entropy [4]float64
	for i = 0; i < 9; i++ {
		monogram_histo[i%3] += bigram_histo[i]
		two_prefix_histo[i%6] += bigram_histo[i]
	}

	entropy[1] = shannonEntropy(monogram_histo[:], 3, &dummy)
	entropy[2] = shannonEntropy(two_prefix_histo[:], 3, &dummy) + shannonEntropy(two_prefix_histo[3:], 3, &dummy)
	entropy[3] = 0
	for i = 0; i < 3; i++ {
		entropy[3] += shannonEntropy(bigram_histo[3*i:], 3, &dummy)
	}

	total = uint(monogram_histo[0] + monogram_histo[1] + monogram_histo[2])
	assert(total != 0)
	entropy[0] = 1.0 / float64(total)
	entropy[1] *= entropy[0]
	entropy[2] *= entropy[0]
	entropy[3] *= entropy[0]
	if quality < minQualityForHqContextModeling {
		/* 3 context models is a bit slower, don't use it at lower qualities. */
		entropy[3] = entropy[1] * 10
	}

	/* If expected savings by symbol are less than 0.2 bits, skip the
	   context modeling -- in exchange for faster decoding speed. */
	if entropy[1]-entropy[2] < 0.2 && entropy[1]-entropy[3] < 0.2 {
		*num_literal_contexts = 1
	} else if entropy[2]-entropy[3] < 0.02 {
		*num_literal_contexts = 2
		*literal_context_map = kStaticContextMapSimpleUTF8[:]
	} else {
		*num_literal_contexts = 3
		*literal_context_map = kStaticContextMapContinuation[:]
	}
}
/* Decide if we want to use a more complex static context map containing 13
   context values, based on the entropy reduction of histograms over the
   first 5 bits of literals. */
var kStaticContextMapComplexUTF8 = [64]uint32{
	11, 11, 12, 12, /* 0 special */
	0, 0, 0, 0, /* 4 lf */
	1, 1, 9, 9, /* 8 space */
	2, 2, 2, 2, /* !, first after space/lf and after something else. */
	1, 1, 1, 1, /* " */
	8, 3, 3, 3, /* % */
	1, 1, 1, 1, /* ({[ */
	2, 2, 2, 2, /* }]) */
	8, 4, 4, 4, /* :; */
	8, 7, 4, 4, /* . */
	8, 0, 0, 0, /* > */
	3, 3, 3, 3, /* [0..9] */
	5, 5, 10, 5, /* [A-Z] */
	5, 5, 10, 5,
	6, 6, 6, 6, /* [a-z] */
	6, 6, 6, 6,
}

func shouldUseComplexStaticContextMap(input []byte, start_pos uint, length uint, mask uint, quality int, size_hint uint, num_literal_contexts *uint, literal_context_map *[]uint32) bool {
	/* Try the more complex static context map only for long data. */
	if size_hint < 1<<20 {
		return false
	} else {
		var end_pos uint = start_pos + length
		var combined_histo = [32]uint32{0}
		var context_histo = [13][32]uint32{[32]uint32{0}}
		var total uint32 = 0
		var entropy [3]float64
		var dummy uint
		var i uint
		var utf8_lut contextLUT = getContextLUT(contextUTF8)
		/* To make entropy calculations faster and to fit on the stack, we collect
		   histograms over the 5 most significant bits of literals. One histogram
		   without context and 13 additional histograms for each context value. */
		for ; start_pos+64 <= end_pos; start_pos += 4096 {
			var stride_end_pos uint = start_pos + 64
			var prev2 byte = input[start_pos&mask]
			var prev1 byte = input[(start_pos+1)&mask]
			var pos uint

			/* To make the analysis of the data faster we only examine 64 byte long
			   strides at every 4kB intervals. */
			for pos = start_pos + 2; pos < stride_end_pos; pos++ {
				var literal byte = input[pos&mask]
				var context byte = byte(kStaticContextMapComplexUTF8[getContext(prev1, prev2, utf8_lut)])
				total++
				combined_histo[literal>>3]++
				context_histo[context][literal>>3]++
				prev2 = prev1
				prev1 = literal
			}
		}

		entropy[1] = shannonEntropy(combined_histo[:], 32, &dummy)
		entropy[2] = 0
		for i = 0; i < 13; i++ {
			entropy[2] += shannonEntropy(context_histo[i][0:], 32, &dummy)
		}

		entropy[0] = 1.0 / float64(total)
		entropy[1] *= entropy[0]
		entropy[2] *= entropy[0]

		/* The triggering heuristics below were tuned by compressing the individual
		   files of the silesia corpus. If we skip this kind of context modeling
		   for not very well compressible input (i.e. entropy using context modeling
		   is 60% of maximal entropy) or if expected savings by symbol are less
		   than 0.2 bits, then in every case when it triggers, the final compression
		   ratio is improved. Note however that this heuristics might be too strict
		   for some cases and could be tuned further. */
		if entropy[2] > 3.0 || entropy[1]-entropy[2] < 0.2 {
			return false
		} else {
			*num_literal_contexts = 13
			*literal_context_map = kStaticContextMapComplexUTF8[:]
			return true
		}
	}
}
func decideOverLiteralContextModeling(input []byte, start_pos uint, length uint, mask uint, quality int, size_hint uint, num_literal_contexts *uint, literal_context_map *[]uint32) {
	if quality < minQualityForContextModeling || length < 64 {
		return
	} else if shouldUseComplexStaticContextMap(input, start_pos, length, mask, quality, size_hint, num_literal_contexts, literal_context_map) {
		/* Context map was already set, nothing else to do. */
	} else {
		var end_pos uint = start_pos + length
		/* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
		   UTF8 data faster we only examine 64 byte long strides at every 4kB
		   intervals. */
		var bigram_prefix_histo = [9]uint32{0}
		for ; start_pos+64 <= end_pos; start_pos += 4096 {
			var lut = [4]int{0, 0, 1, 2}
			var stride_end_pos uint = start_pos + 64
			var prev int = lut[input[start_pos&mask]>>6] * 3
			var pos uint
			for pos = start_pos + 1; pos < stride_end_pos; pos++ {
				var literal byte = input[pos&mask]
				bigram_prefix_histo[prev+lut[literal>>6]]++
				prev = lut[literal>>6] * 3
			}
		}

		chooseContextMap(quality, bigram_prefix_histo[0:], num_literal_contexts, literal_context_map)
	}
}
func shouldCompress_encode(data []byte, mask uint, last_flush_pos uint64, bytes uint, num_literals uint, num_commands uint) bool {
	/* TODO: find more precise minimal block overhead. */
	if bytes <= 2 {
		return false
	}
	if num_commands < (bytes>>8)+2 {
		if float64(num_literals) > 0.99*float64(bytes) {
			var literal_histo = [256]uint32{0}
			const kSampleRate uint32 = 13
			const kMinEntropy float64 = 7.92
			var bit_cost_threshold float64 = float64(bytes) * kMinEntropy / float64(kSampleRate)
			var t uint = uint((uint32(bytes) + kSampleRate - 1) / kSampleRate)
			var pos uint32 = uint32(last_flush_pos)
			var i uint
			for i = 0; i < t; i++ {
				literal_histo[data[pos&uint32(mask)]]++
				pos += kSampleRate
			}

			if bitsEntropy(literal_histo[:], 256) > bit_cost_threshold {
				return false
			}
		}
	}

	return true
}
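
/* The heuristic above samples every 13th byte and gives up on compression
   when the sampled literals carry more than kMinEntropy = 7.92 bits each,
   i.e. the block is nearly incompressible and is cheaper to store as an
   uncompressed meta-block. */
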
/* Chooses the literal context mode for a metablock */
func chooseContextMode(params *encoderParams, data []byte, pos uint, mask uint, length uint) int {
	/* We only do the computation for the option of something else than
	   CONTEXT_UTF8 for the highest qualities */
	if params.quality >= minQualityForHqBlockSplitting && !isMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio) {
		return contextSigned
	}

	return contextUTF8
}

func writeMetaBlockInternal(data []byte, mask uint, last_flush_pos uint64, bytes uint, is_last bool, literal_context_mode int, params *encoderParams, prev_byte byte, prev_byte2 byte, num_literals uint, commands []command, saved_dist_cache []int, dist_cache []int, storage_ix *uint, storage []byte) {
	var wrapped_last_flush_pos uint32 = wrapPosition(last_flush_pos)
	var last_bytes uint16
	var last_bytes_bits byte
	var literal_context_lut contextLUT = getContextLUT(literal_context_mode)
	var block_params encoderParams = *params
	if bytes == 0 {
		/* Write the ISLAST and ISEMPTY bits. */
		writeBits(2, 3, storage_ix, storage)

		*storage_ix = (*storage_ix + 7) &^ 7
		return
	}

	if !shouldCompress_encode(data, mask, last_flush_pos, bytes, num_literals, uint(len(commands))) {
		/* Restore the distance cache, as its last update by
		   CreateBackwardReferences is now unused. */
		copy(dist_cache, saved_dist_cache[:4])

		storeUncompressedMetaBlock(is_last, data, uint(wrapped_last_flush_pos), mask, bytes, storage_ix, storage)
		return
	}

	assert(*storage_ix <= 14)
	last_bytes = uint16(storage[1])<<8 | uint16(storage[0])
	last_bytes_bits = byte(*storage_ix)
	if params.quality <= maxQualityForStaticEntropyCodes {
		storeMetaBlockFast(data, uint(wrapped_last_flush_pos), bytes, mask, is_last, params, commands, storage_ix, storage)
	} else if params.quality < minQualityForBlockSplit {
		storeMetaBlockTrivial(data, uint(wrapped_last_flush_pos), bytes, mask, is_last, params, commands, storage_ix, storage)
	} else {
		mb := getMetaBlockSplit()
		if params.quality < minQualityForHqBlockSplitting {
			var num_literal_contexts uint = 1
			var literal_context_map []uint32 = nil
			if !params.disable_literal_context_modeling {
				decideOverLiteralContextModeling(data, uint(wrapped_last_flush_pos), bytes, mask, params.quality, params.size_hint, &num_literal_contexts, &literal_context_map)
			}

			buildMetaBlockGreedy(data, uint(wrapped_last_flush_pos), mask, prev_byte, prev_byte2, literal_context_lut, num_literal_contexts, literal_context_map, commands, mb)
		} else {
			buildMetaBlock(data, uint(wrapped_last_flush_pos), mask, &block_params, prev_byte, prev_byte2, commands, literal_context_mode, mb)
		}

		if params.quality >= minQualityForOptimizeHistograms {
			/* The number of distance symbols effectively used for distance
			   histograms. It might be less than distance alphabet size
			   for "Large Window Brotli" (32-bit). */
			var num_effective_dist_codes uint32 = block_params.dist.alphabet_size
			if num_effective_dist_codes > numHistogramDistanceSymbols {
				num_effective_dist_codes = numHistogramDistanceSymbols
			}

			optimizeHistograms(num_effective_dist_codes, mb)
		}

		storeMetaBlock(data, uint(wrapped_last_flush_pos), bytes, mask, prev_byte, prev_byte2, is_last, &block_params, literal_context_mode, commands, mb, storage_ix, storage)
		freeMetaBlockSplit(mb)
	}

	if bytes+4 < *storage_ix>>3 {
		/* Restore the distance cache and last byte. */
		copy(dist_cache, saved_dist_cache[:4])

		storage[0] = byte(last_bytes)
		storage[1] = byte(last_bytes >> 8)
		*storage_ix = uint(last_bytes_bits)
		storeUncompressedMetaBlock(is_last, data, uint(wrapped_last_flush_pos), mask, bytes, storage_ix, storage)
	}
}
func chooseDistanceParams(params *encoderParams) {
	var distance_postfix_bits uint32 = 0
	var num_direct_distance_codes uint32 = 0
	if params.quality >= minQualityForNonzeroDistanceParams {
		var ndirect_msb uint32
		if params.mode == modeFont {
			distance_postfix_bits = 1
			num_direct_distance_codes = 12
		} else {
			distance_postfix_bits = params.dist.distance_postfix_bits
			num_direct_distance_codes = params.dist.num_direct_distance_codes
		}

		ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F
		if distance_postfix_bits > maxNpostfix || num_direct_distance_codes > maxNdirect || ndirect_msb<<distance_postfix_bits != num_direct_distance_codes {
			distance_postfix_bits = 0
			num_direct_distance_codes = 0
		}
	}

	initDistanceParams(params, distance_postfix_bits, num_direct_distance_codes)
}
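
/* The msb round-trip check above enforces the format constraint that NDIRECT
   must be a multiple of (1 << NPOSTFIX) and expressible in 4 bits after the
   shift; any other combination falls back to the default (0, 0) pair. */
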
func ensureInitialized(s *Writer) bool {
	if s.is_initialized_ {
		return true
	}

	s.last_bytes_bits_ = 0
	s.last_bytes_ = 0
	s.remaining_metadata_bytes_ = math.MaxUint32
	sanitizeParams(&s.params)
	s.params.lgblock = computeLgBlock(&s.params)
	chooseDistanceParams(&s.params)
	ringBufferSetup(&s.params, &s.ringbuffer_)

	/* Initialize last byte with stream header. */
	{
		var lgwin int = int(s.params.lgwin)
		if s.params.quality == fastOnePassCompressionQuality || s.params.quality == fastTwoPassCompressionQuality {
			lgwin = brotli_max_int(lgwin, 18)
		}

		encodeWindowBits(lgwin, s.params.large_window, &s.last_bytes_, &s.last_bytes_bits_)
	}

	if s.params.quality == fastOnePassCompressionQuality {
		s.cmd_depths_ = [128]byte{
			0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
			0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
			7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
			7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
			5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
			6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
			4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
			12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
		}
		s.cmd_bits_ = [128]uint16{
			0, 0, 8, 9, 3, 35, 7, 71,
			39, 103, 23, 47, 175, 111, 239, 31,
			0, 0, 0, 4, 12, 2, 10, 6,
			13, 29, 11, 43, 27, 59, 87, 55,
			15, 79, 319, 831, 191, 703, 447, 959,
			0, 14, 1, 25, 5, 21, 19, 51,
			119, 159, 95, 223, 479, 991, 63, 575,
			127, 639, 383, 895, 255, 767, 511, 1023,
			14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
			27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
			2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
			767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
		}
		s.cmd_code_ = [512]byte{
			0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
			0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
			0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
			0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
			0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
		}
		s.cmd_code_numbits_ = 448
	}

	s.is_initialized_ = true
	return true
}

func encoderInitParams(params *encoderParams) {
	params.mode = defaultMode
	params.large_window = false
	params.quality = defaultQuality
	params.lgwin = defaultWindow
	params.lgblock = 0
	params.size_hint = 0
	params.disable_literal_context_modeling = false
	initEncoderDictionary(&params.dictionary)
	params.dist.distance_postfix_bits = 0
	params.dist.num_direct_distance_codes = 0
	params.dist.alphabet_size = uint32(distanceAlphabetSize(0, 0, maxDistanceBits))
	params.dist.max_distance = maxDistance
}
func encoderInitState(s *Writer) {
	encoderInitParams(&s.params)
	s.input_pos_ = 0
	s.commands = s.commands[:0]
	s.num_literals_ = 0
	s.last_insert_len_ = 0
	s.last_flush_pos_ = 0
	s.last_processed_pos_ = 0
	s.prev_byte_ = 0
	s.prev_byte2_ = 0
	if s.hasher_ != nil {
		s.hasher_.Common().is_prepared_ = false
	}

	s.cmd_code_numbits_ = 0
	s.stream_state_ = streamProcessing
	s.is_last_block_emitted_ = false
	s.is_initialized_ = false

	ringBufferInit(&s.ringbuffer_)

	/* Initialize distance cache. */
	s.dist_cache_[0] = 4
	s.dist_cache_[1] = 11
	s.dist_cache_[2] = 15
	s.dist_cache_[3] = 16
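
	/* 4, 11, 15, 16 are the starting values of the distance ring buffer
	   prescribed by the brotli format (RFC 7932), so encoder and decoder
	   agree before any distance has been coded. */
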
	/* Save the state of the distance cache in case we need to restore it for
	   emitting an uncompressed block. */
	copy(s.saved_dist_cache_[:], s.dist_cache_[:])
}
/*
Copies the given input data to the internal ring buffer of the compressor.
No processing of the data occurs at this time and this function can be
called multiple times before calling WriteBrotliData() to process the
accumulated input. At most input_block_size() bytes of input data can be
copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
*/
func copyInputToRingBuffer(s *Writer, input_size uint, input_buffer []byte) {
	var ringbuffer_ *ringBuffer = &s.ringbuffer_
	ringBufferWrite(input_buffer, input_size, ringbuffer_)
	s.input_pos_ += uint64(input_size)

	/* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
	   hashing not depend on uninitialized data. This makes compression
	   deterministic and it prevents uninitialized memory warnings in Valgrind.
	   Even without erasing, the output would be valid (but nondeterministic).

	   Background information: The compressor stores short (at most 8 bytes)
	   substrings of the input already read in a hash table, and detects
	   repetitions by looking up such substrings in the hash table. If it
	   can find a substring, it checks whether the substring is really there
	   in the ring buffer (or it's just a hash collision). Should the hash
	   table become corrupt, this check makes sure that the output is
	   still valid, albeit the compression ratio would be bad.

	   The compressor populates the hash table from the ring buffer as it's
	   reading new bytes from the input. However, at the last few indexes of
	   the ring buffer, there are not enough bytes to build full-length
	   substrings from. Since the hash table always contains full-length
	   substrings, we erase with dummy zeros here to make sure that those
	   substrings will contain zeros at the end instead of uninitialized
	   data.

	   Please note that erasing is not necessary (because the memory region
	   is already initialized, since the ring buffer has a "tail" that holds
	   a copy of the beginning), so we skip erasing if we have already gone
	   around at least once in the ring buffer.

	   Only clear during the first round of ring-buffer writes; on subsequent
	   rounds data in the ring buffer would be affected. */
	if ringbuffer_.pos_ <= ringbuffer_.mask_ {
		/* This is the first time the ring buffer is being written to.
		   We clear 7 bytes just after the bytes that have been copied from
		   the input buffer.

		   The ring buffer has a "tail" that holds a copy of the beginning,
		   but only once the ring buffer has been fully written once, i.e.,
		   pos <= mask. For the first time, we need to write values
		   in this tail (where index may be larger than mask), so that
		   we have exactly defined behavior and don't read uninitialized
		   memory. Due to performance reasons, hashing reads data using a
		   LOAD64, which can go 7 bytes beyond the bytes written in the
		   ring buffer. */
		for i := 0; i < 7; i++ {
			ringbuffer_.buffer_[ringbuffer_.pos_:][i] = 0
		}
	}
}
/* Marks all input as processed.
   Returns true if position wrapping occurs. */
func updateLastProcessedPos(s *Writer) bool {
	var wrapped_last_processed_pos uint32 = wrapPosition(s.last_processed_pos_)
	var wrapped_input_pos uint32 = wrapPosition(s.input_pos_)
	s.last_processed_pos_ = s.input_pos_
	return wrapped_input_pos < wrapped_last_processed_pos
}
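
/* A true return value means the wrapped 32-bit ring-buffer position has
   jumped backwards; callers (see encodeData) respond by resetting the hasher,
   whose stored positions would otherwise point past the wrapped position. */
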
func extendLastCommand(s *Writer, bytes *uint32, wrapped_last_processed_pos *uint32) {
	var last_command *command = &s.commands[len(s.commands)-1]
	var data []byte = s.ringbuffer_.buffer_
	var mask uint32 = s.ringbuffer_.mask_
	var max_backward_distance uint64 = ((uint64(1)) << s.params.lgwin) - windowGap
	var last_copy_len uint64 = uint64(last_command.copy_len_) & 0x1FFFFFF
	var last_processed_pos uint64 = s.last_processed_pos_ - last_copy_len
	var max_distance uint64
	if last_processed_pos < max_backward_distance {
		max_distance = last_processed_pos
	} else {
		max_distance = max_backward_distance
	}
	var cmd_dist uint64 = uint64(s.dist_cache_[0])
	var distance_code uint32 = commandRestoreDistanceCode(last_command, &s.params.dist)
	if distance_code < numDistanceShortCodes || uint64(distance_code-(numDistanceShortCodes-1)) == cmd_dist {
		if cmd_dist <= max_distance {
			for *bytes != 0 && data[*wrapped_last_processed_pos&mask] == data[(uint64(*wrapped_last_processed_pos)-cmd_dist)&uint64(mask)] {
				last_command.copy_len_++
				(*bytes)--
				(*wrapped_last_processed_pos)++
			}
		}

		/* The copy length is at most the metablock size, and thus expressible. */
		getLengthCode(uint(last_command.insert_len_), uint(int(last_command.copy_len_&0x1FFFFFF)+int(last_command.copy_len_>>25)), (last_command.dist_prefix_&0x3FF == 0), &last_command.cmd_prefix_)
	}
}
/*
Processes the accumulated input data and writes
the new output meta-block to s.dst, if one has been
created (otherwise the processed input data is buffered internally).
If |is_last| or |force_flush| is true, an output meta-block is
always created. However, until |is_last| is true the encoder may retain up
to 7 bits of the last byte of output. To force the encoder to dump the
remaining bits, use WriteMetadata() to append an empty meta-data block.
Returns false if the size of the input data is larger than
input_block_size().
*/
func encodeData(s *Writer, is_last bool, force_flush bool) bool {
	var delta uint64 = unprocessedInputSize(s)
	var bytes uint32 = uint32(delta)
	var wrapped_last_processed_pos uint32 = wrapPosition(s.last_processed_pos_)
	var data []byte
	var mask uint32
	var literal_context_mode int

	data = s.ringbuffer_.buffer_
	mask = s.ringbuffer_.mask_

	/* Adding more blocks after "last" block is forbidden. */
	if s.is_last_block_emitted_ {
		return false
	}
	if is_last {
		s.is_last_block_emitted_ = true
	}

	if delta > uint64(inputBlockSize(s)) {
		return false
	}

	if s.params.quality == fastTwoPassCompressionQuality {
		if s.command_buf_ == nil || cap(s.command_buf_) < int(kCompressFragmentTwoPassBlockSize) {
			s.command_buf_ = make([]uint32, kCompressFragmentTwoPassBlockSize)
			s.literal_buf_ = make([]byte, kCompressFragmentTwoPassBlockSize)
		} else {
			s.command_buf_ = s.command_buf_[:kCompressFragmentTwoPassBlockSize]
			s.literal_buf_ = s.literal_buf_[:kCompressFragmentTwoPassBlockSize]
		}
	}

	if s.params.quality == fastOnePassCompressionQuality || s.params.quality == fastTwoPassCompressionQuality {
		var storage []byte
		var storage_ix uint = uint(s.last_bytes_bits_)
		var table_size uint
		var table []int

		if delta == 0 && !is_last {
			/* We have no new input data and we don't have to finish the stream, so
			   nothing to do. */
			return true
		}

		storage = s.getStorage(int(2*bytes + 503))
		storage[0] = byte(s.last_bytes_)
		storage[1] = byte(s.last_bytes_ >> 8)
		table = getHashTable(s, s.params.quality, uint(bytes), &table_size)
		if s.params.quality == fastOnePassCompressionQuality {
			compressFragmentFast(data[wrapped_last_processed_pos&mask:], uint(bytes), is_last, table, table_size, s.cmd_depths_[:], s.cmd_bits_[:], &s.cmd_code_numbits_, s.cmd_code_[:], &storage_ix, storage)
		} else {
			compressFragmentTwoPass(data[wrapped_last_processed_pos&mask:], uint(bytes), is_last, s.command_buf_, s.literal_buf_, table, table_size, &storage_ix, storage)
		}

		s.last_bytes_ = uint16(storage[storage_ix>>3])
		s.last_bytes_bits_ = byte(storage_ix & 7)
		updateLastProcessedPos(s)
		s.writeOutput(storage[:storage_ix>>3])
		return true
	}
	{
		/* Theoretical max number of commands is 1 per 2 bytes. */
		newsize := len(s.commands) + int(bytes)/2 + 1
		if newsize > cap(s.commands) {
			/* Reserve a bit more memory to allow merging with a next block
			   without reallocation: that would impact speed. */
			newsize += int(bytes/4) + 16

			new_commands := make([]command, len(s.commands), newsize)
			if s.commands != nil {
				copy(new_commands, s.commands)
			}

			s.commands = new_commands
		}
	}

	initOrStitchToPreviousBlock(&s.hasher_, data, uint(mask), &s.params, uint(wrapped_last_processed_pos), uint(bytes), is_last)

	literal_context_mode = chooseContextMode(&s.params, data, uint(wrapPosition(s.last_flush_pos_)), uint(mask), uint(s.input_pos_-s.last_flush_pos_))

	if len(s.commands) != 0 && s.last_insert_len_ == 0 {
		extendLastCommand(s, &bytes, &wrapped_last_processed_pos)
	}

	if s.params.quality == zopflificationQuality {
		assert(s.params.hasher.type_ == 10)
		createZopfliBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_.(*h10), s.dist_cache_[:], &s.last_insert_len_, &s.commands, &s.num_literals_)
	} else if s.params.quality == hqZopflificationQuality {
		assert(s.params.hasher.type_ == 10)
		createHqZopfliBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_, s.dist_cache_[:], &s.last_insert_len_, &s.commands, &s.num_literals_)
	} else {
		createBackwardReferences(uint(bytes), uint(wrapped_last_processed_pos), data, uint(mask), &s.params, s.hasher_, s.dist_cache_[:], &s.last_insert_len_, &s.commands, &s.num_literals_)
	}
	{
		var max_length uint = maxMetablockSize(&s.params)
		var max_literals uint = max_length / 8
		max_commands := int(max_length / 8)
		var processed_bytes uint = uint(s.input_pos_ - s.last_flush_pos_)
		var next_input_fits_metablock bool = (processed_bytes+inputBlockSize(s) <= max_length)
		var should_flush bool = (s.params.quality < minQualityForBlockSplit && s.num_literals_+uint(len(s.commands)) >= maxNumDelayedSymbols)

		/* If maximal possible additional block doesn't fit metablock, flush now. */
		/* TODO: Postpone decision until next block arrives? */

		/* If block splitting is not used, then flush as soon as there is some
		   amount of commands / literals produced. */
		if !is_last && !force_flush && !should_flush && next_input_fits_metablock && s.num_literals_ < max_literals && len(s.commands) < max_commands {
			/* Merge with next input block. Everything will happen later. */
			if updateLastProcessedPos(s) {
				hasherReset(s.hasher_)
			}

			return true
		}
	}

	/* Create the last insert-only command. */
	if s.last_insert_len_ > 0 {
		s.commands = append(s.commands, makeInsertCommand(s.last_insert_len_))
		s.num_literals_ += s.last_insert_len_
		s.last_insert_len_ = 0
	}

	if !is_last && s.input_pos_ == s.last_flush_pos_ {
		/* We have no new input data and we don't have to finish the stream, so
		   nothing to do. */
		return true
	}

	assert(s.input_pos_ >= s.last_flush_pos_)
	assert(s.input_pos_ > s.last_flush_pos_ || is_last)
	assert(s.input_pos_-s.last_flush_pos_ <= 1<<24)
	{
		var metablock_size uint32 = uint32(s.input_pos_ - s.last_flush_pos_)
		var storage []byte = s.getStorage(int(2*metablock_size + 503))
		var storage_ix uint = uint(s.last_bytes_bits_)
		storage[0] = byte(s.last_bytes_)
		storage[1] = byte(s.last_bytes_ >> 8)
		writeMetaBlockInternal(data, uint(mask), s.last_flush_pos_, uint(metablock_size), is_last, literal_context_mode, &s.params, s.prev_byte_, s.prev_byte2_, s.num_literals_, s.commands, s.saved_dist_cache_[:], s.dist_cache_[:], &storage_ix, storage)
		s.last_bytes_ = uint16(storage[storage_ix>>3])
		s.last_bytes_bits_ = byte(storage_ix & 7)
		s.last_flush_pos_ = s.input_pos_
		if updateLastProcessedPos(s) {
			hasherReset(s.hasher_)
		}

		if s.last_flush_pos_ > 0 {
			s.prev_byte_ = data[(uint32(s.last_flush_pos_)-1)&mask]
		}

		if s.last_flush_pos_ > 1 {
			s.prev_byte2_ = data[uint32(s.last_flush_pos_-2)&mask]
		}

		s.commands = s.commands[:0]
		s.num_literals_ = 0

		/* Save the state of the distance cache in case we need to restore it for
		   emitting an uncompressed block. */
		copy(s.saved_dist_cache_[:], s.dist_cache_[:])

		s.writeOutput(storage[:storage_ix>>3])
		return true
	}
}
/* Dumps remaining output bits and metadata header to |header|.
   Returns number of produced bytes.
   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
   REQUIRED: |block_size| <= (1 << 24). */
func writeMetadataHeader(s *Writer, block_size uint, header []byte) uint {
	storage_ix := uint(s.last_bytes_bits_)
	header[0] = byte(s.last_bytes_)
	header[1] = byte(s.last_bytes_ >> 8)
	s.last_bytes_ = 0
	s.last_bytes_bits_ = 0

	writeBits(1, 0, &storage_ix, header)
	writeBits(2, 3, &storage_ix, header)
	writeBits(1, 0, &storage_ix, header)
	if block_size == 0 {
		writeBits(2, 0, &storage_ix, header)
	} else {
		var nbits uint32
		if block_size == 1 {
			nbits = 0
		} else {
			nbits = log2FloorNonZero(uint(uint32(block_size)-1)) + 1
		}
		var nbytes uint32 = (nbits + 7) / 8
		writeBits(2, uint64(nbytes), &storage_ix, header)
		writeBits(uint(8*nbytes), uint64(block_size)-1, &storage_ix, header)
	}

	return (storage_ix + 7) >> 3
}
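
/* The bits written above follow the brotli metadata meta-block layout
   (RFC 7932): ISLAST = 0, the MNIBBLES = 0 escape code ("11"), one reserved
   zero bit, the 2-bit MSKIPBYTES count, and finally MSKIPLEN - 1 stored in
   that many bytes. */
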
func injectBytePaddingBlock(s *Writer) {
	var seal uint32 = uint32(s.last_bytes_)
	var seal_bits uint = uint(s.last_bytes_bits_)
	s.last_bytes_ = 0
	s.last_bytes_bits_ = 0

	/* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
	seal |= 0x6 << seal_bits
	seal_bits += 6

	destination := s.tiny_buf_.u8[:]
	destination[0] = byte(seal)
	if seal_bits > 8 {
		destination[1] = byte(seal >> 8)
	}
	if seal_bits > 16 {
		destination[2] = byte(seal >> 16)
	}
	s.writeOutput(destination[:(seal_bits+7)>>3])
}
func checkFlushComplete(s *Writer) {
	if s.stream_state_ == streamFlushRequested && s.err == nil {
		s.stream_state_ = streamProcessing
	}
}
func encoderCompressStreamFast(s *Writer, op int, available_in *uint, next_in *[]byte) bool {
	var block_size_limit uint = uint(1) << s.params.lgwin
	var buf_size uint = brotli_min_size_t(kCompressFragmentTwoPassBlockSize, brotli_min_size_t(*available_in, block_size_limit))
	var command_buf []uint32 = nil
	var literal_buf []byte = nil
	if s.params.quality != fastOnePassCompressionQuality && s.params.quality != fastTwoPassCompressionQuality {
		return false
	}

	if s.params.quality == fastTwoPassCompressionQuality {
		if s.command_buf_ == nil || cap(s.command_buf_) < int(buf_size) {
			s.command_buf_ = make([]uint32, buf_size)
			s.literal_buf_ = make([]byte, buf_size)
		} else {
			s.command_buf_ = s.command_buf_[:buf_size]
			s.literal_buf_ = s.literal_buf_[:buf_size]
		}

		command_buf = s.command_buf_
		literal_buf = s.literal_buf_
	}

	for {
		if s.stream_state_ == streamFlushRequested && s.last_bytes_bits_ != 0 {
			injectBytePaddingBlock(s)
			continue
		}

		/* Compress a block only when the stream is not finished, there is no
		   pending flush request, and there is either additional input or a
		   pending operation. */
		if s.stream_state_ == streamProcessing && (*available_in != 0 || op != int(operationProcess)) {
			var block_size uint = brotli_min_size_t(block_size_limit, *available_in)
			var is_last bool = (*available_in == block_size) && (op == int(operationFinish))
			var force_flush bool = (*available_in == block_size) && (op == int(operationFlush))
			var max_out_size uint = 2*block_size + 503
			var storage []byte = nil
			var storage_ix uint = uint(s.last_bytes_bits_)
			var table_size uint
			var table []int

			if force_flush && block_size == 0 {
				s.stream_state_ = streamFlushRequested
				continue
			}

			storage = s.getStorage(int(max_out_size))
			storage[0] = byte(s.last_bytes_)
			storage[1] = byte(s.last_bytes_ >> 8)
			table = getHashTable(s, s.params.quality, block_size, &table_size)
			if s.params.quality == fastOnePassCompressionQuality {
				compressFragmentFast(*next_in, block_size, is_last, table, table_size, s.cmd_depths_[:], s.cmd_bits_[:], &s.cmd_code_numbits_, s.cmd_code_[:], &storage_ix, storage)
			} else {
				compressFragmentTwoPass(*next_in, block_size, is_last, command_buf, literal_buf, table, table_size, &storage_ix, storage)
			}

			*next_in = (*next_in)[block_size:]
			*available_in -= block_size
			var out_bytes uint = storage_ix >> 3
			s.writeOutput(storage[:out_bytes])

			s.last_bytes_ = uint16(storage[storage_ix>>3])
			s.last_bytes_bits_ = byte(storage_ix & 7)

			if force_flush {
				s.stream_state_ = streamFlushRequested
			}
			if is_last {
				s.stream_state_ = streamFinished
			}
			continue
		}

		break
	}

	checkFlushComplete(s)
	return true
}
func processMetadata(s *Writer, available_in *uint, next_in *[]byte) bool {
	if *available_in > 1<<24 {
		return false
	}

	/* Switch to the metadata block workflow, if required. */
	if s.stream_state_ == streamProcessing {
		s.remaining_metadata_bytes_ = uint32(*available_in)
		s.stream_state_ = streamMetadataHead
	}

	if s.stream_state_ != streamMetadataHead && s.stream_state_ != streamMetadataBody {
		return false
	}

	for {
		if s.stream_state_ == streamFlushRequested && s.last_bytes_bits_ != 0 {
			injectBytePaddingBlock(s)
			continue
		}

		if s.input_pos_ != s.last_flush_pos_ {
			var result bool = encodeData(s, false, true)
			if !result {
				return false
			}
			continue
		}

		if s.stream_state_ == streamMetadataHead {
			n := writeMetadataHeader(s, uint(s.remaining_metadata_bytes_), s.tiny_buf_.u8[:])
			s.writeOutput(s.tiny_buf_.u8[:n])
			s.stream_state_ = streamMetadataBody
			continue
		} else {
			/* Exit the workflow only when there is no more input and no more
			   output. Otherwise the client may continue producing empty metadata
			   blocks. */
			if s.remaining_metadata_bytes_ == 0 {
				s.remaining_metadata_bytes_ = math.MaxUint32
				s.stream_state_ = streamProcessing
				break
			}

			/* This guarantees progress in the "TakeOutput" workflow. */
			var c uint32 = brotli_min_uint32_t(s.remaining_metadata_bytes_, 16)
			copy(s.tiny_buf_.u8[:], (*next_in)[:c])
			*next_in = (*next_in)[c:]
			*available_in -= uint(c)
			s.remaining_metadata_bytes_ -= c
			s.writeOutput(s.tiny_buf_.u8[:c])
			continue
		}
	}

	return true
}
func updateSizeHint(s *Writer, available_in uint) {
	if s.params.size_hint == 0 {
		var delta uint64 = unprocessedInputSize(s)
		var tail uint64 = uint64(available_in)
		var limit uint32 = 1 << 30
		var total uint32
		if (delta >= uint64(limit)) || (tail >= uint64(limit)) || ((delta + tail) >= uint64(limit)) {
			total = limit
		} else {
			total = uint32(delta + tail)
		}

		s.params.size_hint = uint(total)
	}
}
func encoderCompressStream(s *Writer, op int, available_in *uint, next_in *[]byte) bool {
	if !ensureInitialized(s) {
		return false
	}

	/* Unfinished metadata block; check requirements. */
	if s.remaining_metadata_bytes_ != math.MaxUint32 {
		if uint32(*available_in) != s.remaining_metadata_bytes_ {
			return false
		}
		if op != int(operationEmitMetadata) {
			return false
		}
	}

	if op == int(operationEmitMetadata) {
		updateSizeHint(s, 0) /* First data metablock might be emitted here. */
		return processMetadata(s, available_in, next_in)
	}

	if s.stream_state_ == streamMetadataHead || s.stream_state_ == streamMetadataBody {
		return false
	}

	if s.stream_state_ != streamProcessing && *available_in != 0 {
		return false
	}

	if s.params.quality == fastOnePassCompressionQuality || s.params.quality == fastTwoPassCompressionQuality {
		return encoderCompressStreamFast(s, op, available_in, next_in)
	}

	for {
		var remaining_block_size uint = remainingInputBlockSize(s)
		if remaining_block_size != 0 && *available_in != 0 {
			var copy_input_size uint = brotli_min_size_t(remaining_block_size, *available_in)
			copyInputToRingBuffer(s, copy_input_size, *next_in)
			*next_in = (*next_in)[copy_input_size:]
			*available_in -= copy_input_size
			continue
		}

		if s.stream_state_ == streamFlushRequested && s.last_bytes_bits_ != 0 {
			injectBytePaddingBlock(s)
			continue
		}

		/* Compress data only when the stream is not finished and there is no
		   pending flush request. */
		if s.stream_state_ == streamProcessing {
			if remaining_block_size == 0 || op != int(operationProcess) {
				var is_last bool = ((*available_in == 0) && op == int(operationFinish))
				var force_flush bool = ((*available_in == 0) && op == int(operationFlush))
				var result bool
				updateSizeHint(s, *available_in)
				result = encodeData(s, is_last, force_flush)
				if !result {
					return false
				}
				if force_flush {
					s.stream_state_ = streamFlushRequested
				}
				if is_last {
					s.stream_state_ = streamFinished
				}
				continue
			}
		}

		break
	}

	checkFlushComplete(s)
	return true
}
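
/* Illustrative driver sketch: feed all input with operationFinish until the
   state machine reports completion.

       in := input
       avail := uint(len(in))
       for avail > 0 || w.stream_state_ != streamFinished {
           if !encoderCompressStream(w, operationFinish, &avail, &in) {
               break // invalid state or oversized metadata/input
           }
       }

   Compressed bytes are delivered through w.writeOutput as meta-blocks
   complete. */
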
func (w *Writer) writeOutput(data []byte) {
	if w.err != nil {
		return
	}

	_, w.err = w.dst.Write(data)
	if w.err == nil {
		checkFlushComplete(w)
	}
}