123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- package uniseg
// Unicode property identifiers shared by the parsers in this package. Only the
// properties this package actually needs are listed.

// prXX is 0, which is also what a table lookup yields for a code point that has
// no entry (propertySearch returns an all-zero array in that case).
const prXX = 0

// The numbering starts at 1: prAny is 1 and every following constant is one
// larger than its predecessor. Note that prAny is therefore distinct from prXX.
// The numeric values are kept exactly as they have always been.
const (
	prAny = iota + 1

	// Grapheme properties must come first, to reduce the number of bits stored
	// in the state vector.
	prPrepend
	prCR
	prLF
	prControl
	prExtend
	prRegionalIndicator
	prSpacingMark
	prL
	prV
	prT
	prLV
	prLVT
	prZWJ
	prExtendedPictographic

	// Additional values named after Word_Break property values (UAX #29).
	prNewline
	prWSegSpace
	prDoubleQuote
	prSingleQuote
	prMidNumLet
	prNumeric
	prMidLetter
	prMidNum
	prExtendNumLet
	prALetter
	prFormat
	prHebrewLetter
	prKatakana

	// Additional values named after Sentence_Break property values (UAX #29).
	prSp
	prSTerm
	prClose
	prSContinue
	prATerm
	prUpper
	prLower
	prSep
	prOLetter

	// Values named after Line_Break classes (UAX #14).
	prCM
	prBA
	prBK
	prSP
	prEX
	prQU
	prAL
	prPR
	prPO
	prOP
	prCP
	prIS
	prHY
	prSY
	prNU
	prCL
	prNL
	prGL
	prAI
	prBB
	prHL
	prSA
	prJL
	prJV
	prJT
	prNS
	prZW
	prB2
	prIN
	prWJ
	prID
	prEB
	prCJ
	prH2
	prH3
	prSG
	prCB
	prRI
	prEM

	// Values named after East_Asian_Width property values (UAX #11).
	prN
	prNa
	prA
	prW
	prH
	prF

	prEmojiPresentation
)
// Identifiers for Unicode General Categories. The list is limited to the
// categories this package needs. Values are consecutive, starting at 0.
const (
	gcNone = iota // No category. Must be 0, the value a failed table lookup yields.
	gcCc          // Control
	gcZs          // Space separator
	gcPo          // Other punctuation
	gcSc          // Currency symbol
	gcPs          // Open punctuation
	gcPe          // Close punctuation
	gcSm          // Math symbol
	gcPd          // Dash punctuation
	gcNd          // Decimal number
	gcLu          // Uppercase letter
	gcSk          // Modifier symbol
	gcPc          // Connector punctuation
	gcLl          // Lowercase letter
	gcSo          // Other symbol
	gcLo          // Other letter
	gcPi          // Initial punctuation
	gcCf          // Format
	gcNo          // Other number
	gcPf          // Final punctuation
	gcLC          // Cased letter
	gcLm          // Modifier letter
	gcMn          // Nonspacing mark
	gcMe          // Enclosing mark
	gcMc          // Spacing mark
	gcNl          // Letter number
	gcZl          // Line separator
	gcZp          // Paragraph separator
	gcCn          // Unassigned
	gcCs          // Surrogate
	gcCo          // Private use
)
// Code points that receive special treatment.
const (
	// vs15 is U+FE0E, Variation Selector-15 (requests text presentation).
	vs15 = 0xFE0E
	// vs16 is U+FE0F, Variation Selector-16 (requests emoji presentation).
	vs16 = 0xFE0F
)
// propertySearch looks up r in dictionary using binary search. Each entry is
// an array whose element 0 is the first and element 1 the last code point of
// an inclusive range; the search relies on the entries being ordered by range.
// The entry whose range contains r is returned. If there is none, the result
// is the zero value of E, i.e. an array of 0's.
func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) {
	cp := int(r)

	// The candidates are dictionary[lo:hi]; the window shrinks each iteration.
	lo, hi := 0, len(dictionary)
	for lo < hi {
		mid := (lo + hi) / 2
		entry := dictionary[mid]
		switch {
		case cp < entry[0]:
			// r lies before this range: keep the lower half.
			hi = mid
		case cp > entry[1]:
			// r lies after this range: keep the upper half.
			lo = mid + 1
		default:
			return entry
		}
	}

	// Nothing matched; result still holds its zero value.
	return result
}
- // property returns the Unicode property value (see constants above) of the
- // given code point.
- func property(dictionary [][3]int, r rune) int {
- return propertySearch(dictionary, r)[2]
- }
- // propertyLineBreak returns the Unicode property value and General Category
- // (see constants above) of the given code point, as listed in the line break
- // code points table, while fast tracking ASCII digits and letters.
- func propertyLineBreak(r rune) (property, generalCategory int) {
- if r >= 'a' && r <= 'z' {
- return prAL, gcLl
- }
- if r >= 'A' && r <= 'Z' {
- return prAL, gcLu
- }
- if r >= '0' && r <= '9' {
- return prNU, gcNd
- }
- entry := propertySearch(lineBreakCodePoints, r)
- return entry[2], entry[3]
- }
- // propertyGraphemes returns the Unicode grapheme cluster property value of the
- // given code point while fast tracking ASCII characters.
- func propertyGraphemes(r rune) int {
- if r >= 0x20 && r <= 0x7e {
- return prAny
- }
- if r == 0x0a {
- return prLF
- }
- if r == 0x0d {
- return prCR
- }
- if r >= 0 && r <= 0x1f || r == 0x7f {
- return prControl
- }
- return property(graphemeCodePoints, r)
- }
- // propertyEastAsianWidth returns the Unicode East Asian Width property value of
- // the given code point while fast tracking ASCII characters.
- func propertyEastAsianWidth(r rune) int {
- if r >= 0x20 && r <= 0x7e {
- return prNa
- }
- if r >= 0 && r <= 0x1f || r == 0x7f {
- return prN
- }
- return property(eastAsianWidth, r)
- }
|