gitea源码

highlight.go 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. // Copyright 2015 The Gogs Authors. All rights reserved.
  2. // Copyright 2020 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package highlight
  5. import (
  6. "bufio"
  7. "bytes"
  8. "fmt"
  9. gohtml "html"
  10. "html/template"
  11. "io"
  12. "path"
  13. "path/filepath"
  14. "strings"
  15. "sync"
  16. "code.gitea.io/gitea/modules/analyze"
  17. "code.gitea.io/gitea/modules/log"
  18. "code.gitea.io/gitea/modules/setting"
  19. "code.gitea.io/gitea/modules/util"
  20. "github.com/alecthomas/chroma/v2"
  21. "github.com/alecthomas/chroma/v2/formatters/html"
  22. "github.com/alecthomas/chroma/v2/lexers"
  23. "github.com/alecthomas/chroma/v2/styles"
  24. lru "github.com/hashicorp/golang-lru/v2"
  25. )
  26. // don't index files larger than this many bytes for performance purposes
  27. const sizeLimit = 1024 * 1024
  28. var (
  29. // For custom user mapping
  30. highlightMapping = map[string]string{}
  31. once sync.Once
  32. cache *lru.TwoQueueCache[string, any]
  33. githubStyles = styles.Get("github")
  34. )
  35. // NewContext loads custom highlight map from local config
  36. func NewContext() {
  37. once.Do(func() {
  38. highlightMapping = setting.GetHighlightMapping()
  39. // The size 512 is simply a conservative rule of thumb
  40. c, err := lru.New2Q[string, any](512)
  41. if err != nil {
  42. panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
  43. }
  44. cache = c
  45. })
  46. }
  47. // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
  48. func Code(fileName, language, code string) (output template.HTML, lexerName string) {
  49. NewContext()
  50. // diff view newline will be passed as empty, change to literal '\n' so it can be copied
  51. // preserve literal newline in blame view
  52. if code == "" || code == "\n" {
  53. return "\n", ""
  54. }
  55. if len(code) > sizeLimit {
  56. return template.HTML(template.HTMLEscapeString(code)), ""
  57. }
  58. var lexer chroma.Lexer
  59. if len(language) > 0 {
  60. lexer = lexers.Get(language)
  61. if lexer == nil {
  62. // Attempt stripping off the '?'
  63. if idx := strings.IndexByte(language, '?'); idx > 0 {
  64. lexer = lexers.Get(language[:idx])
  65. }
  66. }
  67. }
  68. if lexer == nil {
  69. if val, ok := highlightMapping[path.Ext(fileName)]; ok {
  70. // use mapped value to find lexer
  71. lexer = lexers.Get(val)
  72. }
  73. }
  74. if lexer == nil {
  75. if l, ok := cache.Get(fileName); ok {
  76. lexer = l.(chroma.Lexer)
  77. }
  78. }
  79. if lexer == nil {
  80. lexer = lexers.Match(fileName)
  81. if lexer == nil {
  82. lexer = lexers.Fallback
  83. }
  84. cache.Add(fileName, lexer)
  85. }
  86. return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
  87. }
  88. // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
  89. func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
  90. formatter := html.New(html.WithClasses(true),
  91. html.WithLineNumbers(false),
  92. html.PreventSurroundingPre(true),
  93. )
  94. htmlbuf := bytes.Buffer{}
  95. htmlw := bufio.NewWriter(&htmlbuf)
  96. iterator, err := lexer.Tokenise(nil, code)
  97. if err != nil {
  98. log.Error("Can't tokenize code: %v", err)
  99. return template.HTML(template.HTMLEscapeString(code))
  100. }
  101. // style not used for live site but need to pass something
  102. err = formatter.Format(htmlw, githubStyles, iterator)
  103. if err != nil {
  104. log.Error("Can't format code: %v", err)
  105. return template.HTML(template.HTMLEscapeString(code))
  106. }
  107. _ = htmlw.Flush()
  108. // Chroma will add newlines for certain lexers in order to highlight them properly
  109. // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
  110. return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
  111. }
  112. // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
  113. func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
  114. NewContext()
  115. if len(code) > sizeLimit {
  116. return PlainText(code), "", nil
  117. }
  118. formatter := html.New(html.WithClasses(true),
  119. html.WithLineNumbers(false),
  120. html.PreventSurroundingPre(true),
  121. )
  122. var lexer chroma.Lexer
  123. // provided language overrides everything
  124. if language != "" {
  125. lexer = lexers.Get(language)
  126. }
  127. if lexer == nil {
  128. if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
  129. lexer = lexers.Get(val)
  130. }
  131. }
  132. if lexer == nil {
  133. guessLanguage := analyze.GetCodeLanguage(fileName, code)
  134. lexer = lexers.Get(guessLanguage)
  135. if lexer == nil {
  136. lexer = lexers.Match(fileName)
  137. if lexer == nil {
  138. lexer = lexers.Fallback
  139. }
  140. }
  141. }
  142. lexerName := formatLexerName(lexer.Config().Name)
  143. iterator, err := lexer.Tokenise(nil, string(code))
  144. if err != nil {
  145. return nil, "", fmt.Errorf("can't tokenize code: %w", err)
  146. }
  147. tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
  148. htmlBuf := &bytes.Buffer{}
  149. lines := make([]template.HTML, 0, len(tokensLines))
  150. for _, tokens := range tokensLines {
  151. iterator = chroma.Literator(tokens...)
  152. err = formatter.Format(htmlBuf, githubStyles, iterator)
  153. if err != nil {
  154. return nil, "", fmt.Errorf("can't format code: %w", err)
  155. }
  156. lines = append(lines, template.HTML(htmlBuf.String()))
  157. htmlBuf.Reset()
  158. }
  159. return lines, lexerName, nil
  160. }
  161. // PlainText returns non-highlighted HTML for code
  162. func PlainText(code []byte) []template.HTML {
  163. r := bufio.NewReader(bytes.NewReader(code))
  164. m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
  165. for {
  166. content, err := r.ReadString('\n')
  167. if err != nil && err != io.EOF {
  168. log.Error("failed to read string from buffer: %v", err)
  169. break
  170. }
  171. if content == "" && err == io.EOF {
  172. break
  173. }
  174. s := template.HTML(gohtml.EscapeString(content))
  175. m = append(m, s)
  176. }
  177. return m
  178. }
  179. func formatLexerName(name string) string {
  180. if name == "fallback" {
  181. return "Plaintext"
  182. }
  183. return util.ToTitleCaseNoLower(name)
  184. }