gitea源码

mdstripper.go 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package mdstripper
  4. import (
  5. "bytes"
  6. "io"
  7. "net/url"
  8. "strings"
  9. "sync"
  10. "code.gitea.io/gitea/modules/log"
  11. "code.gitea.io/gitea/modules/markup/common"
  12. "code.gitea.io/gitea/modules/setting"
  13. "github.com/yuin/goldmark"
  14. "github.com/yuin/goldmark/ast"
  15. "github.com/yuin/goldmark/extension"
  16. "github.com/yuin/goldmark/parser"
  17. "github.com/yuin/goldmark/renderer"
  18. "github.com/yuin/goldmark/renderer/html"
  19. "github.com/yuin/goldmark/text"
  20. )
  21. var (
  22. giteaHostInit sync.Once
  23. giteaHost *url.URL
  24. )
  25. type stripRenderer struct {
  26. localhost *url.URL
  27. links []string
  28. empty bool
  29. }
  30. func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error {
  31. return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
  32. if !entering {
  33. return ast.WalkContinue, nil
  34. }
  35. switch v := n.(type) {
  36. case *ast.Text:
  37. if !v.IsRaw() {
  38. _, prevSibIsText := n.PreviousSibling().(*ast.Text)
  39. coalesce := prevSibIsText
  40. r.processString(
  41. w,
  42. v.Text(source), //nolint:staticcheck // Text is deprecated
  43. coalesce)
  44. if v.SoftLineBreak() {
  45. r.doubleSpace(w)
  46. }
  47. }
  48. return ast.WalkContinue, nil
  49. case *ast.Link:
  50. r.processLink(v.Destination)
  51. return ast.WalkSkipChildren, nil
  52. case *ast.AutoLink:
  53. // This could be a reference to an issue or pull - if so convert it
  54. r.processAutoLink(w, v.URL(source))
  55. return ast.WalkSkipChildren, nil
  56. }
  57. return ast.WalkContinue, nil
  58. })
  59. }
  60. func (r *stripRenderer) doubleSpace(w io.Writer) {
  61. if !r.empty {
  62. _, _ = w.Write([]byte{'\n'})
  63. }
  64. }
  65. func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) {
  66. // Always break-up words
  67. if !coalesce {
  68. r.doubleSpace(w)
  69. }
  70. _, _ = w.Write(text)
  71. r.empty = false
  72. }
  73. // ProcessAutoLinks to detect and handle links to issues and pulls
  74. func (r *stripRenderer) processAutoLink(w io.Writer, link []byte) {
  75. linkStr := string(link)
  76. u, err := url.Parse(linkStr)
  77. if err != nil {
  78. // Process out of band
  79. r.links = append(r.links, linkStr)
  80. return
  81. }
  82. // Note: we're not attempting to match the URL scheme (http/https)
  83. if u.Host != "" && !strings.EqualFold(u.Host, r.localhost.Host) {
  84. // Process out of band
  85. r.links = append(r.links, linkStr)
  86. return
  87. }
  88. // We want: /user/repo/issues/3
  89. parts := strings.Split(strings.TrimPrefix(u.EscapedPath(), r.localhost.EscapedPath()), "/")
  90. if len(parts) != 5 || parts[0] != "" {
  91. // Process out of band
  92. r.links = append(r.links, linkStr)
  93. return
  94. }
  95. var sep string
  96. switch parts[3] {
  97. case "issues":
  98. sep = "#"
  99. case "pulls":
  100. sep = "!"
  101. default:
  102. // Process out of band
  103. r.links = append(r.links, linkStr)
  104. return
  105. }
  106. _, _ = w.Write([]byte(parts[1]))
  107. _, _ = w.Write([]byte("/"))
  108. _, _ = w.Write([]byte(parts[2]))
  109. _, _ = w.Write([]byte(sep))
  110. _, _ = w.Write([]byte(parts[4]))
  111. }
  112. func (r *stripRenderer) processLink(link []byte) {
  113. // Links are processed out of band
  114. r.links = append(r.links, string(link))
  115. }
  116. // GetLinks returns the list of link data collected while parsing
  117. func (r *stripRenderer) GetLinks() []string {
  118. return r.links
  119. }
  120. // AddOptions adds given option to this renderer.
  121. func (r *stripRenderer) AddOptions(...renderer.Option) {
  122. // no-op
  123. }
  124. // StripMarkdown parses markdown content by removing all markup and code blocks
  125. // in order to extract links and other references
  126. func StripMarkdown(rawBytes []byte) (string, []string) {
  127. buf, links := StripMarkdownBytes(rawBytes)
  128. return string(buf), links
  129. }
  130. var (
  131. stripParser parser.Parser
  132. once = sync.Once{}
  133. )
  134. // StripMarkdownBytes parses markdown content by removing all markup and code blocks
  135. // in order to extract links and other references
  136. func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
  137. once.Do(func() {
  138. gdMarkdown := goldmark.New(
  139. goldmark.WithExtensions(extension.Table,
  140. extension.Strikethrough,
  141. extension.TaskList,
  142. extension.DefinitionList,
  143. common.FootnoteExtension,
  144. common.Linkify,
  145. ),
  146. goldmark.WithParserOptions(
  147. parser.WithAttribute(),
  148. parser.WithAutoHeadingID(),
  149. ),
  150. goldmark.WithRendererOptions(
  151. html.WithUnsafe(),
  152. ),
  153. )
  154. stripParser = gdMarkdown.Parser()
  155. })
  156. stripper := &stripRenderer{
  157. localhost: getGiteaHost(),
  158. links: make([]string, 0, 10),
  159. empty: true,
  160. }
  161. reader := text.NewReader(rawBytes)
  162. doc := stripParser.Parse(reader)
  163. var buf bytes.Buffer
  164. if err := stripper.Render(&buf, rawBytes, doc); err != nil {
  165. log.Error("Unable to strip: %v", err)
  166. }
  167. return buf.Bytes(), stripper.GetLinks()
  168. }
  169. // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
  170. func getGiteaHost() *url.URL {
  171. giteaHostInit.Do(func() {
  172. var err error
  173. if giteaHost, err = url.Parse(setting.AppURL); err != nil {
  174. giteaHost = &url.URL{}
  175. }
  176. })
  177. return giteaHost
  178. }