gitea源码

log_name_status.go 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package git
  4. import (
  5. "bufio"
  6. "bytes"
  7. "context"
  8. "errors"
  9. "io"
  10. "path"
  11. "sort"
  12. "strings"
  13. "code.gitea.io/gitea/modules/container"
  14. "code.gitea.io/gitea/modules/git/gitcmd"
  15. "github.com/djherbis/buffer"
  16. "github.com/djherbis/nio/v3"
  17. )
  18. // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
  19. func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
  20. // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
  21. // so let's create a batch stdin and stdout
  22. stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
  23. // Lets also create a context so that we can absolutely ensure that the command should die when we're done
  24. ctx, ctxCancel := context.WithCancel(ctx)
  25. cancel := func() {
  26. ctxCancel()
  27. _ = stdoutReader.Close()
  28. _ = stdoutWriter.Close()
  29. }
  30. cmd := gitcmd.NewCommand()
  31. cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
  32. var files []string
  33. if len(paths) < 70 {
  34. if treepath != "" {
  35. files = append(files, treepath)
  36. for _, pth := range paths {
  37. if pth != "" {
  38. files = append(files, path.Join(treepath, pth))
  39. }
  40. }
  41. } else {
  42. for _, pth := range paths {
  43. if pth != "" {
  44. files = append(files, pth)
  45. }
  46. }
  47. }
  48. } else if treepath != "" {
  49. files = append(files, treepath)
  50. }
  51. // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg"
  52. for i, file := range files {
  53. files[i] = ":(literal)" + file
  54. }
  55. cmd.AddDashesAndList(files...)
  56. go func() {
  57. stderr := strings.Builder{}
  58. err := cmd.Run(ctx, &gitcmd.RunOpts{
  59. Dir: repository,
  60. Stdout: stdoutWriter,
  61. Stderr: &stderr,
  62. })
  63. if err != nil {
  64. _ = stdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
  65. return
  66. }
  67. _ = stdoutWriter.Close()
  68. }()
  69. // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
  70. bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
  71. return bufReader, cancel
  72. }
  73. // LogNameStatusRepoParser parses a git log raw output from LogRawRepo
  74. type LogNameStatusRepoParser struct {
  75. treepath string
  76. paths []string
  77. next []byte
  78. buffull bool
  79. rd *bufio.Reader
  80. cancel func()
  81. }
  82. // NewLogNameStatusRepoParser returns a new parser for a git log raw output
  83. func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
  84. rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
  85. return &LogNameStatusRepoParser{
  86. treepath: treepath,
  87. paths: paths,
  88. rd: rd,
  89. cancel: cancel,
  90. }
  91. }
  92. // LogNameStatusCommitData represents a commit artefact from git log raw
  93. type LogNameStatusCommitData struct {
  94. CommitID string
  95. ParentIDs []string
  96. Paths []bool
  97. }
  98. // Next returns the next LogStatusCommitData
  99. func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
  100. var err error
  101. if len(g.next) == 0 {
  102. g.buffull = false
  103. g.next, err = g.rd.ReadSlice('\x00')
  104. if err != nil {
  105. switch err {
  106. case bufio.ErrBufferFull:
  107. g.buffull = true
  108. case io.EOF:
  109. return nil, nil
  110. default:
  111. return nil, err
  112. }
  113. }
  114. }
  115. ret := LogNameStatusCommitData{}
  116. if bytes.Equal(g.next, []byte("commit\000")) {
  117. g.next, err = g.rd.ReadSlice('\x00')
  118. if err != nil {
  119. switch err {
  120. case bufio.ErrBufferFull:
  121. g.buffull = true
  122. case io.EOF:
  123. return nil, nil
  124. default:
  125. return nil, err
  126. }
  127. }
  128. }
  129. // Our "line" must look like: <commitid> SP (<parent> SP) * NUL
  130. commitIDs := string(g.next)
  131. if g.buffull {
  132. more, err := g.rd.ReadString('\x00')
  133. if err != nil {
  134. return nil, err
  135. }
  136. commitIDs += more
  137. }
  138. commitIDs = commitIDs[:len(commitIDs)-1]
  139. splitIDs := strings.Split(commitIDs, " ")
  140. ret.CommitID = splitIDs[0]
  141. if len(splitIDs) > 1 {
  142. ret.ParentIDs = splitIDs[1:]
  143. }
  144. // now read the next "line"
  145. g.buffull = false
  146. g.next, err = g.rd.ReadSlice('\x00')
  147. if err != nil {
  148. if err == bufio.ErrBufferFull {
  149. g.buffull = true
  150. } else if err != io.EOF {
  151. return nil, err
  152. }
  153. }
  154. if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
  155. return &ret, nil
  156. }
  157. // Ok we have some changes.
  158. // This line will look like: NL <fname> NUL
  159. //
  160. // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
  161. if g.next[0] == '\n' {
  162. g.next = g.next[1:]
  163. } else {
  164. g.buffull = false
  165. g.next, err = g.rd.ReadSlice('\x00')
  166. if err != nil {
  167. if err == bufio.ErrBufferFull {
  168. g.buffull = true
  169. } else if err != io.EOF {
  170. return nil, err
  171. }
  172. }
  173. if len(g.next) == 0 {
  174. return &ret, nil
  175. }
  176. if g.next[0] == '\x00' {
  177. g.buffull = false
  178. g.next, err = g.rd.ReadSlice('\x00')
  179. if err != nil {
  180. if err == bufio.ErrBufferFull {
  181. g.buffull = true
  182. } else if err != io.EOF {
  183. return nil, err
  184. }
  185. }
  186. }
  187. }
  188. fnameBuf := make([]byte, 4096)
  189. diffloop:
  190. for {
  191. if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
  192. return &ret, nil
  193. }
  194. g.next, err = g.rd.ReadSlice('\x00')
  195. if err != nil {
  196. switch err {
  197. case bufio.ErrBufferFull:
  198. g.buffull = true
  199. case io.EOF:
  200. return &ret, nil
  201. default:
  202. return nil, err
  203. }
  204. }
  205. copy(fnameBuf, g.next)
  206. if len(fnameBuf) < len(g.next) {
  207. fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
  208. } else {
  209. fnameBuf = fnameBuf[:len(g.next)]
  210. }
  211. if err != nil {
  212. if err != bufio.ErrBufferFull {
  213. return nil, err
  214. }
  215. more, err := g.rd.ReadBytes('\x00')
  216. if err != nil {
  217. return nil, err
  218. }
  219. fnameBuf = append(fnameBuf, more...)
  220. }
  221. // read the next line
  222. g.buffull = false
  223. g.next, err = g.rd.ReadSlice('\x00')
  224. if err != nil {
  225. if err == bufio.ErrBufferFull {
  226. g.buffull = true
  227. } else if err != io.EOF {
  228. return nil, err
  229. }
  230. }
  231. if treepath != "" {
  232. if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
  233. fnameBuf = fnameBuf[:cap(fnameBuf)]
  234. continue diffloop
  235. }
  236. }
  237. fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
  238. if len(fnameBuf) > maxpathlen {
  239. fnameBuf = fnameBuf[:cap(fnameBuf)]
  240. continue diffloop
  241. }
  242. if len(fnameBuf) > 0 {
  243. if len(treepath) > 0 {
  244. if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
  245. fnameBuf = fnameBuf[:cap(fnameBuf)]
  246. continue diffloop
  247. }
  248. fnameBuf = fnameBuf[1:]
  249. } else if bytes.IndexByte(fnameBuf, '/') >= 0 {
  250. fnameBuf = fnameBuf[:cap(fnameBuf)]
  251. continue diffloop
  252. }
  253. }
  254. idx, ok := paths2ids[string(fnameBuf)]
  255. if !ok {
  256. fnameBuf = fnameBuf[:cap(fnameBuf)]
  257. continue diffloop
  258. }
  259. if ret.Paths == nil {
  260. ret.Paths = changed
  261. }
  262. changed[idx] = true
  263. }
  264. }
  265. // Close closes the parser
  266. func (g *LogNameStatusRepoParser) Close() {
  267. g.cancel()
  268. }
  269. // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
  270. func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
  271. headRef := head.ID.String()
  272. tree, err := head.SubTree(treepath)
  273. if err != nil {
  274. return nil, err
  275. }
  276. entries, err := tree.ListEntries()
  277. if err != nil {
  278. return nil, err
  279. }
  280. if len(paths) == 0 {
  281. paths = make([]string, 0, len(entries)+1)
  282. paths = append(paths, "")
  283. for _, entry := range entries {
  284. paths = append(paths, entry.Name())
  285. }
  286. } else {
  287. sort.Strings(paths)
  288. if paths[0] != "" {
  289. paths = append([]string{""}, paths...)
  290. }
  291. // remove duplicates
  292. for i := len(paths) - 1; i > 0; i-- {
  293. if paths[i] == paths[i-1] {
  294. paths = append(paths[:i-1], paths[i:]...)
  295. }
  296. }
  297. }
  298. path2idx := map[string]int{}
  299. maxpathlen := len(treepath)
  300. for i := range paths {
  301. path2idx[paths[i]] = i
  302. pthlen := len(paths[i]) + len(treepath) + 1
  303. if pthlen > maxpathlen {
  304. maxpathlen = pthlen
  305. }
  306. }
  307. g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
  308. // don't use defer g.Close() here as g may change its value - instead wrap in a func
  309. defer func() {
  310. g.Close()
  311. }()
  312. results := make([]string, len(paths))
  313. remaining := len(paths)
  314. nextRestart := min((len(paths)*3)/4, 70)
  315. lastEmptyParent := head.ID.String()
  316. commitSinceLastEmptyParent := uint64(0)
  317. commitSinceNextRestart := uint64(0)
  318. parentRemaining := make(container.Set[string])
  319. changed := make([]bool, len(paths))
  320. heaploop:
  321. for {
  322. select {
  323. case <-ctx.Done():
  324. if ctx.Err() == context.DeadlineExceeded {
  325. break heaploop
  326. }
  327. g.Close()
  328. return nil, ctx.Err()
  329. default:
  330. }
  331. current, err := g.Next(treepath, path2idx, changed, maxpathlen)
  332. if err != nil {
  333. if errors.Is(err, context.DeadlineExceeded) {
  334. break heaploop
  335. }
  336. g.Close()
  337. return nil, err
  338. }
  339. if current == nil {
  340. break heaploop
  341. }
  342. parentRemaining.Remove(current.CommitID)
  343. for i, found := range current.Paths {
  344. if !found {
  345. continue
  346. }
  347. changed[i] = false
  348. if results[i] == "" {
  349. results[i] = current.CommitID
  350. if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
  351. return nil, err
  352. }
  353. delete(path2idx, paths[i])
  354. remaining--
  355. if results[0] == "" {
  356. results[0] = current.CommitID
  357. if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
  358. return nil, err
  359. }
  360. delete(path2idx, "")
  361. remaining--
  362. }
  363. }
  364. }
  365. if remaining <= 0 {
  366. break heaploop
  367. }
  368. commitSinceLastEmptyParent++
  369. if len(parentRemaining) == 0 {
  370. lastEmptyParent = current.CommitID
  371. commitSinceLastEmptyParent = 0
  372. }
  373. if remaining <= nextRestart {
  374. commitSinceNextRestart++
  375. if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
  376. g.Close()
  377. remainingPaths := make([]string, 0, len(paths))
  378. for i, pth := range paths {
  379. if results[i] == "" {
  380. remainingPaths = append(remainingPaths, pth)
  381. }
  382. }
  383. g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
  384. parentRemaining = make(container.Set[string])
  385. nextRestart = (remaining * 3) / 4
  386. continue heaploop
  387. }
  388. }
  389. parentRemaining.AddMultiple(current.ParentIDs...)
  390. }
  391. g.Close()
  392. resultsMap := map[string]string{}
  393. for i, pth := range paths {
  394. resultsMap[pth] = results[i]
  395. }
  396. return resultsMap, nil
  397. }