gitea源码

batch_reader.go 8.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package git
  4. import (
  5. "bufio"
  6. "bytes"
  7. "context"
  8. "io"
  9. "math"
  10. "strconv"
  11. "strings"
  12. "code.gitea.io/gitea/modules/git/gitcmd"
  13. "code.gitea.io/gitea/modules/log"
  14. "github.com/djherbis/buffer"
  15. "github.com/djherbis/nio/v3"
  16. )
  // WriteCloserError is a writer that can be closed either normally or with an
  // explicit error. Its method set is that of io.WriteCloser plus CloseWithError.
  type WriteCloserError interface {
  	io.Writer
  	io.Closer

  	// CloseWithError closes the writer with the supplied error.
  	CloseWithError(err error) error
  }
  22. // ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
  23. // Run before opening git cat-file.
  24. // This is needed otherwise the git cat-file will hang for invalid repositories.
  25. func ensureValidGitRepository(ctx context.Context, repoPath string) error {
  26. stderr := strings.Builder{}
  27. err := gitcmd.NewCommand("rev-parse").
  28. Run(ctx, &gitcmd.RunOpts{
  29. Dir: repoPath,
  30. Stderr: &stderr,
  31. })
  32. if err != nil {
  33. return gitcmd.ConcatenateError(err, (&stderr).String())
  34. }
  35. return nil
  36. }
  37. // catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
  38. func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
  39. batchStdinReader, batchStdinWriter := io.Pipe()
  40. batchStdoutReader, batchStdoutWriter := io.Pipe()
  41. ctx, ctxCancel := context.WithCancel(ctx)
  42. closed := make(chan struct{})
  43. cancel := func() {
  44. ctxCancel()
  45. _ = batchStdoutReader.Close()
  46. _ = batchStdinWriter.Close()
  47. <-closed
  48. }
  49. // Ensure cancel is called as soon as the provided context is cancelled
  50. go func() {
  51. <-ctx.Done()
  52. cancel()
  53. }()
  54. go func() {
  55. stderr := strings.Builder{}
  56. err := gitcmd.NewCommand("cat-file", "--batch-check").
  57. Run(ctx, &gitcmd.RunOpts{
  58. Dir: repoPath,
  59. Stdin: batchStdinReader,
  60. Stdout: batchStdoutWriter,
  61. Stderr: &stderr,
  62. UseContextTimeout: true,
  63. })
  64. if err != nil {
  65. _ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
  66. _ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
  67. } else {
  68. _ = batchStdoutWriter.Close()
  69. _ = batchStdinReader.Close()
  70. }
  71. close(closed)
  72. }()
  73. // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
  74. batchReader := bufio.NewReader(batchStdoutReader)
  75. return batchStdinWriter, batchReader, cancel
  76. }
  77. // catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
  78. func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
  79. // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
  80. // so let's create a batch stdin and stdout
  81. batchStdinReader, batchStdinWriter := io.Pipe()
  82. batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
  83. ctx, ctxCancel := context.WithCancel(ctx)
  84. closed := make(chan struct{})
  85. cancel := func() {
  86. ctxCancel()
  87. _ = batchStdinWriter.Close()
  88. _ = batchStdoutReader.Close()
  89. <-closed
  90. }
  91. // Ensure cancel is called as soon as the provided context is cancelled
  92. go func() {
  93. <-ctx.Done()
  94. cancel()
  95. }()
  96. go func() {
  97. stderr := strings.Builder{}
  98. err := gitcmd.NewCommand("cat-file", "--batch").
  99. Run(ctx, &gitcmd.RunOpts{
  100. Dir: repoPath,
  101. Stdin: batchStdinReader,
  102. Stdout: batchStdoutWriter,
  103. Stderr: &stderr,
  104. UseContextTimeout: true,
  105. })
  106. if err != nil {
  107. _ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
  108. _ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
  109. } else {
  110. _ = batchStdoutWriter.Close()
  111. _ = batchStdinReader.Close()
  112. }
  113. close(closed)
  114. }()
  115. // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
  116. batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
  117. return batchStdinWriter, batchReader, cancel
  118. }
  119. // ReadBatchLine reads the header line from cat-file --batch
  120. // We expect: <oid> SP <type> SP <size> LF
  121. // then leaving the rest of the stream "<contents> LF" to be read
  122. func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
  123. typ, err = rd.ReadString('\n')
  124. if err != nil {
  125. return sha, typ, size, err
  126. }
  127. if len(typ) == 1 {
  128. typ, err = rd.ReadString('\n')
  129. if err != nil {
  130. return sha, typ, size, err
  131. }
  132. }
  133. idx := strings.IndexByte(typ, ' ')
  134. if idx < 0 {
  135. log.Debug("missing space typ: %s", typ)
  136. return sha, typ, size, ErrNotExist{ID: string(sha)}
  137. }
  138. sha = []byte(typ[:idx])
  139. typ = typ[idx+1:]
  140. idx = strings.IndexByte(typ, ' ')
  141. if idx < 0 {
  142. return sha, typ, size, ErrNotExist{ID: string(sha)}
  143. }
  144. sizeStr := typ[idx+1 : len(typ)-1]
  145. typ = typ[:idx]
  146. size, err = strconv.ParseInt(sizeStr, 10, 64)
  147. return sha, typ, size, err
  148. }
  149. // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
  150. func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
  151. var id string
  152. var n int64
  153. headerLoop:
  154. for {
  155. line, err := rd.ReadBytes('\n')
  156. if err != nil {
  157. return "", err
  158. }
  159. n += int64(len(line))
  160. idx := bytes.Index(line, []byte{' '})
  161. if idx < 0 {
  162. continue
  163. }
  164. if string(line[:idx]) == "object" {
  165. id = string(line[idx+1 : len(line)-1])
  166. break headerLoop
  167. }
  168. }
  169. // Discard the rest of the tag
  170. return id, DiscardFull(rd, size-n+1)
  171. }
  172. // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
  173. func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
  174. var id string
  175. var n int64
  176. headerLoop:
  177. for {
  178. line, err := rd.ReadBytes('\n')
  179. if err != nil {
  180. return "", err
  181. }
  182. n += int64(len(line))
  183. idx := bytes.Index(line, []byte{' '})
  184. if idx < 0 {
  185. continue
  186. }
  187. if string(line[:idx]) == "tree" {
  188. id = string(line[idx+1 : len(line)-1])
  189. break headerLoop
  190. }
  191. }
  192. // Discard the rest of the commit
  193. return id, DiscardFull(rd, size-n+1)
  194. }
  // The entries of a git tree object are stored as a list of:
  //
  //	<mode-in-ascii> SP <fname> NUL <binary Hash>
  //
  // Unfortunately this binary (20-byte) notation is at odds with all other git tools,
  // so a way to convert these binary hashes into hex hashes is needed.

  // hextable is a constant lookup table used to quickly convert between the binary
  // and the hex representation: index i holds the lowercase hex digit for the value i.
  const hextable = "0123456789abcdef"
  202. // BinToHexHeash converts a binary Hash into a hex encoded one. Input and output can be the
  203. // same byte slice to support in place conversion without allocations.
  204. // This is at least 100x quicker that hex.EncodeToString
  205. func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte {
  206. for i := objectFormat.FullLength()/2 - 1; i >= 0; i-- {
  207. v := sha[i]
  208. vhi, vlo := v>>4, v&0x0f
  209. shi, slo := hextable[vhi], hextable[vlo]
  210. out[i*2], out[i*2+1] = shi, slo
  211. }
  212. return out
  213. }
  214. // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream
  215. // This carefully avoids allocations - except where fnameBuf is too small.
  216. // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
  217. //
  218. // Each line is composed of:
  219. // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
  220. //
  221. // We don't attempt to convert the raw HASH to save a lot of time
  222. func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
  223. var readBytes []byte
  224. // Read the Mode & fname
  225. readBytes, err = rd.ReadSlice('\x00')
  226. if err != nil {
  227. return mode, fname, sha, n, err
  228. }
  229. idx := bytes.IndexByte(readBytes, ' ')
  230. if idx < 0 {
  231. log.Debug("missing space in readBytes ParseCatFileTreeLine: %s", readBytes)
  232. return mode, fname, sha, n, &ErrNotExist{}
  233. }
  234. n += idx + 1
  235. copy(modeBuf, readBytes[:idx])
  236. if len(modeBuf) >= idx {
  237. modeBuf = modeBuf[:idx]
  238. } else {
  239. modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
  240. }
  241. mode = modeBuf
  242. readBytes = readBytes[idx+1:]
  243. // Deal with the fname
  244. copy(fnameBuf, readBytes)
  245. if len(fnameBuf) > len(readBytes) {
  246. fnameBuf = fnameBuf[:len(readBytes)]
  247. } else {
  248. fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
  249. }
  250. for err == bufio.ErrBufferFull {
  251. readBytes, err = rd.ReadSlice('\x00')
  252. fnameBuf = append(fnameBuf, readBytes...)
  253. }
  254. n += len(fnameBuf)
  255. if err != nil {
  256. return mode, fname, sha, n, err
  257. }
  258. fnameBuf = fnameBuf[:len(fnameBuf)-1]
  259. fname = fnameBuf
  260. // Deal with the binary hash
  261. idx = 0
  262. length := objectFormat.FullLength() / 2
  263. for idx < length {
  264. var read int
  265. read, err = rd.Read(shaBuf[idx:length])
  266. n += read
  267. if err != nil {
  268. return mode, fname, sha, n, err
  269. }
  270. idx += read
  271. }
  272. sha = shaBuf
  273. return mode, fname, sha, n, err
  274. }
  // DiscardFull skips the next discard bytes of rd.
  //
  // bufio.Reader.Discard takes an int, so the bytes are discarded in chunks of at most
  // math.MaxInt32. This keeps every chunk representable on platforms where int is
  // 32 bits wide, however large discard is. A discard of zero or less is a no-op.
  // The first error reported by rd is returned, for example io.EOF when the stream
  // ends before discard bytes have been skipped.
  func DiscardFull(rd *bufio.Reader, discard int64) error {
  	for discard > 0 {
  		chunk := discard
  		if chunk > math.MaxInt32 {
  			chunk = math.MaxInt32
  		}
  		n, err := rd.Discard(int(chunk))
  		discard -= int64(n)
  		if err != nil {
  			return err
  		}
  	}
  	return nil
  }