gitea源码

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Copyright 2019 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package gitdiff
  5. import (
  6. "bufio"
  7. "bytes"
  8. "context"
  9. "fmt"
  10. "html"
  11. "html/template"
  12. "io"
  13. "net/url"
  14. "sort"
  15. "strings"
  16. "time"
  17. "code.gitea.io/gitea/models/db"
  18. git_model "code.gitea.io/gitea/models/git"
  19. issues_model "code.gitea.io/gitea/models/issues"
  20. pull_model "code.gitea.io/gitea/models/pull"
  21. user_model "code.gitea.io/gitea/models/user"
  22. "code.gitea.io/gitea/modules/analyze"
  23. "code.gitea.io/gitea/modules/charset"
  24. "code.gitea.io/gitea/modules/git"
  25. "code.gitea.io/gitea/modules/git/attribute"
  26. "code.gitea.io/gitea/modules/git/gitcmd"
  27. "code.gitea.io/gitea/modules/highlight"
  28. "code.gitea.io/gitea/modules/lfs"
  29. "code.gitea.io/gitea/modules/log"
  30. "code.gitea.io/gitea/modules/optional"
  31. "code.gitea.io/gitea/modules/setting"
  32. "code.gitea.io/gitea/modules/translation"
  33. "code.gitea.io/gitea/modules/util"
  34. "github.com/sergi/go-diff/diffmatchpatch"
  35. stdcharset "golang.org/x/net/html/charset"
  36. "golang.org/x/text/encoding"
  37. "golang.org/x/text/transform"
  38. )
  39. // DiffLineType represents the type of DiffLine.
  40. type DiffLineType uint8
  41. // DiffLineType possible values.
  42. const (
  43. DiffLinePlain DiffLineType = iota + 1
  44. DiffLineAdd
  45. DiffLineDel
  46. DiffLineSection
  47. )
  48. // DiffFileType represents the type of DiffFile.
  49. type DiffFileType uint8
  50. // DiffFileType possible values.
  51. const (
  52. DiffFileAdd DiffFileType = iota + 1
  53. DiffFileChange
  54. DiffFileDel
  55. DiffFileRename
  56. DiffFileCopy
  57. )
  58. // DiffLineExpandDirection represents the DiffLineSection expand direction
  59. type DiffLineExpandDirection uint8
  60. // DiffLineExpandDirection possible values.
  61. const (
  62. DiffLineExpandNone DiffLineExpandDirection = iota + 1
  63. DiffLineExpandSingle
  64. DiffLineExpandUpDown
  65. DiffLineExpandUp
  66. DiffLineExpandDown
  67. )
// DiffLine represents a line difference in a DiffSection.
type DiffLine struct {
	LeftIdx  int // line number, 1-based
	RightIdx int // line number, 1-based
	// Match is the diff matched index.
	// -1: no match. 0: plain and no need to match.
	// >0: for add/del, "Lines" slice index of the other side
	Match int
	Type  DiffLineType
	// Content is the raw text of the diff line. GetLineTypeMarker treats a
	// leading ' ', '+' or '-' byte as the line type marker, and the rendering
	// code skips the first byte when it highlights the line.
	Content  string
	Comments issues_model.CommentList // related PR code comments
	// SectionInfo carries the hunk meta data read by GetBlobExcerptQuery and
	// GetExpandDirection; GetExpandDirection tolerates a nil value.
	SectionInfo *DiffLineSectionInfo
}
// DiffLineSectionInfo represents diff line section meta data
type DiffLineSectionInfo struct {
	Path string // file path; sent as the "path" parameter by GetBlobExcerptQuery
	// LastLeftIdx and LastRightIdx are the left/right line numbers that were
	// current when the section was created (the caller-supplied lastLeftIdx /
	// lastRightIdx in getDiffLineSectionInfo).
	LastLeftIdx  int
	LastRightIdx int
	// LeftIdx, RightIdx, LeftHunkSize and RightHunkSize are filled from the
	// four values returned by git.ParseDiffHunkString for the hunk header.
	LeftIdx       int
	RightIdx      int
	LeftHunkSize  int
	RightHunkSize int
}
// DiffHTMLOperation is the HTML version of diffmatchpatch.Diff
type DiffHTMLOperation struct {
	Type diffmatchpatch.Operation // the diffmatchpatch operation this fragment belongs to
	HTML template.HTML            // the fragment's content as HTML
}
  93. // BlobExcerptChunkSize represent max lines of excerpt
  94. const BlobExcerptChunkSize = 20
  95. // MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff"
  96. const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024
  97. // GetType returns the type of DiffLine.
  98. func (d *DiffLine) GetType() int {
  99. return int(d.Type)
  100. }
  101. // GetHTMLDiffLineType returns the diff line type name for HTML
  102. func (d *DiffLine) GetHTMLDiffLineType() string {
  103. switch d.Type {
  104. case DiffLineAdd:
  105. return "add"
  106. case DiffLineDel:
  107. return "del"
  108. case DiffLineSection:
  109. return "tag"
  110. default:
  111. return "same"
  112. }
  113. }
  114. // CanComment returns whether a line can get commented
  115. func (d *DiffLine) CanComment() bool {
  116. return len(d.Comments) == 0 && d.Type != DiffLineSection
  117. }
  118. // GetCommentSide returns the comment side of the first comment, if not set returns empty string
  119. func (d *DiffLine) GetCommentSide() string {
  120. if len(d.Comments) == 0 {
  121. return ""
  122. }
  123. return d.Comments[0].DiffSide()
  124. }
  125. // GetLineTypeMarker returns the line type marker
  126. func (d *DiffLine) GetLineTypeMarker() string {
  127. if strings.IndexByte(" +-", d.Content[0]) > -1 {
  128. return d.Content[0:1]
  129. }
  130. return ""
  131. }
  132. // GetBlobExcerptQuery builds query string to get blob excerpt
  133. func (d *DiffLine) GetBlobExcerptQuery() string {
  134. query := fmt.Sprintf(
  135. "last_left=%d&last_right=%d&"+
  136. "left=%d&right=%d&"+
  137. "left_hunk_size=%d&right_hunk_size=%d&"+
  138. "path=%s",
  139. d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx,
  140. d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx,
  141. d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize,
  142. url.QueryEscape(d.SectionInfo.Path))
  143. return query
  144. }
  145. // GetExpandDirection gets DiffLineExpandDirection
  146. func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection {
  147. if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.LeftIdx-d.SectionInfo.LastLeftIdx <= 1 || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 {
  148. return DiffLineExpandNone
  149. }
  150. if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 {
  151. return DiffLineExpandUp
  152. } else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 {
  153. return DiffLineExpandUpDown
  154. } else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 {
  155. return DiffLineExpandDown
  156. }
  157. return DiffLineExpandSingle
  158. }
  159. func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo {
  160. leftLine, leftHunk, rightLine, rightHunk := git.ParseDiffHunkString(line)
  161. return &DiffLineSectionInfo{
  162. Path: treePath,
  163. LastLeftIdx: lastLeftIdx,
  164. LastRightIdx: lastRightIdx,
  165. LeftIdx: leftLine,
  166. RightIdx: rightLine,
  167. LeftHunkSize: leftHunk,
  168. RightHunkSize: rightHunk,
  169. }
  170. }
  171. // escape a line's content or return <br> needed for copy/paste purposes
  172. func getLineContent(content string, locale translation.Locale) DiffInline {
  173. if len(content) > 0 {
  174. return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale)
  175. }
  176. return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "<br>"}
  177. }
// DiffSection represents a section of a DiffFile.
type DiffSection struct {
	// file is the owning DiffFile; it is nil when the section was prepared
	// manually (see the note in getDiffLineForRender).
	file     *DiffFile
	FileName string      // passed to highlight.Code when a line is highlighted on its own
	Lines    []*DiffLine // the lines of the section, addressed by index through GetLine
}
  184. func (diffSection *DiffSection) GetLine(idx int) *DiffLine {
  185. if idx <= 0 {
  186. return nil
  187. }
  188. return diffSection.Lines[idx]
  189. }
  190. func defaultDiffMatchPatch() *diffmatchpatch.DiffMatchPatch {
  191. dmp := diffmatchpatch.New()
  192. dmp.DiffEditCost = 100
  193. return dmp
  194. }
// DiffInline is a struct that has a content and escape status
type DiffInline struct {
	EscapeStatus *charset.EscapeStatus // status reported by charset.EscapeControlHTML for Content
	Content      template.HTML         // the HTML to render for the line
}
  200. // DiffInlineWithUnicodeEscape makes a DiffInline with hidden Unicode characters escaped
  201. func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
  202. status, content := charset.EscapeControlHTML(s, locale)
  203. return DiffInline{EscapeStatus: status, Content: content}
  204. }
  205. func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *DiffLine, fileLanguage string, highlightLines map[int]template.HTML) template.HTML {
  206. h, ok := highlightLines[lineIdx-1]
  207. if ok {
  208. return h
  209. }
  210. if diffLine.Content == "" {
  211. return ""
  212. }
  213. if setting.Git.DisableDiffHighlight {
  214. return template.HTML(html.EscapeString(diffLine.Content[1:]))
  215. }
  216. h, _ = highlight.Code(diffSection.FileName, fileLanguage, diffLine.Content[1:])
  217. return h
  218. }
  219. func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline {
  220. var fileLanguage string
  221. var highlightedLeftLines, highlightedRightLines map[int]template.HTML
  222. // when a "diff section" is manually prepared by ExcerptBlob, it doesn't have "file" information
  223. if diffSection.file != nil {
  224. fileLanguage = diffSection.file.Language
  225. highlightedLeftLines, highlightedRightLines = diffSection.file.highlightedLeftLines, diffSection.file.highlightedRightLines
  226. }
  227. var lineHTML template.HTML
  228. hcd := newHighlightCodeDiff()
  229. if diffLineType == DiffLinePlain {
  230. // left and right are the same, no need to do line-level diff
  231. if leftLine != nil {
  232. lineHTML = diffSection.getLineContentForRender(leftLine.LeftIdx, leftLine, fileLanguage, highlightedLeftLines)
  233. } else if rightLine != nil {
  234. lineHTML = diffSection.getLineContentForRender(rightLine.RightIdx, rightLine, fileLanguage, highlightedRightLines)
  235. }
  236. } else {
  237. var diff1, diff2 template.HTML
  238. if leftLine != nil {
  239. diff1 = diffSection.getLineContentForRender(leftLine.LeftIdx, leftLine, fileLanguage, highlightedLeftLines)
  240. }
  241. if rightLine != nil {
  242. diff2 = diffSection.getLineContentForRender(rightLine.RightIdx, rightLine, fileLanguage, highlightedRightLines)
  243. }
  244. if diff1 != "" && diff2 != "" {
  245. // if only some parts of a line are changed, highlight these changed parts as "deleted/added".
  246. lineHTML = hcd.diffLineWithHighlight(diffLineType, diff1, diff2)
  247. } else {
  248. // if left is empty or right is empty (a line is fully deleted or added), then we do not need to diff anymore.
  249. // the tmpl code already adds background colors for these cases.
  250. lineHTML = util.Iif(diffLineType == DiffLineDel, diff1, diff2)
  251. }
  252. }
  253. return DiffInlineWithUnicodeEscape(lineHTML, locale)
  254. }
  255. // GetComputedInlineDiffFor computes inline diff for the given line.
  256. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
  257. // try to find equivalent diff line. ignore, otherwise
  258. switch diffLine.Type {
  259. case DiffLineSection:
  260. return getLineContent(diffLine.Content, locale)
  261. case DiffLineAdd:
  262. compareDiffLine := diffSection.GetLine(diffLine.Match)
  263. return diffSection.getDiffLineForRender(DiffLineAdd, compareDiffLine, diffLine, locale)
  264. case DiffLineDel:
  265. compareDiffLine := diffSection.GetLine(diffLine.Match)
  266. return diffSection.getDiffLineForRender(DiffLineDel, diffLine, compareDiffLine, locale)
  267. default: // Plain
  268. // TODO: there was an "if" check: `if diffLine.Content >strings.IndexByte(" +-", diffLine.Content[0]) > -1 { ... } else { ... }`
  269. // no idea why it needs that check, it seems that the "if" should be always true, so try to simplify the code
  270. return diffSection.getDiffLineForRender(DiffLinePlain, nil, diffLine, locale)
  271. }
  272. }
// DiffFile represents a file diff.
type DiffFile struct {
	// only used internally to parse Ambiguous filenames
	isAmbiguous bool

	// basic fields (parsed from diff result)
	Name        string // current file name; may be empty, see GetDiffFileName
	NameHash    string // set by ParsePatch from git.HashFilePathForWebUI(Name)
	OldName     string // file name on the old side of the diff
	Addition    int
	Deletion    int
	Type        DiffFileType
	Mode        string // set from "new mode" / "new file mode" header lines
	OldMode     string // set from "old mode" header lines
	IsCreated   bool
	IsDeleted   bool
	IsBin       bool
	IsLFSFile   bool
	IsRenamed   bool // set for both rename and copy headers
	IsSubmodule bool // set when a header line ends with mode 160000

	// basic fields but for render purpose only
	Sections                []*DiffSection
	IsIncomplete            bool
	IsIncompleteLineTooLong bool

	// will be filled by the extra loop in GitDiffForRender
	Language          string
	IsGenerated       bool
	IsVendored        bool
	SubmoduleDiffInfo *SubmoduleDiffInfo // IsSubmodule==true, then there must be a SubmoduleDiffInfo

	// will be filled by route handler
	IsProtected bool

	// will be filled by SyncUserSpecificDiff
	IsViewed                  bool // User specific
	HasChangedSinceLastReview bool // User specific

	// for render purpose only, will be filled by the extra loop in GitDiffForRender
	// (looked up with "line number - 1" as the key by getLineContentForRender)
	highlightedLeftLines  map[int]template.HTML
	highlightedRightLines map[int]template.HTML
}
  310. // GetType returns type of diff file.
  311. func (diffFile *DiffFile) GetType() int {
  312. return int(diffFile.Type)
  313. }
// DiffLimitedContent holds the size-limited content captured for the left and
// right side of a file diff by GetTailSectionAndLimitedContent. A side is nil
// when its content was not read.
type DiffLimitedContent struct {
	LeftContent, RightContent *limitByteWriter
}
  317. // GetTailSectionAndLimitedContent creates a fake DiffLineSection if the last section is not the end of the file
  318. func (diffFile *DiffFile) GetTailSectionAndLimitedContent(leftCommit, rightCommit *git.Commit) (_ *DiffSection, diffLimitedContent DiffLimitedContent) {
  319. var leftLineCount, rightLineCount int
  320. diffLimitedContent = DiffLimitedContent{}
  321. if diffFile.IsBin || diffFile.IsLFSFile {
  322. return nil, diffLimitedContent
  323. }
  324. if (diffFile.Type == DiffFileDel || diffFile.Type == DiffFileChange) && leftCommit != nil {
  325. leftLineCount, diffLimitedContent.LeftContent = getCommitFileLineCountAndLimitedContent(leftCommit, diffFile.OldName)
  326. }
  327. if (diffFile.Type == DiffFileAdd || diffFile.Type == DiffFileChange) && rightCommit != nil {
  328. rightLineCount, diffLimitedContent.RightContent = getCommitFileLineCountAndLimitedContent(rightCommit, diffFile.OldName)
  329. }
  330. if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange {
  331. return nil, diffLimitedContent
  332. }
  333. lastSection := diffFile.Sections[len(diffFile.Sections)-1]
  334. lastLine := lastSection.Lines[len(lastSection.Lines)-1]
  335. if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx {
  336. return nil, diffLimitedContent
  337. }
  338. tailDiffLine := &DiffLine{
  339. Type: DiffLineSection,
  340. Content: " ",
  341. SectionInfo: &DiffLineSectionInfo{
  342. Path: diffFile.Name,
  343. LastLeftIdx: lastLine.LeftIdx,
  344. LastRightIdx: lastLine.RightIdx,
  345. LeftIdx: leftLineCount,
  346. RightIdx: rightLineCount,
  347. },
  348. }
  349. tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}}
  350. return tailSection, diffLimitedContent
  351. }
  352. // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
  353. func (diffFile *DiffFile) GetDiffFileName() string {
  354. if diffFile.Name == "" {
  355. return diffFile.OldName
  356. }
  357. return diffFile.Name
  358. }
  359. func (diffFile *DiffFile) ShouldBeHidden() bool {
  360. return diffFile.IsGenerated || diffFile.IsViewed
  361. }
  362. func (diffFile *DiffFile) ModeTranslationKey(mode string) string {
  363. switch mode {
  364. case "040000":
  365. return "git.filemode.directory"
  366. case "100644":
  367. return "git.filemode.normal_file"
  368. case "100755":
  369. return "git.filemode.executable_file"
  370. case "120000":
  371. return "git.filemode.symbolic_link"
  372. case "160000":
  373. return "git.filemode.submodule"
  374. default:
  375. return mode
  376. }
  377. }
  378. type limitByteWriter struct {
  379. buf bytes.Buffer
  380. limit int
  381. }
  382. func (l *limitByteWriter) Write(p []byte) (n int, err error) {
  383. if l.buf.Len()+len(p) > l.limit {
  384. p = p[:l.limit-l.buf.Len()]
  385. }
  386. return l.buf.Write(p)
  387. }
  388. func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string) (lineCount int, limitWriter *limitByteWriter) {
  389. blob, err := commit.GetBlobByPath(filePath)
  390. if err != nil {
  391. return 0, nil
  392. }
  393. w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1}
  394. lineCount, err = blob.GetBlobLineCount(w)
  395. if err != nil {
  396. return 0, nil
  397. }
  398. return lineCount, w
  399. }
// Diff represents a difference between two git trees.
type Diff struct {
	// End is set by ParsePatch to the name of the first file that was left out
	// when the maxFiles limit was reached. Start is not written in this part
	// of the file.
	Start, End     string
	Files          []*DiffFile
	IsIncomplete   bool // set by ParsePatch when parsing stopped at the maxFiles limit
	NumViewedFiles int  // user-specific
}
  407. // LoadComments loads comments into each line
  408. func (diff *Diff) LoadComments(ctx context.Context, issue *issues_model.Issue, currentUser *user_model.User, showOutdatedComments bool) error {
  409. allComments, err := issues_model.FetchCodeComments(ctx, issue, currentUser, showOutdatedComments)
  410. if err != nil {
  411. return err
  412. }
  413. for _, file := range diff.Files {
  414. if lineCommits, ok := allComments[file.Name]; ok {
  415. for _, section := range file.Sections {
  416. for _, line := range section.Lines {
  417. if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok {
  418. line.Comments = append(line.Comments, comments...)
  419. }
  420. if comments, ok := lineCommits[int64(line.RightIdx)]; ok {
  421. line.Comments = append(line.Comments, comments...)
  422. }
  423. sort.SliceStable(line.Comments, func(i, j int) bool {
  424. return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix
  425. })
  426. }
  427. }
  428. }
  429. }
  430. return nil
  431. }
// cmdDiffHead is the prefix of the line that starts the entry of each file in
// a patch; ParsePatch and skipToNextDiffHead use it to find file boundaries.
const cmdDiffHead = "diff --git "
// ParsePatch builds a Diff object from a io.Reader and some parameters.
//
//   - maxLines and maxLineCharacters are passed on to parseHunks;
//     maxLineCharacters also sizes the read buffer (at least 4096 bytes).
//   - maxFiles: when it is > -1 and that many files have been collected,
//     parsing stops, the name of the next file is stored in Diff.End and the
//     diff is marked incomplete.
//   - skipToFile: when not empty, file entries are skipped until one whose name
//     equals skipToFile is found.
//
// If the very first read ends with io.EOF an empty Diff and a nil error are
// returned. On other errors the Diff built so far is returned together with
// the error.
func ParsePatch(ctx context.Context, maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) {
	log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile)
	var curFile *DiffFile

	skipping := skipToFile != ""

	diff := &Diff{Files: make([]*DiffFile, 0)}

	sb := strings.Builder{}

	// OK let's set a reasonable buffer size.
	// This should be at least the size of maxLineCharacters or 4096 whichever is larger.
	readerSize := max(maxLineCharacters, 4096)

	input := bufio.NewReaderSize(reader, readerSize)
	line, err := input.ReadString('\n')
	if err != nil {
		if err == io.EOF {
			return diff, nil
		}
		return diff, err
	}

	// prepareValue strips the header prefix p from s and trims the surrounding whitespace
	prepareValue := func(s, p string) string {
		return strings.TrimSpace(strings.TrimPrefix(s, p))
	}

	// Each iteration handles one file entry; "line" holds its "diff --git " header line.
parsingLoop:
	for {
		// 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
		// if it does not we have bad input!
		if !strings.HasPrefix(line, cmdDiffHead) {
			return diff, fmt.Errorf("invalid first file line: %s", line)
		}

		if maxFiles > -1 && len(diff.Files) >= maxFiles {
			// file limit reached: remember where we stopped and give up on the rest
			lastFile := createDiffFile(line)
			diff.End = lastFile.Name
			diff.IsIncomplete = true
			break parsingLoop
		}

		curFile = createDiffFile(line)
		if skipping {
			if curFile.Name != skipToFile {
				line, err = skipToNextDiffHead(input)
				if err != nil {
					if err == io.EOF {
						return diff, nil
					}
					return diff, err
				}
				continue
			}
			skipping = false
		}

		diff.Files = append(diff.Files, curFile)

		// 2. It is followed by one or more extended header lines:
		//
		//     old mode <mode>
		//     new mode <mode>
		//     deleted file mode <mode>
		//     new file mode <mode>
		//     copy from <path>
		//     copy to <path>
		//     rename from <path>
		//     rename to <path>
		//     similarity index <number>
		//     dissimilarity index <number>
		//     index <hash>..<hash> <mode>
		//
		// * <mode> 6-digit octal numbers including the file type and file permission bits.
		// * <path> does not include the a/ and b/ prefixes
		// * <number> percentage of unchanged lines for similarity, percentage of changed
		//   lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
		// * The index line includes the blob object names before and after the change.
		//   The <mode> is included if the file mode does not change; otherwise, separate
		//   lines indicate the old and the new mode.
		// 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
		//
		//     --- a/<path>
		//     +++ b/<path>
		//
		//     With multiple hunks
		//
		//     @@ <hunk descriptor> @@
		//     +added line
		//     -removed line
		//      unchanged line
		//
		// 4. Binary files get:
		//
		//     Binary files a/<path> and b/<path> differ
		//
		//     but one of a/<path> and b/<path> could be /dev/null.
	curFileLoop:
		for {
			line, err = input.ReadString('\n')
			if err != nil {
				if err != io.EOF {
					return diff, err
				}
				// end of input: go on with the post-processing below
				break parsingLoop
			}

			switch {
			case strings.HasPrefix(line, cmdDiffHead):
				// the next file entry starts here
				break curFileLoop
			case strings.HasPrefix(line, "old mode ") ||
				strings.HasPrefix(line, "new mode "):
				if strings.HasPrefix(line, "old mode ") {
					curFile.OldMode = prepareValue(line, "old mode ")
				}
				if strings.HasPrefix(line, "new mode ") {
					curFile.Mode = prepareValue(line, "new mode ")
				}
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule, curFile.SubmoduleDiffInfo = true, &SubmoduleDiffInfo{}
				}
			case strings.HasPrefix(line, "rename from "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileRename
				if curFile.isAmbiguous {
					curFile.OldName = prepareValue(line, "rename from ")
				}
			case strings.HasPrefix(line, "rename to "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileRename
				if curFile.isAmbiguous {
					curFile.Name = prepareValue(line, "rename to ")
					curFile.isAmbiguous = false
				}
			case strings.HasPrefix(line, "copy from "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileCopy
				if curFile.isAmbiguous {
					curFile.OldName = prepareValue(line, "copy from ")
				}
			case strings.HasPrefix(line, "copy to "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileCopy
				if curFile.isAmbiguous {
					curFile.Name = prepareValue(line, "copy to ")
					curFile.isAmbiguous = false
				}
			case strings.HasPrefix(line, "new file"):
				curFile.Type = DiffFileAdd
				curFile.IsCreated = true
				if strings.HasPrefix(line, "new file mode ") {
					curFile.Mode = prepareValue(line, "new file mode ")
				}
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule, curFile.SubmoduleDiffInfo = true, &SubmoduleDiffInfo{}
				}
			case strings.HasPrefix(line, "deleted"):
				curFile.Type = DiffFileDel
				curFile.IsDeleted = true
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule, curFile.SubmoduleDiffInfo = true, &SubmoduleDiffInfo{}
				}
			case strings.HasPrefix(line, "index"):
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule, curFile.SubmoduleDiffInfo = true, &SubmoduleDiffInfo{}
				}
			case strings.HasPrefix(line, "similarity index 100%"):
				curFile.Type = DiffFileRename
			case strings.HasPrefix(line, "Binary"):
				curFile.IsBin = true
			case strings.HasPrefix(line, "--- "):
				// Handle ambiguous filenames
				if curFile.isAmbiguous {
					// The shortest string that can end up here is:
					// "--- a\t\n" without the quotes.
					// This line has a len() of 7 but doesn't contain a oldName.
					// So the amount that the line need is at least 8 or more.
					// The code will otherwise panic for a out-of-bounds.
					if len(line) > 7 && line[4] == 'a' {
						// drop the "--- a/" prefix and the trailing newline
						curFile.OldName = line[6 : len(line)-1]
						if line[len(line)-2] == '\t' {
							// also drop the tab that precedes the newline
							curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
						}
					} else {
						curFile.OldName = ""
					}
				}
				// Otherwise do nothing with this line
			case strings.HasPrefix(line, "+++ "):
				// Handle ambiguous filenames
				if curFile.isAmbiguous {
					if len(line) > 6 && line[4] == 'b' {
						// drop the "+++ b/" prefix and the trailing newline
						curFile.Name = line[6 : len(line)-1]
						if line[len(line)-2] == '\t' {
							// also drop the tab that precedes the newline
							curFile.Name = curFile.Name[:len(curFile.Name)-1]
						}
						if curFile.OldName == "" {
							curFile.OldName = curFile.Name
						}
					} else {
						curFile.Name = curFile.OldName
					}
					curFile.isAmbiguous = false
				}
				// Otherwise do nothing with this line, but now switch to parsing hunks
				lineBytes, isFragment, err := parseHunks(ctx, curFile, maxLines, maxLineCharacters, input)
				if err != nil {
					if err != io.EOF {
						return diff, err
					}
					break parsingLoop
				}
				// parseHunks hands back the line it stopped at, possibly only the
				// first fragment of it: read the remaining fragments so that the
				// outer loop sees the complete line as the next file header.
				sb.Reset()
				_, _ = sb.Write(lineBytes)
				for isFragment {
					lineBytes, isFragment, err = input.ReadLine()
					if err != nil {
						// Now by the definition of ReadLine this cannot be io.EOF
						return diff, fmt.Errorf("unable to ReadLine: %w", err)
					}
					_, _ = sb.Write(lineBytes)
				}
				line = sb.String()
				sb.Reset()

				break curFileLoop
			}
		}
	}

	// TODO: There are numerous issues with this:
	// - we might want to consider detecting encoding while parsing but...
	// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
	diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3)
	diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3)
	diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer)
	diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer)
	diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer)
	for _, f := range diff.Files {
		f.NameHash = git.HashFilePathForWebUI(f.Name)

		for _, buffer := range diffLineTypeBuffers {
			buffer.Reset()
		}
		// collect the content of the file per line type, without the leading marker byte
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				if l.Type == DiffLineSection {
					continue
				}
				diffLineTypeBuffers[l.Type].WriteString(l.Content[1:])
				diffLineTypeBuffers[l.Type].WriteString("\n")
			}
		}
		// pick a decoder per line type when the detected charset is not UTF-8
		for lineType, buffer := range diffLineTypeBuffers {
			diffLineTypeDecoders[lineType] = nil
			if buffer.Len() == 0 {
				continue
			}
			charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
			if charsetLabel != "UTF-8" && err == nil {
				encoding, _ := stdcharset.Lookup(charsetLabel)
				if encoding != nil {
					diffLineTypeDecoders[lineType] = encoding.NewDecoder()
				}
			}
		}
		// decode the lines in place; a line that fails to decode is left as it is
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				decoder := diffLineTypeDecoders[l.Type]
				if decoder != nil {
					if c, _, err := transform.String(decoder, l.Content[1:]); err == nil {
						l.Content = l.Content[0:1] + c
					}
				}
			}
		}
	}

	return diff, nil
}
  698. func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
  699. // need to skip until the next cmdDiffHead
  700. var isFragment, wasFragment bool
  701. var lineBytes []byte
  702. for {
  703. lineBytes, isFragment, err = input.ReadLine()
  704. if err != nil {
  705. return "", err
  706. }
  707. if wasFragment {
  708. wasFragment = isFragment
  709. continue
  710. }
  711. if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) {
  712. break
  713. }
  714. wasFragment = isFragment
  715. }
  716. line = string(lineBytes)
  717. if isFragment {
  718. var tail string
  719. tail, err = input.ReadString('\n')
  720. if err != nil {
  721. return "", err
  722. }
  723. line += tail
  724. }
  725. return line, err
  726. }
  727. func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) {
  728. sb := strings.Builder{}
  729. var (
  730. curSection *DiffSection
  731. curFileLinesCount int
  732. curFileLFSPrefix bool
  733. )
  734. lastLeftIdx := -1
  735. leftLine, rightLine := 1, 1
  736. for {
  737. for isFragment {
  738. curFile.IsIncomplete = true
  739. curFile.IsIncompleteLineTooLong = true
  740. _, isFragment, err = input.ReadLine()
  741. if err != nil {
  742. // Now by the definition of ReadLine this cannot be io.EOF
  743. return nil, false, fmt.Errorf("unable to ReadLine: %w", err)
  744. }
  745. }
  746. sb.Reset()
  747. lineBytes, isFragment, err = input.ReadLine()
  748. if err != nil {
  749. if err == io.EOF {
  750. return lineBytes, isFragment, err
  751. }
  752. err = fmt.Errorf("unable to ReadLine: %w", err)
  753. return nil, false, err
  754. }
  755. if lineBytes[0] == 'd' {
  756. // End of hunks
  757. return lineBytes, isFragment, err
  758. }
  759. switch lineBytes[0] {
  760. case '@':
  761. if maxLines > -1 && curFileLinesCount >= maxLines {
  762. curFile.IsIncomplete = true
  763. continue
  764. }
  765. _, _ = sb.Write(lineBytes)
  766. for isFragment {
  767. // This is very odd indeed - we're in a section header and the line is too long
  768. // This really shouldn't happen...
  769. lineBytes, isFragment, err = input.ReadLine()
  770. if err != nil {
  771. // Now by the definition of ReadLine this cannot be io.EOF
  772. return nil, false, fmt.Errorf("unable to ReadLine: %w", err)
  773. }
  774. _, _ = sb.Write(lineBytes)
  775. }
  776. line := sb.String()
  777. // Create a new section to represent this hunk
  778. curSection = &DiffSection{file: curFile}
  779. lastLeftIdx = -1
  780. curFile.Sections = append(curFile.Sections, curSection)
  781. // FIXME: the "-1" can't be right, these "line idx" are all 1-based, maybe there are other bugs that covers this bug.
  782. lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1)
  783. diffLine := &DiffLine{
  784. Type: DiffLineSection,
  785. Content: line,
  786. SectionInfo: lineSectionInfo,
  787. }
  788. curSection.Lines = append(curSection.Lines, diffLine)
  789. curSection.FileName = curFile.Name
  790. // update line number.
  791. leftLine = lineSectionInfo.LeftIdx
  792. rightLine = lineSectionInfo.RightIdx
  793. continue
  794. case '\\':
  795. if maxLines > -1 && curFileLinesCount >= maxLines {
  796. curFile.IsIncomplete = true
  797. continue
  798. }
  799. // This is used only to indicate that the current file does not have a terminal newline
  800. if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) {
  801. return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
  802. }
  803. // Technically this should be the end the file!
  804. // FIXME: we should be putting a marker at the end of the file if there is no terminal new line
  805. continue
  806. case '+':
  807. curFileLinesCount++
  808. curFile.Addition++
  809. if maxLines > -1 && curFileLinesCount >= maxLines {
  810. curFile.IsIncomplete = true
  811. continue
  812. }
  813. diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1}
  814. rightLine++
  815. if curSection == nil {
  816. // Create a new section to represent this hunk
  817. curSection = &DiffSection{file: curFile}
  818. curFile.Sections = append(curFile.Sections, curSection)
  819. lastLeftIdx = -1
  820. }
  821. if lastLeftIdx > -1 {
  822. diffLine.Match = lastLeftIdx
  823. curSection.Lines[lastLeftIdx].Match = len(curSection.Lines)
  824. lastLeftIdx++
  825. if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel {
  826. lastLeftIdx = -1
  827. }
  828. }
  829. curSection.Lines = append(curSection.Lines, diffLine)
  830. // Parse submodule additions
  831. if curFile.SubmoduleDiffInfo != nil {
  832. if ref, found := bytes.CutPrefix(lineBytes, []byte("+Subproject commit ")); found {
  833. curFile.SubmoduleDiffInfo.NewRefID = string(bytes.TrimSpace(ref))
  834. }
  835. }
  836. case '-':
  837. curFileLinesCount++
  838. curFile.Deletion++
  839. if maxLines > -1 && curFileLinesCount >= maxLines {
  840. curFile.IsIncomplete = true
  841. continue
  842. }
  843. diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1}
  844. if leftLine > 0 {
  845. leftLine++
  846. }
  847. if curSection == nil {
  848. // Create a new section to represent this hunk
  849. curSection = &DiffSection{file: curFile}
  850. curFile.Sections = append(curFile.Sections, curSection)
  851. lastLeftIdx = -1
  852. }
  853. if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel {
  854. lastLeftIdx = len(curSection.Lines)
  855. }
  856. curSection.Lines = append(curSection.Lines, diffLine)
  857. // Parse submodule deletion
  858. if curFile.SubmoduleDiffInfo != nil {
  859. if ref, found := bytes.CutPrefix(lineBytes, []byte("-Subproject commit ")); found {
  860. curFile.SubmoduleDiffInfo.PreviousRefID = string(bytes.TrimSpace(ref))
  861. }
  862. }
  863. case ' ':
  864. curFileLinesCount++
  865. if maxLines > -1 && curFileLinesCount >= maxLines {
  866. curFile.IsIncomplete = true
  867. continue
  868. }
  869. diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine}
  870. leftLine++
  871. rightLine++
  872. lastLeftIdx = -1
  873. if curSection == nil {
  874. // Create a new section to represent this hunk
  875. curSection = &DiffSection{file: curFile}
  876. curFile.Sections = append(curFile.Sections, curSection)
  877. }
  878. curSection.Lines = append(curSection.Lines, diffLine)
  879. default:
  880. // This is unexpected
  881. return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
  882. }
  883. line := string(lineBytes)
  884. if isFragment {
  885. curFile.IsIncomplete = true
  886. curFile.IsIncompleteLineTooLong = true
  887. for isFragment {
  888. lineBytes, isFragment, err = input.ReadLine()
  889. if err != nil {
  890. // Now by the definition of ReadLine this cannot be io.EOF
  891. return lineBytes, isFragment, fmt.Errorf("unable to ReadLine: %w", err)
  892. }
  893. }
  894. }
  895. if len(line) > maxLineCharacters {
  896. curFile.IsIncomplete = true
  897. curFile.IsIncompleteLineTooLong = true
  898. line = line[:maxLineCharacters]
  899. }
  900. curSection.Lines[len(curSection.Lines)-1].Content = line
  901. // handle LFS
  902. if line[1:] == lfs.MetaFileIdentifier {
  903. curFileLFSPrefix = true
  904. } else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) {
  905. oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix)
  906. if len(oid) == 64 {
  907. m := &git_model.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}
  908. count, err := db.CountByBean(ctx, m)
  909. if err == nil && count > 0 {
  910. curFile.IsBin = true
  911. curFile.IsLFSFile = true
  912. curSection.Lines = nil
  913. lastLeftIdx = -1
  914. }
  915. }
  916. }
  917. }
  918. }
  919. func createDiffFile(line string) *DiffFile {
  920. // The a/ and b/ filenames are the same unless rename/copy is involved.
  921. // Especially, even for a creation or a deletion, /dev/null is not used
  922. // in place of the a/ or b/ filenames.
  923. //
  924. // When rename/copy is involved, file1 and file2 show the name of the
  925. // source file of the rename/copy and the name of the file that rename/copy
  926. // produces, respectively.
  927. //
  928. // Path names are quoted if necessary.
  929. //
  930. // This means that you should always be able to determine the file name even when
  931. // there is potential ambiguity...
  932. //
  933. // but we can be simpler with our heuristics by just forcing git to prefix things nicely
  934. curFile := &DiffFile{
  935. Type: DiffFileChange,
  936. Sections: make([]*DiffSection, 0, 10),
  937. }
  938. rd := strings.NewReader(line[len(cmdDiffHead):] + " ")
  939. curFile.Type = DiffFileChange
  940. var oldNameAmbiguity, newNameAmbiguity bool
  941. curFile.OldName, oldNameAmbiguity = readFileName(rd)
  942. curFile.Name, newNameAmbiguity = readFileName(rd)
  943. if oldNameAmbiguity && newNameAmbiguity {
  944. curFile.isAmbiguous = true
  945. // OK we should bet that the oldName and the newName are the same if they can be made to be same
  946. // So we need to start again ...
  947. if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
  948. // diff --git a/b b/b b/b b/b b/b b/b
  949. //
  950. midpoint := (len(line) + len(cmdDiffHead) - 1) / 2
  951. newPart, oldPart := line[len(cmdDiffHead):midpoint], line[midpoint+1:]
  952. if len(newPart) > 2 && len(oldPart) > 2 && newPart[2:] == oldPart[2:] {
  953. curFile.OldName = oldPart[2:]
  954. curFile.Name = oldPart[2:]
  955. }
  956. }
  957. }
  958. curFile.IsRenamed = curFile.Name != curFile.OldName
  959. return curFile
  960. }
  961. func readFileName(rd *strings.Reader) (string, bool) {
  962. ambiguity := false
  963. var name string
  964. char, _ := rd.ReadByte()
  965. _ = rd.UnreadByte()
  966. if char == '"' {
  967. _, _ = fmt.Fscanf(rd, "%q ", &name)
  968. if len(name) == 0 {
  969. log.Error("Reader has no file name: reader=%+v", rd)
  970. return "", true
  971. }
  972. if name[0] == '\\' {
  973. name = name[1:]
  974. }
  975. } else {
  976. // This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
  977. ambiguity = true
  978. _, _ = fmt.Fscanf(rd, "%s ", &name)
  979. char, _ := rd.ReadByte()
  980. _ = rd.UnreadByte()
  981. for !(char == 0 || char == '"' || char == 'b') {
  982. var suffix string
  983. _, _ = fmt.Fscanf(rd, "%s ", &suffix)
  984. name += " " + suffix
  985. char, _ = rd.ReadByte()
  986. _ = rd.UnreadByte()
  987. }
  988. }
  989. if len(name) < 2 {
  990. log.Error("Unable to determine name from reader: reader=%+v", rd)
  991. return "", true
  992. }
  993. return name[2:], ambiguity
  994. }
  995. // DiffOptions represents the options for a DiffRange
  996. type DiffOptions struct {
  997. BeforeCommitID string
  998. AfterCommitID string
  999. SkipTo string
  1000. MaxLines int
  1001. MaxLineCharacters int
  1002. MaxFiles int
  1003. WhitespaceBehavior gitcmd.TrustedCmdArgs
  1004. DirectComparison bool
  1005. }
  1006. func guessBeforeCommitForDiff(gitRepo *git.Repository, beforeCommitID string, afterCommit *git.Commit) (actualBeforeCommit *git.Commit, actualBeforeCommitID git.ObjectID, err error) {
  1007. commitObjectFormat := afterCommit.ID.Type()
  1008. isBeforeCommitIDEmpty := beforeCommitID == "" || beforeCommitID == commitObjectFormat.EmptyObjectID().String()
  1009. if isBeforeCommitIDEmpty && afterCommit.ParentCount() == 0 {
  1010. actualBeforeCommitID = commitObjectFormat.EmptyTree()
  1011. } else {
  1012. if isBeforeCommitIDEmpty {
  1013. actualBeforeCommit, err = afterCommit.Parent(0)
  1014. } else {
  1015. actualBeforeCommit, err = gitRepo.GetCommit(beforeCommitID)
  1016. }
  1017. if err != nil {
  1018. return nil, nil, err
  1019. }
  1020. actualBeforeCommitID = actualBeforeCommit.ID
  1021. }
  1022. return actualBeforeCommit, actualBeforeCommitID, nil
  1023. }
  1024. // getDiffBasic builds a Diff between two commits of a repository.
  1025. // Passing the empty string as beforeCommitID returns a diff from the parent commit.
  1026. // The whitespaceBehavior is either an empty string or a git flag
  1027. // Returned beforeCommit could be nil if the afterCommit doesn't have parent commit
  1028. func getDiffBasic(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (_ *Diff, beforeCommit, afterCommit *git.Commit, err error) {
  1029. repoPath := gitRepo.Path
  1030. afterCommit, err = gitRepo.GetCommit(opts.AfterCommitID)
  1031. if err != nil {
  1032. return nil, nil, nil, err
  1033. }
  1034. beforeCommit, beforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit)
  1035. if err != nil {
  1036. return nil, nil, nil, err
  1037. }
  1038. cmdDiff := gitcmd.NewCommand().
  1039. AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
  1040. AddArguments(opts.WhitespaceBehavior...)
  1041. // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
  1042. // so if we are using at least this version of git we don't have to tell ParsePatch to do
  1043. // the skipping for us
  1044. parsePatchSkipToFile := opts.SkipTo
  1045. if opts.SkipTo != "" && git.DefaultFeatures().CheckVersionAtLeast("2.31") {
  1046. cmdDiff.AddOptionFormat("--skip-to=%s", opts.SkipTo)
  1047. parsePatchSkipToFile = ""
  1048. }
  1049. cmdDiff.AddDynamicArguments(beforeCommitID.String(), opts.AfterCommitID)
  1050. cmdDiff.AddDashesAndList(files...)
  1051. cmdCtx, cmdCancel := context.WithCancel(ctx)
  1052. defer cmdCancel()
  1053. reader, writer := io.Pipe()
  1054. defer func() {
  1055. _ = reader.Close()
  1056. _ = writer.Close()
  1057. }()
  1058. go func() {
  1059. stderr := &bytes.Buffer{}
  1060. if err := cmdDiff.Run(cmdCtx, &gitcmd.RunOpts{
  1061. Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second,
  1062. Dir: repoPath,
  1063. Stdout: writer,
  1064. Stderr: stderr,
  1065. }); err != nil && !git.IsErrCanceledOrKilled(err) {
  1066. log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
  1067. }
  1068. _ = writer.Close()
  1069. }()
  1070. diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
  1071. // Ensure the git process is killed if it didn't exit already
  1072. cmdCancel()
  1073. if err != nil {
  1074. return nil, nil, nil, fmt.Errorf("unable to ParsePatch: %w", err)
  1075. }
  1076. diff.Start = opts.SkipTo
  1077. return diff, beforeCommit, afterCommit, nil
  1078. }
  1079. func GetDiffForAPI(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1080. diff, _, _, err := getDiffBasic(ctx, gitRepo, opts, files...)
  1081. return diff, err
  1082. }
  1083. func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1084. diff, beforeCommit, afterCommit, err := getDiffBasic(ctx, gitRepo, opts, files...)
  1085. if err != nil {
  1086. return nil, err
  1087. }
  1088. checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage, attribute.Diff})
  1089. if err != nil {
  1090. return nil, err
  1091. }
  1092. defer checker.Close()
  1093. for _, diffFile := range diff.Files {
  1094. isVendored := optional.None[bool]()
  1095. isGenerated := optional.None[bool]()
  1096. attrDiff := optional.None[string]()
  1097. attrs, err := checker.CheckPath(diffFile.Name)
  1098. if err == nil {
  1099. isVendored, isGenerated = attrs.GetVendored(), attrs.GetGenerated()
  1100. language := attrs.GetLanguage()
  1101. if language.Has() {
  1102. diffFile.Language = language.Value()
  1103. }
  1104. attrDiff = attrs.Get(attribute.Diff).ToString()
  1105. }
  1106. // Populate Submodule URLs
  1107. if diffFile.SubmoduleDiffInfo != nil {
  1108. diffFile.SubmoduleDiffInfo.PopulateURL(repoLink, diffFile, beforeCommit, afterCommit)
  1109. }
  1110. if !isVendored.Has() {
  1111. isVendored = optional.Some(analyze.IsVendor(diffFile.Name))
  1112. }
  1113. diffFile.IsVendored = isVendored.Value()
  1114. if !isGenerated.Has() {
  1115. isGenerated = optional.Some(analyze.IsGenerated(diffFile.Name))
  1116. }
  1117. diffFile.IsGenerated = isGenerated.Value()
  1118. tailSection, limitedContent := diffFile.GetTailSectionAndLimitedContent(beforeCommit, afterCommit)
  1119. if tailSection != nil {
  1120. diffFile.Sections = append(diffFile.Sections, tailSection)
  1121. }
  1122. shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
  1123. if shouldFullFileHighlight {
  1124. if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize {
  1125. diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String())
  1126. }
  1127. if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize {
  1128. diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String())
  1129. }
  1130. }
  1131. }
  1132. return diff, nil
  1133. }
  1134. func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML {
  1135. highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
  1136. splitLines := strings.Split(string(highlightedNewContent), "\n")
  1137. lines := make(map[int]template.HTML, len(splitLines))
  1138. // only save the highlighted lines we need, but not the whole file, to save memory
  1139. for _, sec := range diffFile.Sections {
  1140. for _, ln := range sec.Lines {
  1141. lineIdx := ln.LeftIdx
  1142. if !isLeft {
  1143. lineIdx = ln.RightIdx
  1144. }
  1145. if lineIdx >= 1 {
  1146. idx := lineIdx - 1
  1147. if idx < len(splitLines) {
  1148. lines[idx] = template.HTML(splitLines[idx])
  1149. }
  1150. }
  1151. }
  1152. }
  1153. return lines
  1154. }
  // DiffShortStat holds the summary numbers of a diff as filled in by
  // GetDiffShortStat.
  type DiffShortStat struct {
  	// NumFiles is the number of changed files.
  	NumFiles int
  	// TotalAddition is the total number of added lines.
  	TotalAddition int
  	// TotalDeletion is the total number of deleted lines.
  	TotalDeletion int
  }
  1158. func GetDiffShortStat(gitRepo *git.Repository, beforeCommitID, afterCommitID string) (*DiffShortStat, error) {
  1159. repoPath := gitRepo.Path
  1160. afterCommit, err := gitRepo.GetCommit(afterCommitID)
  1161. if err != nil {
  1162. return nil, err
  1163. }
  1164. _, actualBeforeCommitID, err := guessBeforeCommitForDiff(gitRepo, beforeCommitID, afterCommit)
  1165. if err != nil {
  1166. return nil, err
  1167. }
  1168. diff := &DiffShortStat{}
  1169. diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStatByCmdArgs(gitRepo.Ctx, repoPath, nil, actualBeforeCommitID.String(), afterCommitID)
  1170. if err != nil {
  1171. return nil, err
  1172. }
  1173. return diff, nil
  1174. }
  1175. // SyncUserSpecificDiff inserts user-specific data such as which files the user has already viewed on the given diff
  1176. // Additionally, the database is updated asynchronously if files have changed since the last review
  1177. func SyncUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, diff *Diff, opts *DiffOptions) (*pull_model.ReviewState, error) {
  1178. review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID)
  1179. if err != nil {
  1180. return nil, err
  1181. }
  1182. if review == nil || len(review.UpdatedFiles) == 0 {
  1183. return review, nil
  1184. }
  1185. latestCommit := opts.AfterCommitID
  1186. if latestCommit == "" {
  1187. latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't
  1188. }
  1189. changedFiles, errIgnored := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit)
  1190. // There are way too many possible errors.
  1191. // Examples are various git errors such as the commit the review was based on was gc'ed and hence doesn't exist anymore as well as unrecoverable errors where we should serve a 500 response
  1192. // Due to the current architecture and physical limitation of needing to compare explicit error messages, we can only choose one approach without the code getting ugly
  1193. // For SOME of the errors such as the gc'ed commit, it would be best to mark all files as changed
  1194. // But as that does not work for all potential errors, we simply mark all files as unchanged and drop the error which always works, even if not as good as possible
  1195. if errIgnored != nil {
  1196. log.Error("Could not get changed files between %s and %s for pull request %d in repo with path %s. Assuming no changes. Error: %w", review.CommitSHA, latestCommit, pull.Index, gitRepo.Path, err)
  1197. }
  1198. filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState)
  1199. outer:
  1200. for _, diffFile := range diff.Files {
  1201. fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()]
  1202. // Check whether it was previously detected that the file has changed since the last review
  1203. if fileViewedState == pull_model.HasChanged {
  1204. diffFile.HasChangedSinceLastReview = true
  1205. continue
  1206. }
  1207. filename := diffFile.GetDiffFileName()
  1208. // Check explicitly whether the file has changed since the last review
  1209. for _, changedFile := range changedFiles {
  1210. diffFile.HasChangedSinceLastReview = filename == changedFile
  1211. if diffFile.HasChangedSinceLastReview {
  1212. filesChangedSinceLastDiff[filename] = pull_model.HasChanged
  1213. continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted
  1214. }
  1215. }
  1216. // Check whether the file has already been viewed
  1217. if fileViewedState == pull_model.Viewed {
  1218. diffFile.IsViewed = true
  1219. diff.NumViewedFiles++
  1220. }
  1221. }
  1222. // Explicitly store files that have changed in the database, if any is present at all.
  1223. // This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed.
  1224. // On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed.
  1225. if len(filesChangedSinceLastDiff) > 0 {
  1226. err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff)
  1227. if err != nil {
  1228. log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err)
  1229. return nil, err
  1230. }
  1231. }
  1232. return review, nil
  1233. }
  1234. // CommentAsDiff returns c.Patch as *Diff
  1235. func CommentAsDiff(ctx context.Context, c *issues_model.Comment) (*Diff, error) {
  1236. diff, err := ParsePatch(ctx, setting.Git.MaxGitDiffLines,
  1237. setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "")
  1238. if err != nil {
  1239. log.Error("Unable to parse patch: %v", err)
  1240. return nil, err
  1241. }
  1242. if len(diff.Files) == 0 {
  1243. return nil, fmt.Errorf("no file found for comment ID: %d", c.ID)
  1244. }
  1245. secs := diff.Files[0].Sections
  1246. if len(secs) == 0 {
  1247. return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID)
  1248. }
  1249. return diff, nil
  1250. }
  1251. // CommentMustAsDiff executes AsDiff and logs the error instead of returning
  1252. func CommentMustAsDiff(ctx context.Context, c *issues_model.Comment) *Diff {
  1253. if c == nil {
  1254. return nil
  1255. }
  1256. defer func() {
  1257. if err := recover(); err != nil {
  1258. log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2))
  1259. }
  1260. }()
  1261. diff, err := CommentAsDiff(ctx, c)
  1262. if err != nil {
  1263. log.Warn("CommentMustAsDiff: %v", err)
  1264. }
  1265. return diff
  1266. }
  1267. // GetWhitespaceFlag returns git diff flag for treating whitespaces
  1268. func GetWhitespaceFlag(whitespaceBehavior string) gitcmd.TrustedCmdArgs {
  1269. whitespaceFlags := map[string]gitcmd.TrustedCmdArgs{
  1270. "ignore-all": {"-w"},
  1271. "ignore-change": {"-b"},
  1272. "ignore-eol": {"--ignore-space-at-eol"},
  1273. "show-all": nil,
  1274. }
  1275. if flag, ok := whitespaceFlags[whitespaceBehavior]; ok {
  1276. return flag
  1277. }
  1278. return nil
  1279. }