| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252 |
- // Copyright 2025 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package gitdiff
-
- import (
- "bufio"
- "context"
- "errors"
- "fmt"
- "io"
- "strconv"
- "strings"
-
- "code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/git/gitcmd"
- "code.gitea.io/gitea/modules/log"
- )
-
- type DiffTree struct {
- Files []*DiffTreeRecord
- }
-
- type DiffTreeRecord struct {
- // Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied', 'typechanged', 'unmerged', 'unknown'
- Status string
-
- // For renames and copies, the percentage of similarity between the source and target of the move/rename.
- Score uint8
-
- HeadPath string
- BasePath string
- HeadMode git.EntryMode
- BaseMode git.EntryMode
- HeadBlobID string
- BaseBlobID string
- }
-
- // GetDiffTree returns the list of path of the files that have changed between the two commits.
- // If useMergeBase is true, the diff will be calculated using the merge base of the two commits.
- // This is the same behavior as using a three-dot diff in git diff.
- func GetDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (*DiffTree, error) {
- gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, useMergeBase, baseSha, headSha)
- if err != nil {
- return nil, err
- }
-
- return &DiffTree{
- Files: gitDiffTreeRecords,
- }, nil
- }
-
- func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) ([]*DiffTreeRecord, error) {
- useMergeBase, baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, useMergeBase, baseSha, headSha)
- if err != nil {
- return nil, err
- }
-
- cmd := gitcmd.NewCommand("diff-tree", "--raw", "-r", "--find-renames", "--root")
- if useMergeBase {
- cmd.AddArguments("--merge-base")
- }
- cmd.AddDynamicArguments(baseCommitID, headCommitID)
- stdout, _, runErr := cmd.RunStdString(ctx, &gitcmd.RunOpts{Dir: gitRepo.Path})
- if runErr != nil {
- log.Warn("git diff-tree: %v", runErr)
- return nil, runErr
- }
-
- return parseGitDiffTree(strings.NewReader(stdout))
- }
-
- func validateGitDiffTreeArguments(gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (shouldUseMergeBase bool, resolvedBaseSha, resolvedHeadSha string, err error) {
- // if the head is empty its an error
- if headSha == "" {
- return false, "", "", errors.New("headSha is empty")
- }
-
- // if the head commit doesn't exist its and error
- headCommit, err := gitRepo.GetCommit(headSha)
- if err != nil {
- return false, "", "", fmt.Errorf("failed to get commit headSha: %v", err)
- }
- headCommitID := headCommit.ID.String()
-
- // if the base is empty we should use the parent of the head commit
- if baseSha == "" {
- // if the headCommit has no parent we should use an empty commit
- // this can happen when we are generating a diff against an orphaned commit
- if headCommit.ParentCount() == 0 {
- objectFormat, err := gitRepo.GetObjectFormat()
- if err != nil {
- return false, "", "", err
- }
-
- // We set use merge base to false because we have no base commit
- return false, objectFormat.EmptyTree().String(), headCommitID, nil
- }
-
- baseCommit, err := headCommit.Parent(0)
- if err != nil {
- return false, "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headCommit.ID.String(), err)
- }
- return useMergeBase, baseCommit.ID.String(), headCommitID, nil
- }
-
- // try and get the base commit
- baseCommit, err := gitRepo.GetCommit(baseSha)
- // propagate the error if we couldn't get the base commit
- if err != nil {
- return useMergeBase, "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err)
- }
-
- return useMergeBase, baseCommit.ID.String(), headCommit.ID.String(), nil
- }
-
- func parseGitDiffTree(gitOutput io.Reader) ([]*DiffTreeRecord, error) {
- /*
- The output of `git diff-tree --raw -r --find-renames` is of the form:
-
- :<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
-
- or for renames:
-
- :<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path>
-
- See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details
- */
- results := make([]*DiffTreeRecord, 0)
-
- lines := bufio.NewScanner(gitOutput)
- for lines.Scan() {
- line := lines.Text()
-
- if len(line) == 0 {
- continue
- }
-
- record, err := parseGitDiffTreeLine(line)
- if err != nil {
- return nil, err
- }
-
- results = append(results, record)
- }
-
- if err := lines.Err(); err != nil {
- return nil, err
- }
-
- return results, nil
- }
-
- func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
- line = strings.TrimPrefix(line, ":")
- splitSections := strings.SplitN(line, "\t", 2)
- if len(splitSections) < 2 {
- return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`)", line)
- }
-
- fields := strings.Fields(splitSections[0])
- if len(fields) < 5 {
- return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 5 space delimited values got %d)", line, len(fields))
- }
-
- baseMode, err := git.ParseEntryMode(fields[0])
- if err != nil {
- return nil, err
- }
-
- headMode, err := git.ParseEntryMode(fields[1])
- if err != nil {
- return nil, err
- }
-
- baseBlobID := fields[2]
- headBlobID := fields[3]
-
- status, score, err := statusFromLetter(fields[4])
- if err != nil {
- return nil, fmt.Errorf("unparsable output for diff-tree --raw: %s, error: %s", line, err)
- }
-
- filePaths := strings.Split(splitSections[1], "\t")
-
- var headPath, basePath string
- if status == "renamed" {
- if len(filePaths) != 2 {
- return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
- }
- basePath = filePaths[0]
- headPath = filePaths[1]
- } else {
- basePath = filePaths[0]
- headPath = filePaths[0]
- }
-
- return &DiffTreeRecord{
- Status: status,
- Score: score,
- BaseMode: baseMode,
- HeadMode: headMode,
- BaseBlobID: baseBlobID,
- HeadBlobID: headBlobID,
- BasePath: basePath,
- HeadPath: headPath,
- }, nil
- }
-
- func statusFromLetter(rawStatus string) (status string, score uint8, err error) {
- if len(rawStatus) < 1 {
- return "", 0, errors.New("empty status letter")
- }
- switch rawStatus[0] {
- case 'A':
- return "added", 0, nil
- case 'D':
- return "deleted", 0, nil
- case 'M':
- return "modified", 0, nil
- case 'R':
- score, err = tryParseStatusScore(rawStatus)
- return "renamed", score, err
- case 'C':
- score, err = tryParseStatusScore(rawStatus)
- return "copied", score, err
- case 'T':
- return "typechanged", 0, nil
- case 'U':
- return "unmerged", 0, nil
- case 'X':
- return "unknown", 0, nil
- default:
- return "", 0, fmt.Errorf("unknown status letter: '%s'", rawStatus)
- }
- }
-
- func tryParseStatusScore(rawStatus string) (uint8, error) {
- if len(rawStatus) < 2 {
- return 0, errors.New("status score missing")
- }
-
- score, err := strconv.ParseUint(rawStatus[1:], 10, 8)
- if err != nil {
- return 0, fmt.Errorf("failed to parse status score: %w", err)
- } else if score > 100 {
- return 0, fmt.Errorf("status score out of range: %d", score)
- }
-
- return uint8(score), nil
- }
|