gitea源码

language_stats_gogit.go 4.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. //go:build gogit
  4. package languagestats
  5. import (
  6. "bytes"
  7. "io"
  8. "code.gitea.io/gitea/modules/analyze"
  9. git_module "code.gitea.io/gitea/modules/git"
  10. "code.gitea.io/gitea/modules/git/attribute"
  11. "code.gitea.io/gitea/modules/optional"
  12. "github.com/go-enry/go-enry/v2"
  13. "github.com/go-git/go-git/v5"
  14. "github.com/go-git/go-git/v5/plumbing"
  15. "github.com/go-git/go-git/v5/plumbing/object"
  16. )
  17. // GetLanguageStats calculates language stats for git repository at specified commit
  18. func GetLanguageStats(repo *git_module.Repository, commitID string) (map[string]int64, error) {
  19. r, err := git.PlainOpen(repo.Path)
  20. if err != nil {
  21. return nil, err
  22. }
  23. rev, err := r.ResolveRevision(plumbing.Revision(commitID))
  24. if err != nil {
  25. return nil, err
  26. }
  27. commit, err := r.CommitObject(*rev)
  28. if err != nil {
  29. return nil, err
  30. }
  31. tree, err := commit.Tree()
  32. if err != nil {
  33. return nil, err
  34. }
  35. checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes)
  36. if err != nil {
  37. return nil, err
  38. }
  39. defer checker.Close()
  40. // sizes contains the current calculated size of all files by language
  41. sizes := make(map[string]int64)
  42. // by default we will only count the sizes of programming languages or markup languages
  43. // unless they are explicitly set using linguist-language
  44. includedLanguage := map[string]bool{}
  45. // or if there's only one language in the repository
  46. firstExcludedLanguage := ""
  47. firstExcludedLanguageSize := int64(0)
  48. err = tree.Files().ForEach(func(f *object.File) error {
  49. if f.Size == 0 {
  50. return nil
  51. }
  52. isVendored := optional.None[bool]()
  53. isGenerated := optional.None[bool]()
  54. isDocumentation := optional.None[bool]()
  55. isDetectable := optional.None[bool]()
  56. attrs, err := checker.CheckPath(f.Name)
  57. if err == nil {
  58. isVendored = attrs.GetVendored()
  59. if isVendored.ValueOrDefault(false) {
  60. return nil
  61. }
  62. isGenerated = attrs.GetGenerated()
  63. if isGenerated.ValueOrDefault(false) {
  64. return nil
  65. }
  66. isDocumentation = attrs.GetDocumentation()
  67. if isDocumentation.ValueOrDefault(false) {
  68. return nil
  69. }
  70. isDetectable = attrs.GetDetectable()
  71. if !isDetectable.ValueOrDefault(true) {
  72. return nil
  73. }
  74. hasLanguage := attrs.GetLanguage()
  75. if hasLanguage.Value() != "" {
  76. language := hasLanguage.Value()
  77. // group languages, such as Pug -> HTML; SCSS -> CSS
  78. group := enry.GetLanguageGroup(language)
  79. if len(group) != 0 {
  80. language = group
  81. }
  82. // this language will always be added to the size
  83. sizes[language] += f.Size
  84. return nil
  85. }
  86. }
  87. if (!isVendored.Has() && analyze.IsVendor(f.Name)) ||
  88. enry.IsDotFile(f.Name) ||
  89. (!isDocumentation.Has() && enry.IsDocumentation(f.Name)) ||
  90. enry.IsConfiguration(f.Name) {
  91. return nil
  92. }
  93. // If content can not be read or file is too big just do detection by filename
  94. var content []byte
  95. if f.Size <= bigFileSize {
  96. content, _ = readFile(f, fileSizeLimit)
  97. }
  98. if !isGenerated.Has() && enry.IsGenerated(f.Name, content) {
  99. return nil
  100. }
  101. language := analyze.GetCodeLanguage(f.Name, content)
  102. if language == enry.OtherLanguage || language == "" {
  103. return nil
  104. }
  105. // group languages, such as Pug -> HTML; SCSS -> CSS
  106. group := enry.GetLanguageGroup(language)
  107. if group != "" {
  108. language = group
  109. }
  110. included, checked := includedLanguage[language]
  111. if !checked {
  112. langtype := enry.GetLanguageType(language)
  113. included = langtype == enry.Programming || langtype == enry.Markup
  114. includedLanguage[language] = included
  115. }
  116. if included || isDetectable.ValueOrDefault(false) {
  117. sizes[language] += f.Size
  118. } else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
  119. firstExcludedLanguage = language
  120. firstExcludedLanguageSize += f.Size
  121. }
  122. return nil
  123. })
  124. if err != nil {
  125. return nil, err
  126. }
  127. // If there are no included languages add the first excluded language
  128. if len(sizes) == 0 && firstExcludedLanguage != "" {
  129. sizes[firstExcludedLanguage] = firstExcludedLanguageSize
  130. }
  131. return mergeLanguageStats(sizes), nil
  132. }
  133. func readFile(f *object.File, limit int64) ([]byte, error) {
  134. r, err := f.Reader()
  135. if err != nil {
  136. return nil, err
  137. }
  138. defer r.Close()
  139. if limit <= 0 {
  140. return io.ReadAll(r)
  141. }
  142. size := f.Size
  143. if limit > 0 && size > limit {
  144. size = limit
  145. }
  146. buf := bytes.NewBuffer(nil)
  147. buf.Grow(int(size))
  148. _, err = io.Copy(buf, io.LimitReader(r, limit))
  149. return buf.Bytes(), err
  150. }