gitea源码

indexer_test.go 8.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package code
  4. import (
  5. "context"
  6. "os"
  7. "slices"
  8. "testing"
  9. "code.gitea.io/gitea/models/db"
  10. "code.gitea.io/gitea/models/unittest"
  11. indexer_module "code.gitea.io/gitea/modules/indexer"
  12. "code.gitea.io/gitea/modules/indexer/code/bleve"
  13. "code.gitea.io/gitea/modules/indexer/code/elasticsearch"
  14. "code.gitea.io/gitea/modules/indexer/code/internal"
  15. "code.gitea.io/gitea/modules/setting"
  16. "code.gitea.io/gitea/modules/test"
  17. "code.gitea.io/gitea/modules/util"
  18. _ "code.gitea.io/gitea/models"
  19. _ "code.gitea.io/gitea/models/actions"
  20. _ "code.gitea.io/gitea/models/activities"
  21. "github.com/stretchr/testify/assert"
  22. "github.com/stretchr/testify/require"
  23. )
  // codeSearchResult is the reduced form of a search hit that the scenarios in
  // this file compare against: only the file name and the content of a hit are
  // kept. Both fields are plain strings, so values can be compared with == and
  // looked up with slices.Index.
  type codeSearchResult struct {
  	// Filename is copied from the Filename field of a search hit.
  	Filename string
  	// Content is copied from the Content field of a search hit.
  	Content string
  }
  28. func TestMain(m *testing.M) {
  29. unittest.MainTest(m)
  30. }
  31. func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
  32. t.Run(name, func(t *testing.T) {
  33. assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer))
  34. keywords := []struct {
  35. RepoIDs []int64
  36. Keyword string
  37. Langs int
  38. SearchMode indexer_module.SearchModeType
  39. Results []codeSearchResult
  40. }{
  41. // Search for an exact match on the contents of a file
  42. // This scenario yields a single result (the file README.md on the repo '1')
  43. {
  44. RepoIDs: nil,
  45. Keyword: "Description",
  46. Langs: 1,
  47. Results: []codeSearchResult{
  48. {
  49. Filename: "README.md",
  50. Content: "# repo1\n\nDescription for repo1",
  51. },
  52. },
  53. },
  54. // Search for an exact match on the contents of a file within the repo '2'.
  55. // This scenario yields no results
  56. {
  57. RepoIDs: []int64{2},
  58. Keyword: "Description",
  59. Langs: 0,
  60. },
  61. // Search for an exact match on the contents of a file
  62. // This scenario yields a single result (the file README.md on the repo '1')
  63. {
  64. RepoIDs: nil,
  65. Keyword: "repo1",
  66. Langs: 1,
  67. Results: []codeSearchResult{
  68. {
  69. Filename: "README.md",
  70. Content: "# repo1\n\nDescription for repo1",
  71. },
  72. },
  73. },
  74. // Search for an exact match on the contents of a file within the repo '2'.
  75. // This scenario yields no results
  76. {
  77. RepoIDs: []int64{2},
  78. Keyword: "repo1",
  79. Langs: 0,
  80. },
  81. // Search for a non-existing term.
  82. // This scenario yields no results
  83. {
  84. RepoIDs: nil,
  85. Keyword: "non-exist",
  86. Langs: 0,
  87. },
  88. // Search for an exact match on the contents of a file within the repo '62'.
  89. // This scenario yields a single result (the file avocado.md on the repo '62')
  90. {
  91. RepoIDs: []int64{62},
  92. Keyword: "pineaple",
  93. Langs: 1,
  94. Results: []codeSearchResult{
  95. {
  96. Filename: "avocado.md",
  97. Content: "# repo1\n\npineaple pie of cucumber juice",
  98. },
  99. },
  100. },
  101. // Search for an exact match on the filename within the repo '62'.
  102. // This scenario yields a single result (the file avocado.md on the repo '62')
  103. {
  104. RepoIDs: []int64{62},
  105. Keyword: "avocado.md",
  106. Langs: 1,
  107. Results: []codeSearchResult{
  108. {
  109. Filename: "avocado.md",
  110. Content: "# repo1\n\npineaple pie of cucumber juice",
  111. },
  112. },
  113. },
  114. // Search for an partial match on the filename within the repo '62'.
  115. // This scenario yields a single result (the file avocado.md on the repo '62')
  116. {
  117. RepoIDs: []int64{62},
  118. Keyword: "avo",
  119. Langs: 1,
  120. Results: []codeSearchResult{
  121. {
  122. Filename: "avocado.md",
  123. Content: "# repo1\n\npineaple pie of cucumber juice",
  124. },
  125. },
  126. },
  127. // Search for matches on both the contents and the filenames within the repo '62'.
  128. // This scenario yields two results: the first result is based on the file (cucumber.md) while the second is based on the contents
  129. {
  130. RepoIDs: []int64{62},
  131. Keyword: "cucumber",
  132. Langs: 1,
  133. Results: []codeSearchResult{
  134. {
  135. Filename: "cucumber.md",
  136. Content: "Salad is good for your health",
  137. },
  138. {
  139. Filename: "avocado.md",
  140. Content: "# repo1\n\npineaple pie of cucumber juice",
  141. },
  142. },
  143. },
  144. // Search for matches on the filenames within the repo '62'.
  145. // This scenario yields two results (both are based on filename, the first one is an exact match)
  146. {
  147. RepoIDs: []int64{62},
  148. Keyword: "ham",
  149. Langs: 1,
  150. Results: []codeSearchResult{
  151. {
  152. Filename: "ham.md",
  153. Content: "This is also not cheese",
  154. },
  155. {
  156. Filename: "potato/ham.md",
  157. Content: "This is not cheese",
  158. },
  159. },
  160. },
  161. // Search for matches on the contents of files within the repo '62'.
  162. // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one)
  163. {
  164. RepoIDs: []int64{62},
  165. Keyword: "This is not cheese",
  166. Langs: 1,
  167. Results: []codeSearchResult{
  168. {
  169. Filename: "potato/ham.md",
  170. Content: "This is not cheese",
  171. },
  172. {
  173. Filename: "ham.md",
  174. Content: "This is also not cheese",
  175. },
  176. },
  177. },
  178. // Search for matches on the contents of files regardless of case.
  179. {
  180. RepoIDs: nil,
  181. Keyword: "dESCRIPTION",
  182. Langs: 1,
  183. SearchMode: indexer_module.SearchModeFuzzy,
  184. Results: []codeSearchResult{
  185. {
  186. Filename: "README.md",
  187. Content: "# repo1\n\nDescription for repo1",
  188. },
  189. },
  190. },
  191. // Search for an exact match on the filename within the repo '62' (case-insensitive).
  192. // This scenario yields a single result (the file avocado.md on the repo '62')
  193. {
  194. RepoIDs: []int64{62},
  195. Keyword: "AVOCADO.MD",
  196. Langs: 1,
  197. Results: []codeSearchResult{
  198. {
  199. Filename: "avocado.md",
  200. Content: "# repo1\n\npineaple pie of cucumber juice",
  201. },
  202. },
  203. },
  204. // Search for matches on the contents of files when the criteria are an expression.
  205. {
  206. RepoIDs: []int64{62},
  207. Keyword: "console.log",
  208. Langs: 1,
  209. Results: []codeSearchResult{
  210. {
  211. Filename: "example-file.js",
  212. Content: "console.log(\"Hello, World!\")",
  213. },
  214. },
  215. },
  216. // Search for matches on the contents of files when the criteria are parts of an expression.
  217. {
  218. RepoIDs: []int64{62},
  219. Keyword: "log",
  220. Langs: 1,
  221. Results: []codeSearchResult{
  222. {
  223. Filename: "example-file.js",
  224. Content: "console.log(\"Hello, World!\")",
  225. },
  226. },
  227. },
  228. }
  229. for _, kw := range keywords {
  230. t.Run(kw.Keyword, func(t *testing.T) {
  231. total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
  232. RepoIDs: kw.RepoIDs,
  233. Keyword: kw.Keyword,
  234. SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords),
  235. Paginator: &db.ListOptions{
  236. Page: 1,
  237. PageSize: 10,
  238. },
  239. })
  240. require.NoError(t, err)
  241. require.Len(t, langs, kw.Langs)
  242. hits := make([]codeSearchResult, 0, len(res))
  243. if total > 0 {
  244. assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results")
  245. }
  246. for _, hit := range res {
  247. hits = append(hits, codeSearchResult{
  248. Filename: hit.Filename,
  249. Content: hit.Content,
  250. })
  251. }
  252. lastIndex := -1
  253. for _, expected := range kw.Results {
  254. index := slices.Index(hits, expected)
  255. if index == -1 {
  256. assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits)
  257. } else if lastIndex > index {
  258. assert.Failf(t, "Result is out of order", "The order of %v within %v is wrong", expected, hits)
  259. } else {
  260. lastIndex = index
  261. }
  262. }
  263. })
  264. }
  265. assert.NoError(t, tearDownRepositoryIndexes(t.Context(), indexer))
  266. })
  267. }
  268. func TestBleveIndexAndSearch(t *testing.T) {
  269. unittest.PrepareTestEnv(t)
  270. defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
  271. dir := t.TempDir()
  272. idx := bleve.NewIndexer(dir)
  273. defer idx.Close()
  274. _, err := idx.Init(t.Context())
  275. require.NoError(t, err)
  276. testIndexer("bleve", t, idx)
  277. }
  278. func TestESIndexAndSearch(t *testing.T) {
  279. unittest.PrepareTestEnv(t)
  280. u := os.Getenv("TEST_INDEXER_CODE_ES_URL")
  281. if u == "" {
  282. t.SkipNow()
  283. return
  284. }
  285. indexer := elasticsearch.NewIndexer(u, "gitea_codes")
  286. if _, err := indexer.Init(t.Context()); err != nil {
  287. if indexer != nil {
  288. indexer.Close()
  289. }
  290. require.NoError(t, err, "Unable to init ES indexer")
  291. }
  292. defer indexer.Close()
  293. testIndexer("elastic_search", t, indexer)
  294. }
  295. func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
  296. for _, repoID := range repositoriesToSearch() {
  297. if err := index(ctx, indexer, repoID); err != nil {
  298. return err
  299. }
  300. }
  301. return nil
  302. }
  303. func tearDownRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
  304. for _, repoID := range repositoriesToSearch() {
  305. if err := indexer.Delete(ctx, repoID); err != nil {
  306. return err
  307. }
  308. }
  309. return nil
  310. }
  // repositoriesToSearch returns the IDs of the repositories that the tests in
  // this file index before searching and delete afterwards. A new slice is
  // returned on every call.
  func repositoriesToSearch() []int64 {
  	repoIDs := []int64{1, 62}
  	return repoIDs
  }