gitea源码

elasticsearch.go 9.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package elasticsearch
  4. import (
  5. "context"
  6. "strconv"
  7. "strings"
  8. "code.gitea.io/gitea/modules/graceful"
  9. "code.gitea.io/gitea/modules/indexer"
  10. indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
  11. inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
  12. "code.gitea.io/gitea/modules/indexer/issues/internal"
  13. "code.gitea.io/gitea/modules/util"
  14. "github.com/olivere/elastic/v7"
  15. )
const (
	// issueIndexerLatestVersion is the index version that NewIndexer hands to
	// the inner elasticsearch indexer together with defaultMapping.
	issueIndexerLatestVersion = 2
	// multi-match-types, currently only 2 types are used
	// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
	//
	// Search uses "best_fields" (with the "and" operator) for the words search
	// mode and "phrase_prefix" for the exact search mode.
	esMultiMatchTypeBestFields   = "best_fields"
	esMultiMatchTypePhrasePrefix = "phrase_prefix"
)
  23. var _ internal.Indexer = &Indexer{}
  24. // Indexer implements Indexer interface
  25. type Indexer struct {
  26. inner *inner_elasticsearch.Indexer
  27. indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
  28. }
  29. func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
  30. // TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO"
  31. return indexer.SearchModesExactWords()
  32. }
  33. // NewIndexer creates a new elasticsearch indexer
  34. func NewIndexer(url, indexerName string) *Indexer {
  35. inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
  36. indexer := &Indexer{
  37. inner: inner,
  38. Indexer: inner,
  39. }
  40. return indexer
  41. }
const (
	// defaultMapping is the index definition (JSON) that NewIndexer passes to
	// the inner elasticsearch indexer. It declares one indexed property per
	// field that Search filters or sorts on: the free-text fields "title",
	// "content" and "comments" are of type "text", flags are "boolean", and
	// every ID, counter and unix timestamp is "integer".
	//
	// NOTE(review): Elasticsearch's "integer" is a signed 32-bit type, while
	// issue IDs are handled as int64 in this file; confirm that all IDs and
	// unix timestamps stored here stay within that range.
	// NOTE(review): presumably issueIndexerLatestVersion has to be bumped when
	// this mapping changes; verify against the inner indexer.
	defaultMapping = `
{
"mappings": {
"properties": {
"id": { "type": "integer", "index": true },
"repo_id": { "type": "integer", "index": true },
"is_public": { "type": "boolean", "index": true },
"title": { "type": "text", "index": true },
"content": { "type": "text", "index": true },
"comments": { "type" : "text", "index": true },
"is_pull": { "type": "boolean", "index": true },
"is_closed": { "type": "boolean", "index": true },
"is_archived": { "type": "boolean", "index": true },
"label_ids": { "type": "integer", "index": true },
"no_label": { "type": "boolean", "index": true },
"milestone_id": { "type": "integer", "index": true },
"project_id": { "type": "integer", "index": true },
"project_board_id": { "type": "integer", "index": true },
"poster_id": { "type": "integer", "index": true },
"assignee_id": { "type": "integer", "index": true },
"mention_ids": { "type": "integer", "index": true },
"reviewed_ids": { "type": "integer", "index": true },
"review_requested_ids": { "type": "integer", "index": true },
"subscriber_ids": { "type": "integer", "index": true },
"updated_unix": { "type": "integer", "index": true },
"created_unix": { "type": "integer", "index": true },
"deadline_unix": { "type": "integer", "index": true },
"comment_count": { "type": "integer", "index": true }
}
}
}
`
)
  76. // Index will save the index data
  77. func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
  78. if len(issues) == 0 {
  79. return nil
  80. } else if len(issues) == 1 {
  81. issue := issues[0]
  82. _, err := b.inner.Client.Index().
  83. Index(b.inner.VersionedIndexName()).
  84. Id(strconv.FormatInt(issue.ID, 10)).
  85. BodyJson(issue).
  86. Do(ctx)
  87. return err
  88. }
  89. reqs := make([]elastic.BulkableRequest, 0)
  90. for _, issue := range issues {
  91. reqs = append(reqs,
  92. elastic.NewBulkIndexRequest().
  93. Index(b.inner.VersionedIndexName()).
  94. Id(strconv.FormatInt(issue.ID, 10)).
  95. Doc(issue),
  96. )
  97. }
  98. _, err := b.inner.Client.Bulk().
  99. Index(b.inner.VersionedIndexName()).
  100. Add(reqs...).
  101. Do(graceful.GetManager().HammerContext())
  102. return err
  103. }
  104. // Delete deletes indexes by ids
  105. func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
  106. if len(ids) == 0 {
  107. return nil
  108. } else if len(ids) == 1 {
  109. _, err := b.inner.Client.Delete().
  110. Index(b.inner.VersionedIndexName()).
  111. Id(strconv.FormatInt(ids[0], 10)).
  112. Do(ctx)
  113. return err
  114. }
  115. reqs := make([]elastic.BulkableRequest, 0)
  116. for _, id := range ids {
  117. reqs = append(reqs,
  118. elastic.NewBulkDeleteRequest().
  119. Index(b.inner.VersionedIndexName()).
  120. Id(strconv.FormatInt(id, 10)),
  121. )
  122. }
  123. _, err := b.inner.Client.Bulk().
  124. Index(b.inner.VersionedIndexName()).
  125. Add(reqs...).
  126. Do(graceful.GetManager().HammerContext())
  127. return err
  128. }
  129. // Search searches for issues by given conditions.
  130. // Returns the matching issue IDs
  131. func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
  132. query := elastic.NewBoolQuery()
  133. if options.Keyword != "" {
  134. searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
  135. if searchMode == indexer.SearchModeExact {
  136. query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
  137. } else /* words */ {
  138. query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))
  139. }
  140. }
  141. if len(options.RepoIDs) > 0 {
  142. q := elastic.NewBoolQuery()
  143. q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
  144. if options.AllPublic {
  145. q.Should(elastic.NewTermQuery("is_public", true))
  146. }
  147. query.Must(q)
  148. }
  149. if options.IsPull.Has() {
  150. query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value()))
  151. }
  152. if options.IsClosed.Has() {
  153. query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value()))
  154. }
  155. if options.IsArchived.Has() {
  156. query.Must(elastic.NewTermQuery("is_archived", options.IsArchived.Value()))
  157. }
  158. if options.NoLabelOnly {
  159. query.Must(elastic.NewTermQuery("no_label", true))
  160. } else {
  161. if len(options.IncludedLabelIDs) > 0 {
  162. q := elastic.NewBoolQuery()
  163. for _, labelID := range options.IncludedLabelIDs {
  164. q.Must(elastic.NewTermQuery("label_ids", labelID))
  165. }
  166. query.Must(q)
  167. } else if len(options.IncludedAnyLabelIDs) > 0 {
  168. query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
  169. }
  170. if len(options.ExcludedLabelIDs) > 0 {
  171. q := elastic.NewBoolQuery()
  172. for _, labelID := range options.ExcludedLabelIDs {
  173. q.MustNot(elastic.NewTermQuery("label_ids", labelID))
  174. }
  175. query.Must(q)
  176. }
  177. }
  178. if len(options.MilestoneIDs) > 0 {
  179. query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
  180. }
  181. if options.ProjectID.Has() {
  182. query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value()))
  183. }
  184. if options.ProjectColumnID.Has() {
  185. query.Must(elastic.NewTermQuery("project_board_id", options.ProjectColumnID.Value()))
  186. }
  187. if options.PosterID != "" {
  188. // "(none)" becomes 0, it means no poster
  189. posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
  190. query.Must(elastic.NewTermQuery("poster_id", posterIDInt64))
  191. }
  192. if options.AssigneeID != "" {
  193. if options.AssigneeID == "(any)" {
  194. q := elastic.NewRangeQuery("assignee_id")
  195. q.Gte(1)
  196. query.Must(q)
  197. } else {
  198. // "(none)" becomes 0, it means no assignee
  199. assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
  200. query.Must(elastic.NewTermQuery("assignee_id", assigneeIDInt64))
  201. }
  202. }
  203. if options.MentionID.Has() {
  204. query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value()))
  205. }
  206. if options.ReviewedID.Has() {
  207. query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value()))
  208. }
  209. if options.ReviewRequestedID.Has() {
  210. query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
  211. }
  212. if options.SubscriberID.Has() {
  213. query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value()))
  214. }
  215. if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
  216. q := elastic.NewRangeQuery("updated_unix")
  217. if options.UpdatedAfterUnix.Has() {
  218. q.Gte(options.UpdatedAfterUnix.Value())
  219. }
  220. if options.UpdatedBeforeUnix.Has() {
  221. q.Lte(options.UpdatedBeforeUnix.Value())
  222. }
  223. query.Must(q)
  224. }
  225. if options.SortBy == "" {
  226. options.SortBy = internal.SortByCreatedAsc
  227. }
  228. sortBy := []elastic.Sorter{
  229. parseSortBy(options.SortBy),
  230. elastic.NewFieldSort("id").Desc(),
  231. }
  232. // See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
  233. // TODO: make it configurable since it's configurable in elasticsearch
  234. const maxPageSize = 10000
  235. skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
  236. searchResult, err := b.inner.Client.Search().
  237. Index(b.inner.VersionedIndexName()).
  238. Query(query).
  239. SortBy(sortBy...).
  240. From(skip).Size(limit).
  241. Do(ctx)
  242. if err != nil {
  243. return nil, err
  244. }
  245. hits := make([]internal.Match, 0, limit)
  246. for _, hit := range searchResult.Hits.Hits {
  247. id, _ := strconv.ParseInt(hit.Id, 10, 64)
  248. hits = append(hits, internal.Match{
  249. ID: id,
  250. })
  251. }
  252. return &internal.SearchResult{
  253. Total: searchResult.TotalHits(),
  254. Hits: hits,
  255. }, nil
  256. }
  257. func toAnySlice[T any](s []T) []any {
  258. ret := make([]any, 0, len(s))
  259. for _, item := range s {
  260. ret = append(ret, item)
  261. }
  262. return ret
  263. }
  264. func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
  265. field := strings.TrimPrefix(string(sortBy), "-")
  266. ret := elastic.NewFieldSort(field)
  267. if strings.HasPrefix(string(sortBy), "-") {
  268. ret.Desc()
  269. }
  270. return ret
  271. }