gitea源码


  1. // Copyright 2018 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package bleve
  4. import (
  5. "context"
  6. "strconv"
  7. "code.gitea.io/gitea/modules/indexer"
  8. indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
  9. inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
  10. "code.gitea.io/gitea/modules/indexer/issues/internal"
  11. "code.gitea.io/gitea/modules/optional"
  12. "code.gitea.io/gitea/modules/util"
  13. "github.com/blevesearch/bleve/v2"
  14. "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
  15. "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
  16. "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
  17. "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
  18. "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
  19. "github.com/blevesearch/bleve/v2/mapping"
  20. "github.com/blevesearch/bleve/v2/search/query"
  21. )
  // Names and version shared by the issue index mapping and the queries built on it.
  const (
  	// issueIndexerAnalyzer is the name under which the custom text analyzer is registered.
  	issueIndexerAnalyzer = "issueIndexer"
  	// issueIndexerDocType is the document type reported by IndexerData.Type.
  	issueIndexerDocType = "issueIndexerDocType"
  	// issueIndexerLatestVersion is the index version handed to the inner bleve indexer.
  	issueIndexerLatestVersion = 5
  	// unicodeNormalizeName is the name under which the Unicode normalization token filter is registered.
  	unicodeNormalizeName = "unicodeNormalize"
  )
  28. func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
  29. return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
  30. "type": unicodenorm.Name,
  31. "form": unicodenorm.NFC,
  32. })
  33. }
  // maxBatchSize is the batch size passed to every flushing batch created by
  // Index and Delete.
  const maxBatchSize = 16
  35. // IndexerData an update to the issue indexer
  36. type IndexerData internal.IndexerData
  37. // Type returns the document type, for bleve's mapping.Classifier interface.
  38. func (i *IndexerData) Type() string {
  39. return issueIndexerDocType
  40. }
  41. // generateIssueIndexMapping generates the bleve index mapping for issues
  42. func generateIssueIndexMapping() (mapping.IndexMapping, error) {
  43. mapping := bleve.NewIndexMapping()
  44. docMapping := bleve.NewDocumentMapping()
  45. numericFieldMapping := bleve.NewNumericFieldMapping()
  46. numericFieldMapping.Store = false
  47. numericFieldMapping.IncludeInAll = false
  48. docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
  49. textFieldMapping := bleve.NewTextFieldMapping()
  50. textFieldMapping.Store = false
  51. textFieldMapping.IncludeInAll = false
  52. boolFieldMapping := bleve.NewBooleanFieldMapping()
  53. boolFieldMapping.Store = false
  54. boolFieldMapping.IncludeInAll = false
  55. numberFieldMapping := bleve.NewNumericFieldMapping()
  56. numberFieldMapping.Store = false
  57. numberFieldMapping.IncludeInAll = false
  58. docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
  59. docMapping.AddFieldMappingsAt("title", textFieldMapping)
  60. docMapping.AddFieldMappingsAt("content", textFieldMapping)
  61. docMapping.AddFieldMappingsAt("comments", textFieldMapping)
  62. docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping)
  63. docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping)
  64. docMapping.AddFieldMappingsAt("is_archived", boolFieldMapping)
  65. docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping)
  66. docMapping.AddFieldMappingsAt("no_label", boolFieldMapping)
  67. docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping)
  68. docMapping.AddFieldMappingsAt("project_id", numberFieldMapping)
  69. docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping)
  70. docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping)
  71. docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping)
  72. docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping)
  73. docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping)
  74. docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping)
  75. docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping)
  76. docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping)
  77. docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping)
  78. docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping)
  79. docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping)
  80. if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
  81. return nil, err
  82. } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
  83. "type": custom.Name,
  84. "char_filters": []string{},
  85. "tokenizer": unicode.Name,
  86. "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
  87. }); err != nil {
  88. return nil, err
  89. }
  90. mapping.DefaultAnalyzer = issueIndexerAnalyzer
  91. mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
  92. mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
  93. mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
  94. return mapping, nil
  95. }
  96. var _ internal.Indexer = &Indexer{}
  97. // Indexer implements Indexer interface
  98. type Indexer struct {
  99. inner *inner_bleve.Indexer
  100. indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
  101. }
  102. func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
  103. return indexer.SearchModesExactWordsFuzzy()
  104. }
  105. // NewIndexer creates a new bleve local indexer
  106. func NewIndexer(indexDir string) *Indexer {
  107. inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping)
  108. return &Indexer{
  109. Indexer: inner,
  110. inner: inner,
  111. }
  112. }
  113. // Index will save the index data
  114. func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
  115. batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
  116. for _, issue := range issues {
  117. if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
  118. return err
  119. }
  120. }
  121. return batch.Flush()
  122. }
  123. // Delete deletes indexes by ids
  124. func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
  125. batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
  126. for _, id := range ids {
  127. if err := batch.Delete(indexer_internal.Base36(id)); err != nil {
  128. return err
  129. }
  130. }
  131. return batch.Flush()
  132. }
  133. // Search searches for issues by given conditions.
  134. // Returns the matching issue IDs
  135. func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
  136. var queries []query.Query
  137. if options.Keyword != "" {
  138. searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
  139. if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy {
  140. fuzziness := 0
  141. if searchMode == indexer.SearchModeFuzzy {
  142. fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
  143. }
  144. queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
  145. inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
  146. inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
  147. inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
  148. }...))
  149. } else /* exact */ {
  150. queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
  151. inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0),
  152. inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0),
  153. inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0),
  154. }...))
  155. }
  156. }
  157. if len(options.RepoIDs) > 0 || options.AllPublic {
  158. var repoQueries []query.Query
  159. for _, repoID := range options.RepoIDs {
  160. repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id"))
  161. }
  162. if options.AllPublic {
  163. repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public"))
  164. }
  165. queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...))
  166. }
  167. if options.IsPull.Has() {
  168. queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull"))
  169. }
  170. if options.IsClosed.Has() {
  171. queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed"))
  172. }
  173. if options.IsArchived.Has() {
  174. queries = append(queries, inner_bleve.BoolFieldQuery(options.IsArchived.Value(), "is_archived"))
  175. }
  176. if options.NoLabelOnly {
  177. queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label"))
  178. } else {
  179. if len(options.IncludedLabelIDs) > 0 {
  180. var includeQueries []query.Query
  181. for _, labelID := range options.IncludedLabelIDs {
  182. includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
  183. }
  184. queries = append(queries, bleve.NewConjunctionQuery(includeQueries...))
  185. } else if len(options.IncludedAnyLabelIDs) > 0 {
  186. var includeQueries []query.Query
  187. for _, labelID := range options.IncludedAnyLabelIDs {
  188. includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
  189. }
  190. queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...))
  191. }
  192. if len(options.ExcludedLabelIDs) > 0 {
  193. var excludeQueries []query.Query
  194. for _, labelID := range options.ExcludedLabelIDs {
  195. q := bleve.NewBooleanQuery()
  196. q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
  197. excludeQueries = append(excludeQueries, q)
  198. }
  199. queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...))
  200. }
  201. }
  202. if len(options.MilestoneIDs) > 0 {
  203. var milestoneQueries []query.Query
  204. for _, milestoneID := range options.MilestoneIDs {
  205. milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id"))
  206. }
  207. queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...))
  208. }
  209. if options.ProjectID.Has() {
  210. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectID.Value(), "project_id"))
  211. }
  212. if options.ProjectColumnID.Has() {
  213. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectColumnID.Value(), "project_board_id"))
  214. }
  215. if options.PosterID != "" {
  216. // "(none)" becomes 0, it means no poster
  217. posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
  218. queries = append(queries, inner_bleve.NumericEqualityQuery(posterIDInt64, "poster_id"))
  219. }
  220. if options.AssigneeID != "" {
  221. if options.AssigneeID == "(any)" {
  222. queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(optional.Some[int64](1), optional.None[int64](), "assignee_id"))
  223. } else {
  224. // "(none)" becomes 0, it means no assignee
  225. assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
  226. queries = append(queries, inner_bleve.NumericEqualityQuery(assigneeIDInt64, "assignee_id"))
  227. }
  228. }
  229. if options.MentionID.Has() {
  230. queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids"))
  231. }
  232. if options.ReviewedID.Has() {
  233. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids"))
  234. }
  235. if options.ReviewRequestedID.Has() {
  236. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids"))
  237. }
  238. if options.SubscriberID.Has() {
  239. queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids"))
  240. }
  241. if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
  242. queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(
  243. options.UpdatedAfterUnix,
  244. options.UpdatedBeforeUnix,
  245. "updated_unix"))
  246. }
  247. var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
  248. if len(queries) == 0 {
  249. indexerQuery = bleve.NewMatchAllQuery()
  250. }
  251. skip, limit := indexer_internal.ParsePaginator(options.Paginator)
  252. search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false)
  253. if options.SortBy == "" {
  254. options.SortBy = internal.SortByCreatedAsc
  255. }
  256. search.SortBy([]string{string(options.SortBy), "-_id"})
  257. result, err := b.inner.Indexer.SearchInContext(ctx, search)
  258. if err != nil {
  259. return nil, err
  260. }
  261. ret := &internal.SearchResult{
  262. Total: int64(result.Total),
  263. Hits: make([]internal.Match, 0, len(result.Hits)),
  264. }
  265. for _, hit := range result.Hits {
  266. id, err := indexer_internal.ParseBase36(hit.ID)
  267. if err != nil {
  268. return nil, err
  269. }
  270. ret.Hits = append(ret.Hits, internal.Match{
  271. ID: id,
  272. })
  273. }
  274. return ret, nil
  275. }