Gitea source code

// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package path

import (
	"slices"
	"strings"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)

const (
	Name = "gitea/path"
)
type TokenFilter struct{}

func NewTokenFilter() *TokenFilter {
	return &TokenFilter{}
}

func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewTokenFilter(), nil
}

func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	if len(input) == 1 {
		// if there is only one token, we don't need to generate the reversed chain
		return generatePathTokens(input, false)
	}

	normal := generatePathTokens(input, false)
	reversed := generatePathTokens(input, true)

	return append(normal, reversed...)
}
// Generates path tokens from the input tokens.
// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combines them, generating a term for each component
// in the tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
//
// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
// to efficiently search for filenames without supplying the full path.
func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
	terms := make([]string, 0, len(input))
	longestTerm := 0

	if reversed {
		slices.Reverse(input)
	}

	for i := range input {
		// Build the term covering components 0..i; the inclusive inner bound makes the
		// last term span the whole path (e.g. foo/bar/baz.md), as described above.
		var sb strings.Builder
		sb.Write(input[0].Term)

		for j := 1; j <= i; j++ {
			sb.WriteString("/")
			sb.Write(input[j].Term)
		}

		term := sb.String()

		if longestTerm < len(term) {
			longestTerm = len(term)
		}

		terms = append(terms, term)
	}
	output := make(analysis.TokenStream, 0, len(terms))

	for _, term := range terms {
		var start, end int

		if reversed {
			start = 0
			end = len(term)
		} else {
			start = longestTerm - len(term)
			end = longestTerm
		}

		token := analysis.Token{
			Position: 1,
			Start:    start,
			End:      end,
			Type:     analysis.AlphaNumeric,
			Term:     []byte(term),
		}

		output = append(output, &token)
	}

	return output
}
func init() {
	// FIXME: move it to the bleve's init function, but do not call it in global init
	err := registry.RegisterTokenFilter(Name, TokenFilterConstructor)
	if err != nil {
		panic(err)
	}
}
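
To see the filter's behavior concretely, here is a minimal test-style sketch (not Gitea's own test file) that feeds a hand-built token stream, standing in for the tokenizer output of foo/bar/baz.md, through the filter and prints the generated terms. It is written as if it lived in the same package as the code above, so NewTokenFilter resolves directly; the test name and the sample path are illustrative.

package path

import (
	"fmt"
	"testing"

	"github.com/blevesearch/bleve/v2/analysis"
)

// TestFilterExample is an illustrative sketch, not part of the upstream sources.
func TestFilterExample(t *testing.T) {
	// Hand-built token stream standing in for the tokenizer output of
	// "foo/bar/baz.md", one token per path component.
	input := analysis.TokenStream{
		{Term: []byte("foo")},
		{Term: []byte("bar")},
		{Term: []byte("baz.md")},
	}

	output := NewTokenFilter().Filter(input)

	// Per the doc comment on generatePathTokens, this prints the forward chain
	// (foo, foo/bar, foo/bar/baz.md) followed by the reversed chain
	// (baz.md, baz.md/bar, baz.md/bar/foo).
	for _, tok := range output {
		fmt.Println(string(tok.Term))
	}
}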
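
The init function above registers the filter under the name "gitea/path" in bleve's global registry, which lets an analyzer definition reference it by name. The sketch below shows one way that wiring might look; the analyzer name "filename_example", the choice of the unicode tokenizer, and the test wrapper are assumptions for illustration, not Gitea's actual indexer configuration.

package path

import (
	"testing"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)

// TestAnalyzerWiringExample is an illustrative sketch, not part of the upstream sources.
func TestAnalyzerWiringExample(t *testing.T) {
	m := bleve.NewIndexMapping()

	// Define a custom analyzer that tokenizes the input and then runs the
	// "gitea/path" token filter registered by this package's init function.
	err := m.AddCustomAnalyzer("filename_example", map[string]any{
		"type":          custom.Name,
		"tokenizer":     unicode.Name,
		"token_filters": []string{Name}, // Name == "gitea/path"
	})
	if err != nil {
		t.Fatal(err)
	}
}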