| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- // Copyright 2024 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package path
-
- import (
- "slices"
- "strings"
-
- "github.com/blevesearch/bleve/v2/analysis"
- "github.com/blevesearch/bleve/v2/registry"
- )
-
const (
	// Name is the registry name under which this token filter is
	// registered with bleve (see the init function below).
	Name = "gitea/path"
)
-
// TokenFilter is a stateless bleve token filter that expands a stream of
// path components into hierarchical path terms (see generatePathTokens).
type TokenFilter struct{}
-
- func NewTokenFilter() *TokenFilter {
- return &TokenFilter{}
- }
-
- func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
- return NewTokenFilter(), nil
- }
-
- func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
- if len(input) == 1 {
- // if there is only one token, we dont need to generate the reversed chain
- return generatePathTokens(input, false)
- }
-
- normal := generatePathTokens(input, false)
- reversed := generatePathTokens(input, true)
-
- return append(normal, reversed...)
- }
-
- // Generates path tokens from the input tokens.
- // This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combine them, generating a term for each component
- // in tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
- //
- // If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
- // to efficiently search for filenames without supplying the fullpath.
- func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
- terms := make([]string, 0, len(input))
- longestTerm := 0
-
- if reversed {
- slices.Reverse(input)
- }
-
- for i := range input {
- var sb strings.Builder
- sb.Write(input[0].Term)
-
- for j := 1; j < i; j++ {
- sb.WriteString("/")
- sb.Write(input[j].Term)
- }
-
- term := sb.String()
-
- if longestTerm < len(term) {
- longestTerm = len(term)
- }
-
- terms = append(terms, term)
- }
-
- output := make(analysis.TokenStream, 0, len(terms))
-
- for _, term := range terms {
- var start, end int
-
- if reversed {
- start = 0
- end = len(term)
- } else {
- start = longestTerm - len(term)
- end = longestTerm
- }
-
- token := analysis.Token{
- Position: 1,
- Start: start,
- End: end,
- Type: analysis.AlphaNumeric,
- Term: []byte(term),
- }
-
- output = append(output, &token)
- }
-
- return output
- }
-
- func init() {
- // FIXME: move it to the bleve's init function, but do not call it in global init
- err := registry.RegisterTokenFilter(Name, TokenFilterConstructor)
- if err != nil {
- panic(err)
- }
- }
|