Gitea source code

path_test.go (1.6 KB)
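The listing below is a unit test from the Gitea code base for a bleve token filter (package `path`). It runs file paths through bleve's unicode tokenizer and the package's `NewTokenFilter`, then asserts the exact set of search terms produced. A sketch of how such a filter can be implemented follows the listing.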

```go
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package path

import (
	"fmt"
	"testing"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
	"github.com/stretchr/testify/assert"
)

type Scenario struct {
	Input  string
	Tokens []string
}

func TestTokenFilter(t *testing.T) {
	scenarios := []struct {
		Input string
		Terms []string
	}{
		{
			Input: "Dockerfile",
			Terms: []string{"Dockerfile"},
		},
		{
			Input: "Dockerfile.rootless",
			Terms: []string{"Dockerfile.rootless"},
		},
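		// The interesting case: a nested path is expanded into every prefix
		// starting from the root ("a", "a/b", "a/b/c", the full path) plus a
		// reversed chain anchored at the file name ("Dockerfile.rootless",
		// "Dockerfile.rootless/c", ...), so a query can match either a leading
		// directory prefix or a trailing file-name suffix.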
		{
			Input: "a/b/c/Dockerfile.rootless",
			Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"},
		},
		{
			Input: "",
			Terms: []string{},
		},
	}

	for _, scenario := range scenarios {
		t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) {
			terms := extractTerms(scenario.Input)

			assert.Len(t, terms, len(scenario.Terms))

			for _, term := range terms {
				assert.Contains(t, scenario.Terms, term)
			}
		})
	}
}

// extractTerms pushes input through the tokenizer and the filter under test,
// then collects the term strings from the resulting token stream.
func extractTerms(input string) []string {
	tokens := tokenize(input)
	filteredTokens := filter(tokens)
	terms := make([]string, 0, len(filteredTokens))
	for _, token := range filteredTokens {
		terms = append(terms, string(token.Term))
	}
	return terms
}

func filter(input analysis.TokenStream) analysis.TokenStream {
	filter := NewTokenFilter()
	return filter.Filter(input)
}

func tokenize(input string) analysis.TokenStream {
	tokenizer := unicode.NewUnicodeTokenizer()
	return tokenizer.Tokenize([]byte(input))
}
```
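The filter implementation itself (`NewTokenFilter`) is not shown on this page. As a reading aid, here is a minimal sketch of a token filter that would produce exactly the terms the scenarios above expect. It is an illustration, not Gitea's actual code: the `PathFilter` and `chain` names are invented here, and bookkeeping a production filter would keep (byte offsets, registration in bleve's token-filter registry) is omitted.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)

// PathFilter is a hypothetical stand-in for the filter under test above.
type PathFilter struct{}

// Filter expands a tokenized path into every forward prefix plus, for
// multi-token paths, a reversed chain starting at the file name.
func (PathFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	if len(input) <= 1 {
		// A bare file name maps to itself; no reversed chain is needed.
		return input
	}
	out := make(analysis.TokenStream, 0, 2*len(input))
	out = append(out, chain(input, false)...)
	out = append(out, chain(input, true)...)
	return out
}

// chain emits the running "/"-joined prefixes of the token terms,
// walking the tokens leaf-first when reversed is true.
func chain(input analysis.TokenStream, reversed bool) analysis.TokenStream {
	out := make(analysis.TokenStream, 0, len(input))
	var sb strings.Builder
	for i := range input {
		idx := i
		if reversed {
			idx = len(input) - 1 - i
		}
		if sb.Len() > 0 {
			sb.WriteByte('/')
		}
		sb.Write(input[idx].Term)
		out = append(out, &analysis.Token{
			Term:     []byte(sb.String()),
			Position: i + 1,
			Type:     analysis.AlphaNumeric,
		})
	}
	return out
}

func main() {
	tok := unicode.NewUnicodeTokenizer()
	stream := PathFilter{}.Filter(tok.Tokenize([]byte("a/b/c/Dockerfile.rootless")))
	for _, t := range stream {
		fmt.Println(string(t.Term)) // a, a/b, ..., Dockerfile.rootless/c/b/a
	}
}
```

For `a/b/c/Dockerfile.rootless` the unicode tokenizer emits the tokens `a`, `b`, `c`, and `Dockerfile.rootless` (as the second scenario shows, a dot between letters does not split a word under Unicode segmentation), so the sketch prints the eight expected terms: four forward prefixes and four reversed ones. Indexing both chains is what lets a query match either a leading directory path or a trailing file name.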