| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- // Copyright 2024 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package repository
-
- import (
- "context"
- "fmt"
- "io"
-
- "code.gitea.io/gitea/models/db"
- repo_model "code.gitea.io/gitea/models/repo"
- "code.gitea.io/gitea/modules/container"
- "code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/gitrepo"
- "code.gitea.io/gitea/modules/graceful"
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/options"
- "code.gitea.io/gitea/modules/queue"
-
- licenseclassifier "github.com/google/licenseclassifier/v2"
- )
-
- var (
- classifier *licenseclassifier.Classifier
- LicenseFileName = "LICENSE"
-
- // licenseUpdaterQueue represents a queue to handle update repo licenses
- licenseUpdaterQueue *queue.WorkerPoolQueue[*LicenseUpdaterOptions]
- )
-
- func AddRepoToLicenseUpdaterQueue(opts *LicenseUpdaterOptions) error {
- if opts == nil {
- return nil
- }
- return licenseUpdaterQueue.Push(opts)
- }
-
- func InitLicenseClassifier() error {
- // threshold should be 0.84~0.86 or the test will be failed
- classifier = licenseclassifier.NewClassifier(.85)
- licenseFiles, err := options.AssetFS().ListFiles("license", true)
- if err != nil {
- return err
- }
-
- for _, licenseFile := range licenseFiles {
- licenseName := licenseFile
- data, err := options.License(licenseFile)
- if err != nil {
- return err
- }
- classifier.AddContent("License", licenseName, licenseName, data)
- }
- return nil
- }
-
// LicenseUpdaterOptions is the item type carried by licenseUpdaterQueue.
// It identifies the repository whose licenses should be re-detected.
type LicenseUpdaterOptions struct {
	// RepoID is the ID of the repository to process.
	RepoID int64
}
-
- func repoLicenseUpdater(items ...*LicenseUpdaterOptions) []*LicenseUpdaterOptions {
- ctx := graceful.GetManager().ShutdownContext()
-
- for _, opts := range items {
- repo, err := repo_model.GetRepositoryByID(ctx, opts.RepoID)
- if err != nil {
- log.Error("repoLicenseUpdater [%d] failed: GetRepositoryByID: %v", opts.RepoID, err)
- continue
- }
- if repo.IsEmpty {
- continue
- }
-
- gitRepo, err := gitrepo.OpenRepository(ctx, repo)
- if err != nil {
- log.Error("repoLicenseUpdater [%d] failed: OpenRepository: %v", opts.RepoID, err)
- continue
- }
- defer gitRepo.Close()
-
- commit, err := gitRepo.GetBranchCommit(repo.DefaultBranch)
- if err != nil {
- log.Error("repoLicenseUpdater [%d] failed: GetBranchCommit: %v", opts.RepoID, err)
- continue
- }
- if err = UpdateRepoLicenses(ctx, repo, commit); err != nil {
- log.Error("repoLicenseUpdater [%d] failed: updateRepoLicenses: %v", opts.RepoID, err)
- }
- }
- return nil
- }
-
- func SyncRepoLicenses(ctx context.Context) error {
- log.Trace("Doing: SyncRepoLicenses")
-
- if err := db.Iterate(
- ctx,
- nil,
- func(ctx context.Context, repo *repo_model.Repository) error {
- select {
- case <-ctx.Done():
- return db.ErrCancelledf("before sync repo licenses for %s", repo.FullName())
- default:
- }
- return AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{RepoID: repo.ID})
- },
- ); err != nil {
- log.Trace("Error: SyncRepoLicenses: %v", err)
- return err
- }
-
- log.Trace("Finished: SyncReposLicenses")
- return nil
- }
-
- // UpdateRepoLicenses will update repository licenses col if license file exists
- func UpdateRepoLicenses(ctx context.Context, repo *repo_model.Repository, commit *git.Commit) error {
- if commit == nil {
- return nil
- }
-
- b, err := commit.GetBlobByPath(LicenseFileName)
- if err != nil && !git.IsErrNotExist(err) {
- return fmt.Errorf("GetBlobByPath: %w", err)
- }
-
- if git.IsErrNotExist(err) {
- return repo_model.CleanRepoLicenses(ctx, repo)
- }
-
- licenses := make([]string, 0)
- if b != nil {
- r, err := b.DataAsync()
- if err != nil {
- return err
- }
- defer r.Close()
-
- licenses, err = detectLicense(r)
- if err != nil {
- return fmt.Errorf("detectLicense: %w", err)
- }
- }
- return repo_model.UpdateRepoLicenses(ctx, repo, commit.ID.String(), licenses)
- }
-
- // detectLicense returns the licenses detected by the given content buff
- func detectLicense(r io.Reader) ([]string, error) {
- if r == nil {
- return nil, nil
- }
-
- matches, err := classifier.MatchFrom(r)
- if err != nil {
- return nil, err
- }
- if len(matches.Matches) > 0 {
- results := make(container.Set[string], len(matches.Matches))
- for _, r := range matches.Matches {
- if r.MatchType == "License" && !results.Contains(r.Variant) {
- results.Add(r.Variant)
- }
- }
- return results.Values(), nil
- }
- return nil, nil
- }
|