forgejo/services/cron/tasks_basic.go
wxiaoguang 6bc3079c00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)

## Review without space diff

https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1

## Purpose of this PR

1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command

## The main idea of this PR

* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
    * Before: `AddArguments("-m").AddDynamicArguments(message)`
    * After: `AddOptionValues("-m", message)`
    * -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`

## FAQ

### Why these changes were not done in #21535 ?

#21535 is mainly a search&replace, it did its best to not change too
much logic.

Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.


### The naming of `AddOptionXxx`

According to git's manual, the `--xxx` part is called `option`.

### How can it guarantee that `internal.CmdArg` won't be not misused?

Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.

And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.

### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?

Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.

### Why there was a `CmdArgCheck` and why it's removed?

At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.


### Why many codes for `signArg == ""` is deleted?

Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00

175 lines
4.9 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package cron
import (
"context"
"time"
"code.gitea.io/gitea/models"
git_model "code.gitea.io/gitea/models/git"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/models/webhook"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/auth"
"code.gitea.io/gitea/services/migrations"
mirror_service "code.gitea.io/gitea/services/mirror"
packages_service "code.gitea.io/gitea/services/packages"
repo_service "code.gitea.io/gitea/services/repository"
archiver_service "code.gitea.io/gitea/services/repository/archiver"
)
func registerUpdateMirrorTask() {
type UpdateMirrorTaskConfig struct {
BaseConfig
PullLimit int
PushLimit int
}
RegisterTaskFatal("update_mirrors", &UpdateMirrorTaskConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: false,
Schedule: "@every 10m",
},
PullLimit: 50,
PushLimit: 50,
}, func(ctx context.Context, _ *user_model.User, cfg Config) error {
umtc := cfg.(*UpdateMirrorTaskConfig)
return mirror_service.Update(ctx, umtc.PullLimit, umtc.PushLimit)
})
}
func registerRepoHealthCheck() {
type RepoHealthCheckConfig struct {
BaseConfig
Timeout time.Duration
Args []string `delim:" "`
}
RegisterTaskFatal("repo_health_check", &RepoHealthCheckConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: false,
Schedule: "@midnight",
},
Timeout: 60 * time.Second,
Args: []string{},
}, func(ctx context.Context, _ *user_model.User, config Config) error {
rhcConfig := config.(*RepoHealthCheckConfig)
// the git args are set by config, they can be safe to be trusted
return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, git.ToTrustedCmdArgs(rhcConfig.Args))
})
}
func registerCheckRepoStats() {
RegisterTaskFatal("check_repo_stats", &BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
}, func(ctx context.Context, _ *user_model.User, _ Config) error {
return models.CheckRepoStats(ctx)
})
}
func registerArchiveCleanup() {
RegisterTaskFatal("archive_cleanup", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
},
OlderThan: 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
acConfig := config.(*OlderThanConfig)
return archiver_service.DeleteOldRepositoryArchives(ctx, acConfig.OlderThan)
})
}
func registerSyncExternalUsers() {
RegisterTaskFatal("sync_external_users", &UpdateExistingConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: false,
Schedule: "@midnight",
},
UpdateExisting: true,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
realConfig := config.(*UpdateExistingConfig)
return auth.SyncExternalUsers(ctx, realConfig.UpdateExisting)
})
}
func registerDeletedBranchesCleanup() {
RegisterTaskFatal("deleted_branches_cleanup", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
},
OlderThan: 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
realConfig := config.(*OlderThanConfig)
git_model.RemoveOldDeletedBranches(ctx, realConfig.OlderThan)
return nil
})
}
func registerUpdateMigrationPosterID() {
RegisterTaskFatal("update_migration_poster_id", &BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
}, func(ctx context.Context, _ *user_model.User, _ Config) error {
return migrations.UpdateMigrationPosterID(ctx)
})
}
func registerCleanupHookTaskTable() {
RegisterTaskFatal("cleanup_hook_task_table", &CleanupHookTaskConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: false,
Schedule: "@midnight",
},
CleanupType: "OlderThan",
OlderThan: 168 * time.Hour,
NumberToKeep: 10,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
realConfig := config.(*CleanupHookTaskConfig)
return webhook.CleanupHookTaskTable(ctx, webhook.ToHookTaskCleanupType(realConfig.CleanupType), realConfig.OlderThan, realConfig.NumberToKeep)
})
}
func registerCleanupPackages() {
RegisterTaskFatal("cleanup_packages", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
},
OlderThan: 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
realConfig := config.(*OlderThanConfig)
return packages_service.Cleanup(ctx, realConfig.OlderThan)
})
}
func initBasicTasks() {
if setting.Mirror.Enabled {
registerUpdateMirrorTask()
}
registerRepoHealthCheck()
registerCheckRepoStats()
registerArchiveCleanup()
registerSyncExternalUsers()
registerDeletedBranchesCleanup()
if !setting.Repository.DisableMigrations {
registerUpdateMigrationPosterID()
}
registerCleanupHookTaskTable()
if setting.Packages.Enabled {
registerCleanupPackages()
}
}