bazel-lib/tools/copy_to_directory/main.go

419 lines
11 KiB
Go

package main
import (
"encoding/json"
"fmt"
"io"
"io/fs"
"log"
"os"
"path"
"path/filepath"
"strings"
"sync"
"github.com/aspect-build/bazel-lib/tools/common"
"github.com/bmatcuk/doublestar/v4"
"golang.org/x/exp/maps"
)
// fileInfo describes one source file or directory to be copied. The tagged fields are decoded
// from the entries of the "files" array of the JSON config; Realpath and FileInfo carry no JSON
// tag and are assigned by copyPaths / copyDir while the inputs are being processed.
type fileInfo struct {
// Package is matched against the include_srcs_packages and exclude_srcs_packages globs.
Package string `json:"package"`
// Path is the location on disk that is lstat'ed, walked and copied from.
Path string `json:"path"`
// RootPath is carried over unchanged to entries discovered while walking a directory.
RootPath string `json:"root_path"`
// ShortPath is compared between two entries that map to the same output path to recognise
// the same file being listed twice.
ShortPath string `json:"short_path"`
// Workspace is the name of the workspace the file comes from; when it is non-empty and
// differs from the target workspace, include_external_repositories is applied to it.
Workspace string `json:"workspace"`
// WorkspacePath is the starting point from which the output path is derived.
WorkspacePath string `json:"workspace_path"`
// Hardlink is passed through to common.NewCopyOpts.
Hardlink bool `json:"hardlink"`
// Realpath is the resolved target when Path is a symlink (set in copyPaths); when non-empty
// copyDir walks it instead of Path.
Realpath string
// FileInfo is the stat result for the file (of the symlink target when Path is a symlink).
FileInfo fs.FileInfo
}
// config is the decoded JSON configuration file plus two values derived at startup
// (ReplacePrefixesKeys and TargetWorkspace, both filled in by parseConfig).
type config struct {
// AllowOverwrites controls what happens when two sources map to the same output path: when
// false this is an error and copies are queued as soon as they are computed; when true the
// later source wins and copies are queued only after every output path has been computed.
AllowOverwrites bool `json:"allow_overwrites"`
// Dst is the output directory; every computed output path is joined onto it.
Dst string `json:"dst"`
// ExcludeSrcsPackages are globs matched against a file's Package; a match skips the file.
ExcludeSrcsPackages []string `json:"exclude_srcs_packages"`
// ExcludeSrcsPatterns are globs matched against the output path (after root_paths has been
// applied); a match skips the file.
ExcludeSrcsPatterns []string `json:"exclude_srcs_patterns"`
// Files are the inputs to process, in order.
Files []fileInfo `json:"files"`
// IncludeExternalRepositories are globs matched against a file's Workspace when that
// workspace is non-empty and is not the target workspace; no match skips the file.
IncludeExternalRepositories []string `json:"include_external_repositories"`
// IncludeSrcsPackages are globs matched against a file's Package; no match skips the file.
IncludeSrcsPackages []string `json:"include_srcs_packages"`
// IncludeSrcsPatterns are globs matched against the output path (after root_paths has been
// applied); no match skips the file.
IncludeSrcsPatterns []string `json:"include_srcs_patterns"`
// ReplacePrefixes maps a glob to the text that replaces the longest output-path prefix
// matched by that glob.
ReplacePrefixes map[string]string `json:"replace_prefixes"`
// RootPaths are globs matched against the directory of a file's WorkspacePath; the longest
// matching prefix is stripped from the output path.
RootPaths []string `json:"root_paths"`
// Verbose is passed through to common.NewCopyOpts.
Verbose bool `json:"verbose"`
// ReplacePrefixesKeys holds the keys of ReplacePrefixes as a slice so they can be passed to
// longestGlobsMatch.
ReplacePrefixesKeys []string
// TargetWorkspace is the optional workspace name given on the command line; nil when absent.
TargetWorkspace *string
}
// copyMap maps an output path to the source file that will be copied to it.
type copyMap map[string]fileInfo
// pathSet is a set of paths; a path is a member when its value is true.
type pathSet map[string]bool
// copySet records, per output path, the source file selected for it by copyPath. It is used to
// detect duplicate outputs and, when overwrites are allowed, to queue the final set of copies.
var copySet = copyMap{}
// mkdirSet records output directories (and their ancestors) that copyPath has already created,
// so os.MkdirAll is not repeated for them.
var mkdirSet = pathSet{}
// parseConfig reads the JSON file at configPath and decodes it into a config.
//
// wkspName is stored as the config's TargetWorkspace as-is (it may be nil). The keys of
// replace_prefixes are additionally collected into ReplacePrefixesKeys.
//
// An error is returned when the file cannot be opened, cannot be read, or is not valid JSON
// for the config structure.
func parseConfig(configPath string, wkspName *string) (*config, error) {
	src, openErr := os.Open(configPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open config file: %w", openErr)
	}
	defer src.Close()

	raw, readErr := io.ReadAll(src)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read config file: %w", readErr)
	}

	parsed := new(config)
	if decodeErr := json.Unmarshal(raw, parsed); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse config file: %w", decodeErr)
	}

	parsed.TargetWorkspace = wkspName
	// NOTE(review): maps.Keys returns the keys in an indeterminate order, so when two
	// replace_prefixes globs match a prefix of equal length the winner may vary between runs.
	parsed.ReplacePrefixesKeys = maps.Keys(parsed.ReplacePrefixes)
	return parsed, nil
}
// anyGlobsMatch reports whether test is matched by at least one of globs.
//
// Globs are tried in order and evaluation stops at the first match. If a glob is rejected by
// doublestar.Match before any match is found, that error is returned together with false.
// An empty or nil globs slice never matches.
func anyGlobsMatch(globs []string, test string) (bool, error) {
	for i := 0; i < len(globs); i++ {
		matched, err := doublestar.Match(globs[i], test)
		switch {
		case err != nil:
			return false, err
		case matched:
			return true, nil
		}
	}
	return false, nil
}
// longestGlobsMatch finds, across all globs, the longest prefix of test that one of them
// matches (as determined by longestGlobMatch).
//
// It returns that prefix and the index of the glob that produced it. When several globs
// produce a prefix of the same length the earliest one wins. When nothing matches the result
// is ("", 0, nil), so callers must check the returned string rather than the index. The first
// error reported by longestGlobMatch aborts the search and is returned with ("", 0).
func longestGlobsMatch(globs []string, test string) (string, int, error) {
	var (
		best    string
		bestIdx int
	)
	for idx := range globs {
		candidate, err := longestGlobMatch(globs[idx], test)
		if err != nil {
			return "", 0, err
		}
		if len(candidate) <= len(best) {
			continue
		}
		best, bestIdx = candidate, idx
	}
	return best, bestIdx, nil
}
// longestGlobMatch returns the longest prefix of test that the glob g matches, or "" when no
// prefix matches.
//
// Prefixes are tried from the whole string down to its first byte, shrinking by one byte each
// time; the empty prefix is never tried. An error from doublestar.Match is returned as-is.
func longestGlobMatch(g string, test string) (string, error) {
	for end := len(test); end > 0; end-- {
		prefix := test[:end]
		matched, err := doublestar.Match(g, prefix)
		if err != nil {
			return "", err
		}
		if !matched {
			continue
		}
		return prefix, nil
	}
	return "", nil
}
// walker computes the set of copies to perform and sends them to copy workers.
type walker struct {
// queue is the send-only channel on which computed copy operations are submitted.
queue chan<- common.CopyOpts
}
// copyDir walks the directory described by file and passes every file found beneath it to
// copyPath, deriving each entry's ShortPath and WorkspacePath from file's values joined with
// the entry's path relative to the walk root.
//
// srcPaths records the directories already visited in this walk and is used to ignore symlinks
// that lead back into one of them; pass nil to start a fresh walk. The walk is rooted at
// file.Realpath when that is set and at file.Path otherwise.
//
// Symlinks are resolved with common.Realpath: a link to a file is copied from its target, and
// a link to a directory is walked recursively with the link's relative path kept in the
// derived ShortPath/WorkspacePath, so the linked contents land beneath the link's own location.
//
// The first error from the walk, from resolving or stat'ing a symlink, or from copyPath stops
// the walk and is returned.
func (w *walker) copyDir(cfg *config, srcPaths pathSet, file fileInfo) error {
	if srcPaths == nil {
		srcPaths = pathSet{}
	}
	srcPaths[file.Path] = true

	// filepath.WalkDir walks the file tree rooted at root, calling fn for each file or directory in
	// the tree, including root. See https://pkg.go.dev/path/filepath#WalkDir for more info.
	walkPath := file.Path
	if file.Realpath != "" {
		walkPath = file.Realpath
	}

	// child builds the fileInfo for an entry discovered under file: src is where its content is
	// read from and rel is its location relative to the walk root. Using one constructor keeps
	// the relative path applied uniformly to regular files, file symlinks and directory symlinks.
	child := func(src string, rel string, info fs.FileInfo) fileInfo {
		return fileInfo{
			Package:       file.Package,
			Path:          src,
			RootPath:      file.RootPath,
			ShortPath:     path.Join(file.ShortPath, rel),
			Workspace:     file.Workspace,
			WorkspacePath: path.Join(file.WorkspacePath, rel),
			Hardlink:      file.Hardlink,
			FileInfo:      info,
		}
	}

	return filepath.WalkDir(walkPath, func(p string, dirEntry fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		if dirEntry.IsDir() {
			// remember that this directory was visited to prevent infinite recursive symlink loops and
			// then short-circuit by returning nil since filepath.WalkDir will visit files contained
			// within this directory automatically
			srcPaths[p] = true
			return nil
		}

		info, err := dirEntry.Info()
		if err != nil {
			return err
		}

		r, err := common.FileRel(walkPath, p)
		if err != nil {
			return err
		}

		if info.Mode()&os.ModeSymlink != os.ModeSymlink {
			// a regular file
			return w.copyPath(cfg, child(p, r, info))
		}

		// symlinks to directories are intentionally never followed by filepath.WalkDir to avoid
		// infinite recursion, so they are resolved and handled here
		linkPath, err := common.Realpath(p)
		if err != nil {
			return err
		}
		if srcPaths[linkPath] {
			// recursive symlink; silently ignore
			return nil
		}
		stat, err := os.Stat(linkPath)
		if err != nil {
			return fmt.Errorf("failed to stat file %s pointed to by symlink %s: %w", linkPath, p, err)
		}

		if stat.IsDir() {
			// symlink points to a directory: walk the target, keeping the link's relative path so
			// its contents are not flattened into the parent directory
			return w.copyDir(cfg, srcPaths, child(linkPath, r, stat))
		}

		// symlink points to a regular file
		return w.copyPath(cfg, child(linkPath, r, stat))
	})
}
// copyPath decides whether file should be copied and, if so, where to, then records the copy
// in copySet and makes sure the destination directory exists.
//
// A nil return with nothing recorded means the file was filtered out (or was recognised as the
// same file listed twice). When overwrites are not allowed the copy is also sent to the queue
// immediately; otherwise queuing is left to the caller once all paths are known.
//
// Errors come from invalid glob patterns, from creating the destination directory, or from two
// different sources mapping to the same output path while overwrites are not allowed.
func (w *walker) copyPath(cfg *config, file fileInfo) error {
	// Filters and transformations are applied in this order:
	//
	// - `include_external_repositories`
	// - `include_srcs_packages`
	// - `exclude_srcs_packages`
	// - `root_paths`
	// - `include_srcs_patterns`
	// - `exclude_srcs_patterns`
	// - `replace_prefixes`
	//
	// If you change this order please update the docstrings in the copy_to_directory rule.
	dest := file.WorkspacePath
	destParent := path.Dir(dest)

	// include_external_repositories: only consulted for files from a named workspace other than
	// the target's own workspace; files from the target's workspace are always included, even if
	// that workspace is external with respect to `__main__`
	fromTargetWorkspace := cfg.TargetWorkspace != nil && file.Workspace == *cfg.TargetWorkspace
	if file.Workspace != "" && !fromTargetWorkspace {
		if included, err := anyGlobsMatch(cfg.IncludeExternalRepositories, file.Workspace); err != nil {
			return err
		} else if !included {
			return nil // external workspace is not included
		}
	}

	// include_srcs_packages
	if included, err := anyGlobsMatch(cfg.IncludeSrcsPackages, file.Package); err != nil {
		return err
	} else if !included {
		return nil // package is not included
	}

	// exclude_srcs_packages
	if excluded, err := anyGlobsMatch(cfg.ExcludeSrcsPackages, file.Package); err != nil {
		return err
	} else if excluded {
		return nil // package is excluded
	}

	// root_paths: strip the longest matching prefix of the file's directory
	stripped, _, err := longestGlobsMatch(cfg.RootPaths, destParent)
	if err != nil {
		return err
	}
	if stripped != "" {
		dest = strings.TrimPrefix(dest[len(stripped):], "/")
	}

	// include_srcs_patterns
	if included, err := anyGlobsMatch(cfg.IncludeSrcsPatterns, dest); err != nil {
		return err
	} else if !included {
		return nil // output path is not included
	}

	// exclude_srcs_patterns
	if excluded, err := anyGlobsMatch(cfg.ExcludeSrcsPatterns, dest); err != nil {
		return err
	} else if excluded {
		return nil // output path is excluded
	}

	// replace_prefixes: swap the longest matching prefix for its configured replacement
	matchedPrefix, keyIdx, err := longestGlobsMatch(cfg.ReplacePrefixesKeys, dest)
	if err != nil {
		return err
	}
	if matchedPrefix != "" {
		key := cfg.ReplacePrefixesKeys[keyIdx]
		dest = cfg.ReplacePrefixes[key] + dest[len(matchedPrefix):]
	}

	dest = path.Join(cfg.Dst, dest)

	// register this file in the copy set, checking for an earlier claim on the same output
	if prev, seen := copySet[dest]; seen {
		sameFile := prev.ShortPath == file.ShortPath && file.FileInfo.Size() == prev.FileInfo.Size()
		switch {
		case sameFile:
			// this is likely the same file listed twice: the original in the source tree and the
			// copy in the output tree
			return nil
		case !cfg.AllowOverwrites:
			return fmt.Errorf("duplicate output file '%s' configured from source files '%s' and '%s'; set 'allow_overwrites' to True to allow this overwrites but keep in mind that order matters when this is set", dest, prev.Path, file.Path)
		}
	}
	copySet[dest] = file

	if dir := path.Dir(dest); !mkdirSet[dir] {
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			return err
		}
		// MkdirAll created every ancestor too, so remember them all; path.Dir bottoms out at "/"
		// or "." (https://pkg.go.dev/path#Dir)
		for d := dir; d != "" && d != "/" && d != "."; d = path.Dir(d) {
			mkdirSet[d] = true
		}
	}

	if cfg.AllowOverwrites {
		// the final source for this output is only known once every path has been computed
		return nil
	}
	// without overwrites a copy can start as soon as it has been calculated
	w.queue <- common.NewCopyOpts(file.Path, dest, file.FileInfo, file.Hardlink, cfg.Verbose)
	return nil
}
// copyPaths processes every entry of cfg.Files in order: directories are walked with copyDir
// and everything else goes straight to copyPath.
//
// Each entry is lstat'ed first. For a symlink the link is resolved and the target is stat'ed;
// the resolved path is stored in Realpath and the target's info in FileInfo, so a symlink to a
// directory is treated as a directory. Processing stops at the first error.
func (w *walker) copyPaths(cfg *config) error {
	for i := range cfg.Files {
		// work on a copy so the config's own entry is left untouched
		entry := cfg.Files[i]

		lstat, err := os.Lstat(entry.Path)
		if err != nil {
			return fmt.Errorf("failed to lstat file %s: %w", entry.Path, err)
		}

		entry.FileInfo = lstat
		if lstat.Mode()&os.ModeSymlink != 0 {
			// On Windows, filepath.WalkDir doesn't like directory symlinks so we must
			// call filepath.WalkDir on the realpath
			target, err := common.Realpath(entry.Path)
			if err != nil {
				return err
			}
			targetInfo, err := os.Stat(target)
			if err != nil {
				return fmt.Errorf("failed to stat file %s pointed to by symlink %s: %w", target, entry.Path, err)
			}
			entry.Realpath = target
			entry.FileInfo = targetInfo
		}

		var copyErr error
		if entry.FileInfo.IsDir() {
			copyErr = w.copyDir(cfg, nil, entry)
		} else {
			copyErr = w.copyPath(cfg, entry)
		}
		if copyErr != nil {
			return copyErr
		}
	}
	return nil
}
// main is the entry point of the copy_to_directory tool.
//
// Usage: copy_to_directory config_file [workspace_name]
//
// It loads the JSON config, starts a fixed pool of copy workers fed by a buffered queue,
// computes the copies for every configured file, and waits for the workers to finish. Any
// failure is reported on stderr and ends the process with a non-zero exit status.
func main() {
	args := os.Args[1:]
	if len(args) != 1 && len(args) != 2 {
		// a usage error is a diagnostic, so it goes to stderr like every other failure here
		fmt.Fprintln(os.Stderr, "Usage: copy_to_directory config_file [workspace_name]")
		os.Exit(1)
	}

	configFile := args[0]

	// Read workspace arg if present.
	var wksp *string
	if len(args) >= 2 {
		wksp = &args[1]
	}

	cfg, err := parseConfig(configFile, wksp)
	if err != nil {
		log.Fatal(err)
	}

	queue := make(chan common.CopyOpts, 100)
	var wg sync.WaitGroup

	const numWorkers = 10
	wg.Add(numWorkers)
	for i := 0; i < numWorkers; i++ {
		// presumably each worker drains the queue until it is closed and then signals wg;
		// verify against common.NewCopyWorker
		go common.NewCopyWorker(queue).Run(&wg)
	}

	w := &walker{queue: queue}
	if err = w.copyPaths(cfg); err != nil {
		log.Fatal(err)
	}

	if cfg.AllowOverwrites {
		// if we allow overwrites then we must wait until all copy paths are calculated before
		// starting any copy operations, so that only the winning source of each output is copied
		for outputPath, file := range copySet {
			queue <- common.NewCopyOpts(file.Path, outputPath, file.FileInfo, file.Hardlink, cfg.Verbose)
		}
	}

	// no more work will be submitted; let the workers finish what is queued
	close(queue)
	wg.Wait()
}