package main import ( "encoding/json" "fmt" "io" "io/fs" "log" "os" "path" "path/filepath" "strings" "sync" "github.com/aspect-build/bazel-lib/tools/common" "github.com/bmatcuk/doublestar/v4" "golang.org/x/exp/maps" ) type fileInfo struct { Package string `json:"package"` Path string `json:"path"` RootPath string `json:"root_path"` ShortPath string `json:"short_path"` Workspace string `json:"workspace"` WorkspacePath string `json:"workspace_path"` Hardlink bool `json:"hardlink"` Realpath string FileInfo fs.FileInfo } type config struct { AllowOverwrites bool `json:"allow_overwrites"` Dst string `json:"dst"` ExcludeSrcsPackages []string `json:"exclude_srcs_packages"` ExcludeSrcsPatterns []string `json:"exclude_srcs_patterns"` Files []fileInfo `json:"files"` IncludeExternalRepositories []string `json:"include_external_repositories"` IncludeSrcsPackages []string `json:"include_srcs_packages"` IncludeSrcsPatterns []string `json:"include_srcs_patterns"` ReplacePrefixes map[string]string `json:"replace_prefixes"` RootPaths []string `json:"root_paths"` Verbose bool `json:"verbose"` ReplacePrefixesKeys []string TargetWorkspace *string } type copyMap map[string]fileInfo type pathSet map[string]bool var copySet = copyMap{} var mkdirSet = pathSet{} func parseConfig(configPath string, wkspName *string) (*config, error) { f, err := os.Open(configPath) if err != nil { return nil, fmt.Errorf("failed to open config file: %w", err) } defer f.Close() byteValue, err := io.ReadAll(f) if err != nil { return nil, fmt.Errorf("failed to read config file: %w", err) } var cfg config if err := json.Unmarshal([]byte(byteValue), &cfg); err != nil { return nil, fmt.Errorf("failed to parse config file: %w", err) } cfg.ReplacePrefixesKeys = maps.Keys(cfg.ReplacePrefixes) cfg.TargetWorkspace = wkspName return &cfg, nil } func anyGlobsMatch(globs []string, test string) (bool, error) { for _, g := range globs { match, err := doublestar.Match(g, test) if err != nil { return false, err } if match { return true, nil } } return false, nil } func longestGlobsMatch(globs []string, test string) (string, int, error) { result := "" index := 0 for i, g := range globs { match, err := longestGlobMatch(g, test) if err != nil { return "", 0, err } if len(match) > len(result) { result = match index = i } } return result, index, nil } func longestGlobMatch(g string, test string) (string, error) { for i := 0; i < len(test); i++ { t := test[:len(test)-i] match, err := doublestar.Match(g, t) if err != nil { return "", err } if match { return t, nil } } return "", nil } type walker struct { queue chan<- common.CopyOpts } func (w *walker) copyDir(cfg *config, srcPaths pathSet, file fileInfo) error { if srcPaths == nil { srcPaths = pathSet{} } srcPaths[file.Path] = true // filepath.WalkDir walks the file tree rooted at root, calling fn for each file or directory in // the tree, including root. See https://pkg.go.dev/path/filepath#WalkDir for more info. walkPath := file.Path if file.Realpath != "" { walkPath = file.Realpath } return filepath.WalkDir(walkPath, func(p string, dirEntry fs.DirEntry, err error) error { if err != nil { return err } if dirEntry.IsDir() { // remember that this directory was visited to prevent infinite recursive symlink loops and // then short-circuit by returning nil since filepath.Walk will visit files contained within // this directory automatically srcPaths[p] = true return nil } info, err := dirEntry.Info() if err != nil { return err } r, err := common.FileRel(walkPath, p) if err != nil { return err } if info.Mode()&os.ModeSymlink == os.ModeSymlink { // symlink to directories are intentionally never followed by filepath.Walk to avoid infinite recursion linkPath, err := common.Realpath(p) if err != nil { return err } if srcPaths[linkPath] { // recursive symlink; silently ignore return nil } stat, err := os.Stat(linkPath) if err != nil { return fmt.Errorf("failed to stat file %s pointed to by symlink %s: %w", linkPath, p, err) } if stat.IsDir() { // symlink points to a directory f := fileInfo{ Package: file.Package, Path: linkPath, RootPath: file.RootPath, ShortPath: path.Join(file.ShortPath), Workspace: file.Workspace, WorkspacePath: path.Join(file.WorkspacePath), Hardlink: file.Hardlink, FileInfo: stat, } return w.copyDir(cfg, srcPaths, f) } else { // symlink points to a regular file f := fileInfo{ Package: file.Package, Path: linkPath, RootPath: file.RootPath, ShortPath: path.Join(file.ShortPath, r), Workspace: file.Workspace, WorkspacePath: path.Join(file.WorkspacePath, r), Hardlink: file.Hardlink, FileInfo: stat, } return w.copyPath(cfg, f) } } // a regular file f := fileInfo{ Package: file.Package, Path: p, RootPath: file.RootPath, ShortPath: path.Join(file.ShortPath, r), Workspace: file.Workspace, WorkspacePath: path.Join(file.WorkspacePath, r), Hardlink: file.Hardlink, FileInfo: info, } return w.copyPath(cfg, f) }) } func (w *walker) copyPath(cfg *config, file fileInfo) error { // Apply filters and transformations in the following order: // // - `include_external_repositories` // - `include_srcs_packages` // - `exclude_srcs_packages` // - `root_paths` // - `include_srcs_patterns` // - `exclude_srcs_patterns` // - `replace_prefixes` // // If you change this order please update the docstrings in the copy_to_directory rule. outputPath := file.WorkspacePath outputRoot := path.Dir(outputPath) // apply include_external_repositories (if the file is from an external repository) // automatically include files from the same workspace as this target, even if // that is an external workspace with respect to `__main__` if file.Workspace != "" && (cfg.TargetWorkspace == nil || file.Workspace != *cfg.TargetWorkspace) { match, err := anyGlobsMatch(cfg.IncludeExternalRepositories, file.Workspace) if err != nil { return err } if !match { return nil // external workspace is not included } } // apply include_srcs_packages match, err := anyGlobsMatch(cfg.IncludeSrcsPackages, file.Package) if err != nil { return err } if !match { return nil // package is not included } // apply exclude_srcs_packages match, err = anyGlobsMatch(cfg.ExcludeSrcsPackages, file.Package) if err != nil { return err } if match { return nil // package is excluded } // apply root_paths rootPathMatch, _, err := longestGlobsMatch(cfg.RootPaths, outputRoot) if err != nil { return err } if rootPathMatch != "" { outputPath = strings.TrimPrefix(outputPath[len(rootPathMatch):], "/") } // apply include_srcs_patterns match, err = anyGlobsMatch(cfg.IncludeSrcsPatterns, outputPath) if err != nil { return err } if !match { return nil // outputPath is not included } // apply exclude_srcs_patterns match, err = anyGlobsMatch(cfg.ExcludeSrcsPatterns, outputPath) if err != nil { return err } if match { return nil // outputPath is excluded } // apply replace_prefixes replacePrefixMatch, replacePrefixIndex, err := longestGlobsMatch(cfg.ReplacePrefixesKeys, outputPath) if err != nil { return err } if replacePrefixMatch != "" { replaceWith := cfg.ReplacePrefixes[cfg.ReplacePrefixesKeys[replacePrefixIndex]] outputPath = replaceWith + outputPath[len(replacePrefixMatch):] } outputPath = path.Join(cfg.Dst, outputPath) // add this file to the copy Paths dup, exists := copySet[outputPath] if exists { if dup.ShortPath == file.ShortPath && file.FileInfo.Size() == dup.FileInfo.Size() { // this is likely the same file listed twice: the original in the source tree and the copy in the output tree return nil } else if !cfg.AllowOverwrites { return fmt.Errorf("duplicate output file '%s' configured from source files '%s' and '%s'; set 'allow_overwrites' to True to allow this overwrites but keep in mind that order matters when this is set", outputPath, dup.Path, file.Path) } } copySet[outputPath] = file outputDir := path.Dir(outputPath) if !mkdirSet[outputDir] { if err = os.MkdirAll(outputDir, os.ModePerm); err != nil { return err } // https://pkg.go.dev/path#Dir for len(outputDir) > 0 && outputDir != "/" && outputDir != "." { mkdirSet[outputDir] = true outputDir = path.Dir(outputDir) } } if !cfg.AllowOverwrites { // if we don't allow overwrites then we can start copying as soon as a copy is calculated w.queue <- common.NewCopyOpts(file.Path, outputPath, file.FileInfo, file.Hardlink, cfg.Verbose) } return nil } func (w *walker) copyPaths(cfg *config) error { for _, file := range cfg.Files { info, err := os.Lstat(file.Path) if err != nil { return fmt.Errorf("failed to lstat file %s: %w", file.Path, err) } if info.Mode()&os.ModeSymlink == os.ModeSymlink { // On Windows, filepath.WalkDir doesn't like directory symlinks so we must // call filepath.WalkDir on the realpath realpath, err := common.Realpath(file.Path) if err != nil { return err } stat, err := os.Stat(realpath) if err != nil { return fmt.Errorf("failed to stat file %s pointed to by symlink %s: %w", realpath, file.Path, err) } file.Realpath = realpath file.FileInfo = stat } else { file.FileInfo = info } if file.FileInfo.IsDir() { if err := w.copyDir(cfg, nil, file); err != nil { return err } } else { if err := w.copyPath(cfg, file); err != nil { return err } } } return nil } func main() { args := os.Args[1:] if len(args) != 1 && len(args) != 2 { fmt.Println("Usage: copy_to_directory config_file [workspace_name]") os.Exit(1) } configFile := args[0] // Read workspace arg if present. var wksp *string = nil if len(args) >= 2 { wksp = &args[1] } cfg, err := parseConfig(configFile, wksp) if err != nil { log.Fatal(err) } queue := make(chan common.CopyOpts, 100) var wg sync.WaitGroup const numWorkers = 10 wg.Add(numWorkers) for i := 0; i < numWorkers; i++ { go common.NewCopyWorker(queue).Run(&wg) } walker := &walker{queue} if err = walker.copyPaths(cfg); err != nil { log.Fatal(err) } if cfg.AllowOverwrites { // if we allow overwrites then we must wait until all copy paths are calculated before starting // any copy operations for outputPath, file := range copySet { queue <- common.NewCopyOpts(file.Path, outputPath, file.FileInfo, file.Hardlink, cfg.Verbose) } } close(queue) wg.Wait() }