2023-01-02 00:14:48 +00:00
package main
2023-01-03 17:42:20 +00:00
import (
	"encoding/json"
	"fmt"
	"io"
	"io/fs"
	"log"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"

	"github.com/aspect-build/bazel-lib/tools/common"
	"github.com/bmatcuk/doublestar/v4"
	"golang.org/x/exp/maps"
)
// fileInfo describes a single source file or directory handled by the tool.
//
// The tagged fields are decoded from the JSON config file. Realpath and
// FileInfo are populated at runtime (see copyPaths and copyDir) and are
// explicitly excluded from JSON decoding so that a config file can neither
// pre-seed the walk root via a "Realpath" key nor make decoding fail by
// supplying a value for the FileInfo interface field.
type fileInfo struct {
	// Fields read from the config file.
	Package       string `json:"package"`
	Path          string `json:"path"`
	RootPath      string `json:"root_path"`
	ShortPath     string `json:"short_path"`
	Workspace     string `json:"workspace"`
	WorkspacePath string `json:"workspace_path"`
	Hardlink      bool   `json:"hardlink"`

	// Realpath is the resolved symlink target; when non-empty copyDir walks
	// it instead of Path.
	Realpath string `json:"-"`
	// FileInfo is the stat result for the file being copied.
	FileInfo fs.FileInfo `json:"-"`
}
type config struct {
AllowOverwrites bool ` json:"allow_overwrites" `
Dst string ` json:"dst" `
ExcludeSrcsPackages [ ] string ` json:"exclude_srcs_packages" `
ExcludeSrcsPatterns [ ] string ` json:"exclude_srcs_patterns" `
Files [ ] fileInfo ` json:"files" `
IncludeExternalRepositories [ ] string ` json:"include_external_repositories" `
IncludeSrcsPackages [ ] string ` json:"include_srcs_packages" `
IncludeSrcsPatterns [ ] string ` json:"include_srcs_patterns" `
ReplacePrefixes map [ string ] string ` json:"replace_prefixes" `
RootPaths [ ] string ` json:"root_paths" `
Verbose bool ` json:"verbose" `
2023-01-05 22:24:24 +00:00
ReplacePrefixesKeys [ ] string
2023-09-21 18:34:57 +00:00
TargetWorkspace * string
2023-01-03 17:42:20 +00:00
}
2023-01-06 18:29:45 +00:00
type copyMap map [ string ] fileInfo
type pathSet map [ string ] bool
2023-01-03 17:42:20 +00:00
2023-01-15 20:41:00 +00:00
var copySet = copyMap { }
var mkdirSet = pathSet { }
2023-09-21 18:34:57 +00:00
func parseConfig ( configPath string , wkspName * string ) ( * config , error ) {
2023-01-03 17:42:20 +00:00
f , err := os . Open ( configPath )
if err != nil {
return nil , fmt . Errorf ( "failed to open config file: %w" , err )
}
defer f . Close ( )
2023-02-01 20:30:42 +00:00
byteValue , err := io . ReadAll ( f )
2023-01-03 17:42:20 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to read config file: %w" , err )
}
var cfg config
if err := json . Unmarshal ( [ ] byte ( byteValue ) , & cfg ) ; err != nil {
return nil , fmt . Errorf ( "failed to parse config file: %w" , err )
}
cfg . ReplacePrefixesKeys = maps . Keys ( cfg . ReplacePrefixes )
2023-09-21 18:34:57 +00:00
cfg . TargetWorkspace = wkspName
2023-01-03 17:42:20 +00:00
return & cfg , nil
}
2023-01-05 22:24:24 +00:00
func anyGlobsMatch ( globs [ ] string , test string ) ( bool , error ) {
for _ , g := range globs {
match , err := doublestar . Match ( g , test )
2023-01-03 17:42:20 +00:00
if err != nil {
2023-01-05 22:24:24 +00:00
return false , err
2023-01-03 17:42:20 +00:00
}
2023-01-05 22:24:24 +00:00
if match {
return true , nil
2023-01-03 17:42:20 +00:00
}
}
2023-01-05 22:24:24 +00:00
return false , nil
2023-01-03 17:42:20 +00:00
}
2023-01-05 22:24:24 +00:00
func longestGlobsMatch ( globs [ ] string , test string ) ( string , int , error ) {
2023-01-03 17:42:20 +00:00
result := ""
index := 0
for i , g := range globs {
2023-01-05 22:24:24 +00:00
match , err := longestGlobMatch ( g , test )
if err != nil {
return "" , 0 , err
}
2023-01-03 17:42:20 +00:00
if len ( match ) > len ( result ) {
result = match
index = i
}
}
2023-01-05 22:24:24 +00:00
return result , index , nil
2023-01-03 17:42:20 +00:00
}
2023-01-05 22:24:24 +00:00
func longestGlobMatch ( g string , test string ) ( string , error ) {
2023-01-03 17:42:20 +00:00
for i := 0 ; i < len ( test ) ; i ++ {
t := test [ : len ( test ) - i ]
2023-01-05 22:24:24 +00:00
match , err := doublestar . Match ( g , t )
if err != nil {
return "" , err
}
if match {
return t , nil
2023-01-03 17:42:20 +00:00
}
}
2023-01-05 22:24:24 +00:00
return "" , nil
2023-01-03 17:42:20 +00:00
}
2023-02-01 20:30:42 +00:00
// walker resolves the configured source files to output paths and enqueues the
// resulting copy operations.
type walker struct {
// queue is the send-only channel onto which copy operations are pushed.
queue chan <- common . CopyOpts
}
func ( w * walker ) copyDir ( cfg * config , srcPaths pathSet , file fileInfo ) error {
2023-01-06 18:29:45 +00:00
if srcPaths == nil {
srcPaths = pathSet { }
}
srcPaths [ file . Path ] = true
// filepath.WalkDir walks the file tree rooted at root, calling fn for each file or directory in
// the tree, including root. See https://pkg.go.dev/path/filepath#WalkDir for more info.
2023-01-16 23:41:39 +00:00
walkPath := file . Path
if file . Realpath != "" {
walkPath = file . Realpath
}
return filepath . WalkDir ( walkPath , func ( p string , dirEntry fs . DirEntry , err error ) error {
2023-01-15 20:17:30 +00:00
if err != nil {
return err
}
if dirEntry . IsDir ( ) {
2023-01-06 18:29:45 +00:00
// remember that this directory was visited to prevent infinite recursive symlink loops and
// then short-circuit by returning nil since filepath.Walk will visit files contained within
// this directory automatically
srcPaths [ p ] = true
return nil
2023-01-03 17:42:20 +00:00
}
2023-01-06 18:29:45 +00:00
2023-01-15 20:17:30 +00:00
info , err := dirEntry . Info ( )
if err != nil {
return err
}
2023-01-16 23:41:39 +00:00
r , err := common . FileRel ( walkPath , p )
2023-01-16 23:09:18 +00:00
if err != nil {
return err
}
2023-01-06 18:29:45 +00:00
if info . Mode ( ) & os . ModeSymlink == os . ModeSymlink {
2023-01-14 23:51:01 +00:00
// symlink to directories are intentionally never followed by filepath.Walk to avoid infinite recursion
2023-01-17 05:46:50 +00:00
linkPath , err := common . Realpath ( p )
2023-01-06 18:29:45 +00:00
if err != nil {
2024-05-28 16:42:06 +00:00
if os . IsNotExist ( err ) {
return fmt . Errorf ( "failed to get realpath of dangling symlink %s: %w" , p , err )
}
return fmt . Errorf ( "failed to get realpath of %s: %w" , p , err )
2023-01-06 18:29:45 +00:00
}
if srcPaths [ linkPath ] {
// recursive symlink; silently ignore
return nil
}
stat , err := os . Stat ( linkPath )
if err != nil {
2023-01-14 23:51:01 +00:00
return fmt . Errorf ( "failed to stat file %s pointed to by symlink %s: %w" , linkPath , p , err )
2023-01-06 18:29:45 +00:00
}
if stat . IsDir ( ) {
2023-01-14 23:51:01 +00:00
// symlink points to a directory
2023-01-06 18:29:45 +00:00
f := fileInfo {
Package : file . Package ,
Path : linkPath ,
RootPath : file . RootPath ,
2024-05-28 16:42:06 +00:00
ShortPath : file . ShortPath ,
2023-01-06 18:29:45 +00:00
Workspace : file . Workspace ,
2024-05-28 16:42:06 +00:00
WorkspacePath : file . WorkspacePath ,
2023-01-14 15:55:23 +00:00
Hardlink : file . Hardlink ,
2023-01-06 18:29:45 +00:00
FileInfo : stat ,
}
2023-02-01 20:30:42 +00:00
return w . copyDir ( cfg , srcPaths , f )
2023-01-06 18:29:45 +00:00
} else {
2023-01-14 23:51:01 +00:00
// symlink points to a regular file
2023-01-06 18:29:45 +00:00
f := fileInfo {
Package : file . Package ,
Path : linkPath ,
RootPath : file . RootPath ,
ShortPath : path . Join ( file . ShortPath , r ) ,
Workspace : file . Workspace ,
WorkspacePath : path . Join ( file . WorkspacePath , r ) ,
2023-01-14 15:55:23 +00:00
Hardlink : file . Hardlink ,
2023-01-06 18:29:45 +00:00
FileInfo : stat ,
}
2023-02-01 20:30:42 +00:00
return w . copyPath ( cfg , f )
2023-01-06 18:29:45 +00:00
}
}
// a regular file
f := fileInfo {
Package : file . Package ,
Path : p ,
RootPath : file . RootPath ,
ShortPath : path . Join ( file . ShortPath , r ) ,
Workspace : file . Workspace ,
WorkspacePath : path . Join ( file . WorkspacePath , r ) ,
2023-01-14 15:55:23 +00:00
Hardlink : file . Hardlink ,
2023-01-06 18:29:45 +00:00
FileInfo : info ,
}
2023-02-01 20:30:42 +00:00
return w . copyPath ( cfg , f )
2023-01-03 17:42:20 +00:00
} )
}
2024-05-28 16:42:06 +00:00
func ( w * walker ) calculateOutputPath ( cfg * config , file fileInfo ) ( string , error ) {
2023-01-03 17:42:20 +00:00
// Apply filters and transformations in the following order:
//
// - `include_external_repositories`
// - `include_srcs_packages`
// - `exclude_srcs_packages`
// - `root_paths`
// - `include_srcs_patterns`
// - `exclude_srcs_patterns`
// - `replace_prefixes`
//
// If you change this order please update the docstrings in the copy_to_directory rule.
outputPath := file . WorkspacePath
outputRoot := path . Dir ( outputPath )
// apply include_external_repositories (if the file is from an external repository)
2023-09-21 18:34:57 +00:00
// automatically include files from the same workspace as this target, even if
// that is an external workspace with respect to `__main__`
if file . Workspace != "" && ( cfg . TargetWorkspace == nil || file . Workspace != * cfg . TargetWorkspace ) {
2023-01-05 22:24:24 +00:00
match , err := anyGlobsMatch ( cfg . IncludeExternalRepositories , file . Workspace )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
if ! match {
2024-05-28 16:42:06 +00:00
return "" , nil // external workspace is not included
2023-01-03 17:42:20 +00:00
}
}
// apply include_srcs_packages
2023-01-05 22:24:24 +00:00
match , err := anyGlobsMatch ( cfg . IncludeSrcsPackages , file . Package )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
if ! match {
2024-05-28 16:42:06 +00:00
return "" , nil // package is not included
2023-01-03 17:42:20 +00:00
}
// apply exclude_srcs_packages
2023-01-05 22:24:24 +00:00
match , err = anyGlobsMatch ( cfg . ExcludeSrcsPackages , file . Package )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
if match {
2024-05-28 16:42:06 +00:00
return "" , nil // package is excluded
2023-01-03 17:42:20 +00:00
}
// apply root_paths
2023-01-05 22:24:24 +00:00
rootPathMatch , _ , err := longestGlobsMatch ( cfg . RootPaths , outputRoot )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
2023-01-03 17:42:20 +00:00
if rootPathMatch != "" {
2023-02-01 20:30:42 +00:00
outputPath = strings . TrimPrefix ( outputPath [ len ( rootPathMatch ) : ] , "/" )
2023-01-03 17:42:20 +00:00
}
// apply include_srcs_patterns
2023-01-05 22:24:24 +00:00
match , err = anyGlobsMatch ( cfg . IncludeSrcsPatterns , outputPath )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
if ! match {
2024-05-28 16:42:06 +00:00
return "" , nil // outputPath is not included
2023-01-03 17:42:20 +00:00
}
2023-01-05 22:24:24 +00:00
// apply exclude_srcs_patterns
match , err = anyGlobsMatch ( cfg . ExcludeSrcsPatterns , outputPath )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
if match {
2024-05-28 16:42:06 +00:00
return "" , nil // outputPath is excluded
2023-01-03 17:42:20 +00:00
}
// apply replace_prefixes
2023-01-05 22:24:24 +00:00
replacePrefixMatch , replacePrefixIndex , err := longestGlobsMatch ( cfg . ReplacePrefixesKeys , outputPath )
if err != nil {
2024-05-28 16:42:06 +00:00
return "" , err
2023-01-05 22:24:24 +00:00
}
2023-01-03 17:42:20 +00:00
if replacePrefixMatch != "" {
replaceWith := cfg . ReplacePrefixes [ cfg . ReplacePrefixesKeys [ replacePrefixIndex ] ]
outputPath = replaceWith + outputPath [ len ( replacePrefixMatch ) : ]
}
2024-05-28 16:42:06 +00:00
return path . Join ( cfg . Dst , outputPath ) , nil
}
func ( w * walker ) copyPath ( cfg * config , file fileInfo ) error {
outputPath , err := w . calculateOutputPath ( cfg , file )
if err != nil {
return fmt . Errorf ( "failed to calculate output path %s: %w" , file . WorkspacePath , err )
}
if outputPath == "" {
// this path is excluded
return nil
}
2023-01-03 17:42:20 +00:00
// add this file to the copy Paths
2023-01-15 20:41:00 +00:00
dup , exists := copySet [ outputPath ]
2023-01-03 17:42:20 +00:00
if exists {
2023-01-15 20:41:00 +00:00
if dup . ShortPath == file . ShortPath && file . FileInfo . Size ( ) == dup . FileInfo . Size ( ) {
// this is likely the same file listed twice: the original in the source tree and the copy in the output tree
return nil
2023-01-03 17:42:20 +00:00
} else if ! cfg . AllowOverwrites {
return fmt . Errorf ( "duplicate output file '%s' configured from source files '%s' and '%s'; set 'allow_overwrites' to True to allow this overwrites but keep in mind that order matters when this is set" , outputPath , dup . Path , file . Path )
}
}
2023-01-15 20:41:00 +00:00
copySet [ outputPath ] = file
outputDir := path . Dir ( outputPath )
if ! mkdirSet [ outputDir ] {
if err = os . MkdirAll ( outputDir , os . ModePerm ) ; err != nil {
return err
}
// https://pkg.go.dev/path#Dir
for len ( outputDir ) > 0 && outputDir != "/" && outputDir != "." {
mkdirSet [ outputDir ] = true
outputDir = path . Dir ( outputDir )
}
}
if ! cfg . AllowOverwrites {
// if we don't allow overwrites then we can start copying as soon as a copy is calculated
2023-02-01 20:30:42 +00:00
w . queue <- common . NewCopyOpts ( file . Path , outputPath , file . FileInfo , file . Hardlink , cfg . Verbose )
2023-01-15 20:41:00 +00:00
}
2023-01-03 17:42:20 +00:00
return nil
}
2023-02-01 20:30:42 +00:00
func ( w * walker ) copyPaths ( cfg * config ) error {
2023-01-03 17:42:20 +00:00
for _ , file := range cfg . Files {
2023-01-16 23:41:39 +00:00
info , err := os . Lstat ( file . Path )
2023-01-06 18:29:45 +00:00
if err != nil {
2023-01-16 23:41:39 +00:00
return fmt . Errorf ( "failed to lstat file %s: %w" , file . Path , err )
}
if info . Mode ( ) & os . ModeSymlink == os . ModeSymlink {
// On Windows, filepath.WalkDir doesn't like directory symlinks so we must
// call filepath.WalkDir on the realpath
2023-01-17 05:46:50 +00:00
realpath , err := common . Realpath ( file . Path )
2023-01-16 23:41:39 +00:00
if err != nil {
2024-05-28 16:42:06 +00:00
if os . IsNotExist ( err ) {
return fmt . Errorf ( "failed to get realpath of dangling symlink %s: %w" , file . Path , err )
}
return fmt . Errorf ( "failed to get realpath of %s: %w" , file . Path , err )
2023-01-16 23:41:39 +00:00
}
stat , err := os . Stat ( realpath )
if err != nil {
return fmt . Errorf ( "failed to stat file %s pointed to by symlink %s: %w" , realpath , file . Path , err )
}
file . Realpath = realpath
file . FileInfo = stat
} else {
file . FileInfo = info
2023-01-06 18:29:45 +00:00
}
2023-01-16 23:41:39 +00:00
2023-01-06 18:29:45 +00:00
if file . FileInfo . IsDir ( ) {
2023-02-01 20:30:42 +00:00
if err := w . copyDir ( cfg , nil , file ) ; err != nil {
2023-01-15 20:41:00 +00:00
return err
2023-01-03 17:42:20 +00:00
}
2023-01-06 18:29:45 +00:00
} else {
2023-02-01 20:30:42 +00:00
if err := w . copyPath ( cfg , file ) ; err != nil {
2023-01-15 20:41:00 +00:00
return err
2023-01-03 17:42:20 +00:00
}
}
}
2023-01-15 20:41:00 +00:00
return nil
2023-01-03 17:42:20 +00:00
}
2023-01-15 20:17:30 +00:00
func main ( ) {
args := os . Args [ 1 : ]
2023-01-03 20:26:51 +00:00
2023-09-21 18:34:57 +00:00
if len ( args ) != 1 && len ( args ) != 2 {
fmt . Println ( "Usage: copy_to_directory config_file [workspace_name]" )
2023-01-03 20:26:51 +00:00
os . Exit ( 1 )
}
2023-09-21 18:34:57 +00:00
configFile := args [ 0 ]
// Read workspace arg if present.
var wksp * string = nil
if len ( args ) >= 2 {
wksp = & args [ 1 ]
}
2023-09-21 21:55:57 +00:00
cfg , err := parseConfig ( configFile , wksp )
2023-01-03 17:42:20 +00:00
if err != nil {
log . Fatal ( err )
}
2023-02-01 20:30:42 +00:00
queue := make ( chan common . CopyOpts , 100 )
var wg sync . WaitGroup
const numWorkers = 10
wg . Add ( numWorkers )
for i := 0 ; i < numWorkers ; i ++ {
go common . NewCopyWorker ( queue ) . Run ( & wg )
}
walker := & walker { queue }
if err = walker . copyPaths ( cfg ) ; err != nil {
2023-01-03 17:42:20 +00:00
log . Fatal ( err )
}
2023-01-15 20:41:00 +00:00
if cfg . AllowOverwrites {
// if we allow overwrites then we must wait until all copy paths are calculated before starting
// any copy operations
for outputPath , file := range copySet {
2023-02-01 20:30:42 +00:00
queue <- common . NewCopyOpts ( file . Path , outputPath , file . FileInfo , file . Hardlink , cfg . Verbose )
2023-01-03 17:42:20 +00:00
}
}
2023-01-15 20:41:00 +00:00
2023-02-01 20:30:42 +00:00
close ( queue )
wg . Wait ( )
2023-01-02 00:14:48 +00:00
}