Skip to content

Commit

Permalink
lakectl local: add option to ignore symlinks (#8055)
Browse files Browse the repository at this point in the history
* lakectl local: add option to ignore symlinks

* Fix unit test

* CR Fixes

* CR Fixes 2
  • Loading branch information
N-o-Z authored Aug 6, 2024
1 parent 5874a71 commit aec418c
Show file tree
Hide file tree
Showing 15 changed files with 201 additions and 75 deletions.
6 changes: 5 additions & 1 deletion cmd/lakectl/cmd/fs_download.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ var fsDownloadCmd = &cobra.Command{
}
}()

s := local.NewSyncManager(ctx, client, getHTTPClient(), syncFlags, false)
s := local.NewSyncManager(ctx, client, getHTTPClient(), local.Config{
SyncFlags: syncFlags,
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: false,
})
err := s.Sync(dest, remote, ch)
if err != nil {
DieErr(err)
Expand Down
6 changes: 5 additions & 1 deletion cmd/lakectl/cmd/fs_upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ var fsUploadCmd = &cobra.Command{
c <- change
}
}()
s := local.NewSyncManager(ctx, client, getHTTPClient(), syncFlags, false)
s := local.NewSyncManager(ctx, client, getHTTPClient(), local.Config{
SyncFlags: syncFlags,
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: false,
})
fullPath, err := filepath.Abs(source)
if err != nil {
DieErr(err)
Expand Down
5 changes: 4 additions & 1 deletion cmd/lakectl/cmd/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ func localDiff(ctx context.Context, client apigen.ClientWithResponsesInterface,
return local.ListRemote(ctx, client, remote, currentRemoteState, includePOSIXPermissions)
})

changes, err := local.DiffLocalWithHead(currentRemoteState, path, includePOSIXPermissions, includePOSIXPermissions)
changes, err := local.DiffLocalWithHead(currentRemoteState, path, local.Config{
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: cfg.Experimental.Local.POSIXPerm.Enabled,
})
if err != nil {
DieErr(err)
}
Expand Down
6 changes: 5 additions & 1 deletion cmd/lakectl/cmd/local_checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ func localCheckout(cmd *cobra.Command, localPath string, specifiedRef string, co
currentBase := remote.WithRef(idx.AtHead)
diffs := local.Undo(localDiff(cmd.Context(), client, currentBase, idx.LocalPath()))
sigCtx := localHandleSyncInterrupt(cmd.Context(), idx, string(checkoutOperation))
syncMgr := local.NewSyncManager(sigCtx, client, getHTTPClient(), syncFlags, cfg.Experimental.Local.POSIXPerm.Enabled)
syncMgr := local.NewSyncManager(sigCtx, client, getHTTPClient(), local.Config{
SyncFlags: syncFlags,
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: cfg.Experimental.Local.POSIXPerm.Enabled,
})
// confirm on local changes
if confirmByFlag && len(diffs) > 0 {
fmt.Println("Uncommitted changes exist, the operation will revert all changes on local directory.")
Expand Down
6 changes: 5 additions & 1 deletion cmd/lakectl/cmd/local_clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ var localCloneCmd = &cobra.Command{
DieErr(err)
}
sigCtx := localHandleSyncInterrupt(ctx, idx, string(cloneOperation))
s := local.NewSyncManager(sigCtx, client, getHTTPClient(), syncFlags, cfg.Experimental.Local.POSIXPerm.Enabled)
s := local.NewSyncManager(sigCtx, client, getHTTPClient(), local.Config{
SyncFlags: syncFlags,
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: cfg.Experimental.Local.POSIXPerm.Enabled,
})
err = s.Sync(localPath, stableRemote, ch)
if err != nil {
DieErr(err)
Expand Down
6 changes: 5 additions & 1 deletion cmd/lakectl/cmd/local_commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,11 @@ var localCommitCmd = &cobra.Command{
}

sigCtx := localHandleSyncInterrupt(cmd.Context(), idx, string(commitOperation))
s := local.NewSyncManager(sigCtx, client, getHTTPClient(), syncFlags, cfg.Experimental.Local.POSIXPerm.Enabled)
s := local.NewSyncManager(sigCtx, client, getHTTPClient(), local.Config{
SyncFlags: syncFlags,
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: cfg.Experimental.Local.POSIXPerm.Enabled,
})
err = s.Sync(idx.LocalPath(), remote, c)
if err != nil {
DieErr(err)
Expand Down
6 changes: 5 additions & 1 deletion cmd/lakectl/cmd/local_pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ var localPullCmd = &cobra.Command{
return nil
})
sigCtx := localHandleSyncInterrupt(cmd.Context(), idx, string(pullOperation))
s := local.NewSyncManager(sigCtx, client, getHTTPClient(), syncFlags, cfg.Experimental.Local.POSIXPerm.Enabled)
s := local.NewSyncManager(sigCtx, client, getHTTPClient(), local.Config{
SyncFlags: syncFlags,
SkipNonRegularFiles: cfg.Local.SkipNonRegularFiles,
IncludePerm: cfg.Experimental.Local.POSIXPerm.Enabled,
})
err = s.Sync(idx.LocalPath(), newBase, c)
if err != nil {
DieErr(err)
Expand Down
5 changes: 5 additions & 0 deletions cmd/lakectl/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ type Configuration struct {
// setting FixSparkPlaceholder to true will change spark placeholder with the actual location. for more information see https://github.com/treeverse/lakeFS/issues/2213
FixSparkPlaceholder bool `mapstructure:"fix_spark_placeholder"`
}
Local struct {
// SkipNonRegularFiles - By default lakectl local fails if local directory contains a symbolic link. When set, lakectl will ignore the symbolic links instead.
SkipNonRegularFiles bool `mapstructure:"skip_non_regular_files"`
} `mapstructure:"local"`
// Experimental - Use caution when enabling experimental features. It should only be used after consulting with the lakeFS team!
Experimental struct {
Local struct {
Expand Down Expand Up @@ -549,6 +553,7 @@ func initConfig() {
viper.SetDefault("server.retries.max_wait_interval", defaultMaxRetryInterval)
viper.SetDefault("server.retries.min_wait_interval", defaultMinRetryInterval)
viper.SetDefault("experimental.local.posix_permissions.enabled", false)
viper.SetDefault("local.skip_non_regular_files", false)

cfgErr = viper.ReadInConfig()
}
80 changes: 78 additions & 2 deletions esti/lakectl_local_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,8 +530,8 @@ func TestLakectlLocal_commit(t *testing.T) {
RunCmdAndVerifyContainsText(t, Lakectl()+" local status "+dataDir, false, "No diff found", vars)

// Modify local folder - add and remove files
os.MkdirAll(filepath.Join(dataDir, "subdir"), os.ModePerm)
os.MkdirAll(filepath.Join(dataDir, "subdir-a"), os.ModePerm)
require.NoError(t, os.MkdirAll(filepath.Join(dataDir, "subdir"), os.ModePerm))
require.NoError(t, os.MkdirAll(filepath.Join(dataDir, "subdir-a"), os.ModePerm))
fd, err = os.Create(filepath.Join(dataDir, "subdir", "test.txt"))
require.NoError(t, err)
fd, err = os.Create(filepath.Join(dataDir, "subdir-a", "test.txt"))
Expand All @@ -554,6 +554,82 @@ func TestLakectlLocal_commit(t *testing.T) {
}
}

func TestLakectlLocal_commit_symlink(t *testing.T) {
tmpDir := t.TempDir()
fd, err := os.CreateTemp(tmpDir, "")
require.NoError(t, err)
require.NoError(t, fd.Close())
repoName := generateUniqueRepositoryName()
storage := generateUniqueStorageNamespace(repoName)
vars := map[string]string{
"REPO": repoName,
"STORAGE": storage,
"BRANCH": mainBranch,
"REF": mainBranch,
"PREFIX": "",
}

// No repo
vars["LOCAL_DIR"] = tmpDir
runCmd(t, Lakectl()+" repo create lakefs://"+repoName+" "+storage, false, false, vars)
runCmd(t, Lakectl()+" log lakefs://"+repoName+"/"+mainBranch, false, false, vars)

tests := []struct {
name string
skipSymlink bool
}{
{
name: "skip-symlink",
skipSymlink: true,
},
{
name: "fail-on-symlink",
skipSymlink: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dataDir, err := os.MkdirTemp(tmpDir, "")
require.NoError(t, err)
file := filepath.Join(dataDir, "file1.txt")
require.NoError(t, os.WriteFile(file, []byte("foo"), os.ModePerm))
symlink := filepath.Join(dataDir, "link_file1.txt")
require.NoError(t, os.Symlink(file, symlink))

runCmd(t, Lakectl()+" branch create lakefs://"+repoName+"/"+tt.name+" --source lakefs://"+repoName+"/"+mainBranch, false, false, vars)

vars["LOCAL_DIR"] = dataDir
vars["PREFIX"] = ""
vars["BRANCH"] = tt.name
vars["REF"] = tt.name
lakectlCmd := Lakectl()
if tt.skipSymlink {
lakectlCmd = "LAKECTL_LOCAL_SKIP_NON_REGULAR_FILES=true " + lakectlCmd
}
runCmd(t, lakectlCmd+" local init lakefs://"+repoName+"/"+vars["BRANCH"]+"/"+vars["PREFIX"]+" "+dataDir, false, false, vars)
if tt.skipSymlink {
RunCmdAndVerifyContainsText(t, lakectlCmd+" local status "+dataDir, false, "local ║ added ║ file1.txt", vars)
} else {
RunCmdAndVerifyFailureContainsText(t, lakectlCmd+" local status "+dataDir, false, "link_file1.txt: not a regular file", vars)
}

// Commit changes to branch
if tt.skipSymlink {
RunCmdAndVerifyContainsText(t, lakectlCmd+" local commit -m test "+dataDir, false, "Commit for branch \"${BRANCH}\" completed", vars)
} else {
RunCmdAndVerifyFailureContainsText(t, lakectlCmd+" local commit -m test "+dataDir, false, "link_file1.txt: not a regular file", vars)
}

// Check diff after commit
if tt.skipSymlink {
RunCmdAndVerifyContainsText(t, lakectlCmd+" local status "+dataDir, false, "No diff found", vars)
} else {
RunCmdAndVerifyFailureContainsText(t, lakectlCmd+" local status "+dataDir, false, "link_file1.txt: not a regular file", vars)
}
})
}
}

func TestLakectlLocal_commit_remote_uncommitted(t *testing.T) {
tmpDir := t.TempDir()
fd, err := os.CreateTemp(tmpDir, "")
Expand Down
18 changes: 5 additions & 13 deletions pkg/fileutil/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ const (
)

var (
ErrNotFile = errors.New("path is not a file")
ErrBadPath = errors.New("bad path traversal blocked")
ErrSymbolicLink = errors.New("symbolic links not supported")
ErrInvalidPath = errors.New("invalid path")
ErrNotFile = errors.New("path is not a file")
ErrBadPath = errors.New("bad path traversal blocked")
ErrNotARegularFile = errors.New("not a regular file")
ErrInvalidPath = errors.New("invalid path")
)

// IsDir Returns true if p is a directory, otherwise false
Expand Down Expand Up @@ -170,21 +170,13 @@ func VerifyRelPath(relPath, basePath string) error {
return VerifyAbsPath(abs, basePath)
}

// VerifySafeFilename checks that the given file name is not a symbolic link and that
// the file name does not contain path traversal
// VerifySafeFilename checks that the given file name is absolute and does not contain path traversal
func VerifySafeFilename(absPath string) error {
if err := VerifyAbsPath(absPath, absPath); err != nil {
return err
}
if !filepath.IsAbs(absPath) {
return fmt.Errorf("relative path not allowed: %w", ErrInvalidPath)
}
filename, err := filepath.EvalSymlinks(absPath)
if err != nil {
return err
}
if filename != absPath {
return ErrSymbolicLink
}
return nil
}
23 changes: 23 additions & 0 deletions pkg/local/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package local

import "github.com/treeverse/lakefs/pkg/api/apiutil"

const (
// DefaultDirectoryPermissions Octal representation of default folder permissions
DefaultDirectoryPermissions = 0o040777
ClientMtimeMetadataKey = apiutil.LakeFSMetadataPrefix + "client-mtime"
)

type SyncFlags struct {
Parallelism int
Presign bool
PresignMultipart bool
}

type Config struct {
SyncFlags
// SkipNonRegularFiles - By default lakectl local fails if local directory contains irregular files. When set, lakectl will skip these files instead.
SkipNonRegularFiles bool
// IncludePerm - Experimental: preserve Unix file permissions
IncludePerm bool
}
39 changes: 25 additions & 14 deletions pkg/local/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ import (
"net/http"
"os"
"path/filepath"
"slices"
"strings"

"github.com/go-openapi/swag"
"github.com/treeverse/lakefs/pkg/api/apigen"
"github.com/treeverse/lakefs/pkg/fileutil"
"github.com/treeverse/lakefs/pkg/gateway/path"
"github.com/treeverse/lakefs/pkg/uri"
)
Expand Down Expand Up @@ -266,7 +268,7 @@ func WalkS3(root string, callbackFunc func(p string, info fs.FileInfo, err error
// DiffLocalWithHead Checks changes between a local directory and the head it is pointing to. The diff check assumes the remote
// is an immutable set so any changes found resulted from changes in the local directory
// left is an object channel which contains results from a remote source. rightPath is the local directory to diff with
func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string, includeDirs, includePOSIXPermissions bool) (Changes, error) {
func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string, cfg Config) (Changes, error) {
// left should be the base commit
changes := make([]*Change, 0)

Expand All @@ -279,7 +281,11 @@ func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string, include
return err
}

if !includeLocalFileInDiff(info, includeDirs) {
includeFile, err := includeLocalFileInDiff(info, cfg)
if err != nil {
return err
}
if !includeFile {
return nil
}

Expand All @@ -299,7 +305,7 @@ func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string, include
}
switch {
case currentRemoteFile.Path < localPath: // We removed a file locally
if includeRemoteFileInDiff(currentRemoteFile, includeDirs) {
if includeRemoteFileInDiff(currentRemoteFile, cfg) {
changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved})
}
currentRemoteFile.Path = ""
Expand All @@ -312,7 +318,7 @@ func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string, include
// dirs might have different sizes on different operating systems
sizeChanged := !info.IsDir() && localBytes != swag.Int64Value(currentRemoteFile.SizeBytes)
mtimeChanged := localMtime != remoteMtime
permissionsChanged := includePOSIXPermissions && isPermissionsChanged(info, currentRemoteFile)
permissionsChanged := cfg.IncludePerm && isPermissionsChanged(info, currentRemoteFile)
if sizeChanged || mtimeChanged || permissionsChanged {
// we made a change!
changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeModified})
Expand Down Expand Up @@ -385,19 +391,24 @@ func ListRemote(ctx context.Context, client apigen.ClientWithResponsesInterface,
return nil
}

func includeLocalFileInDiff(info fs.FileInfo, includeDirs bool) bool {
var ignoreFileList = []string{
IndexFileName,
".DS_Store",
}

func includeLocalFileInDiff(info fs.FileInfo, cfg Config) (bool, error) {
if info.IsDir() {
return includeDirs
} else {
switch info.Name() {
case IndexFileName, ".DS_Store":
return false
default:
return true
return cfg.IncludePerm, nil
}
if !info.Mode().IsRegular() {
if !cfg.SkipNonRegularFiles {
return false, fmt.Errorf("%s: %w", info.Name(), fileutil.ErrNotARegularFile)
}
return false, nil
}
return !slices.Contains(ignoreFileList, info.Name()), nil
}

func includeRemoteFileInDiff(currentRemoteFile apigen.ObjectStats, includeDirs bool) bool {
return includeDirs || !strings.HasSuffix(currentRemoteFile.Path, uri.PathSeparator)
func includeRemoteFileInDiff(currentRemoteFile apigen.ObjectStats, cfg Config) bool {
return cfg.IncludePerm || !strings.HasSuffix(currentRemoteFile.Path, uri.PathSeparator)
}
4 changes: 3 additions & 1 deletion pkg/local/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@ func TestDiffLocal(t *testing.T) {
lc := make(chan apigen.ObjectStats, len(left))
makeChan(lc, left)

changes, err := local.DiffLocalWithHead(lc, tt.LocalPath, tt.IncludeUnixPermissions, tt.IncludeUnixPermissions)
changes, err := local.DiffLocalWithHead(lc, tt.LocalPath, local.Config{
IncludePerm: tt.IncludeUnixPermissions,
})

if tt.CleanLocalPath != nil {
tt.CleanLocalPath(tt.LocalPath)
Expand Down
Loading

0 comments on commit aec418c

Please sign in to comment.