Skip to content

Commit

Permalink
Update README and main.go: change exclude flag to -x, add -e for empt…
Browse files Browse the repository at this point in the history
…y files and -l for listing duplicates only
  • Loading branch information
sammy fischer committed Jan 8, 2025
1 parent d5959b3 commit 7611bbe
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 20 deletions.
14 changes: 11 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ Simple tool that crawls through a directory and its
subdirectories and tries to locate and remove duplicate
files. The comparison is done by computing a sha256 checksum

#### Changelog
* 1.2.1 :
* changed the exclude flag to -x instead of -e
* added -e flag to include empty files in the check
* added -l flag to avoid deleting and just list the duplicates found

#### Requirement
You'll need to have go installed if you want to compile the tool.

Expand All @@ -27,12 +33,14 @@ You can use the precompiled binaries in the release tab or ...

### Usage
```
./copycure -i /path/to/your/directory -m [-m sql|mem] [-y] [-e {csv}]
./copycure -i /path/to/your/directory [-m sql|mem] [-y] [-x {csv}] [-e] [-l]
```
__parameters__
* `-m mem|sql` : stores the checksums either in RAM or in a temporary SQLite database. `-m sql` is default.
Which option is best for you depends a lot on the speed of your RAM and your drive in your system.
* `-y` : do not ask for confirmation before deleting files (asking is the default).
* `-e` : exclude any filepath containing any of the comma separated values. For example `-e .venv,.git,LICENSE.md`
would ignore anything whose path include .venv, .git or LICENSE.md
* `-x` : exclude any filepath containing any of the comma separated values. For example `-x .venv,.git,LICENSE.md`
would ignore anything whose path includes .venv, .git or LICENSE.md
* `-e` : do not ignore empty files when looking for duplicates (default is to ignore files with a size of 0 bytes)
* `-l` : only list the duplicates found without deleting them. Useful if you want to pipe the output somewhere.
57 changes: 40 additions & 17 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"strings"
)

var version = "1.1.1"
var version = "1.2.1"

func printOverwrite(message string) {
fmt.Printf("\r%s", message)
Expand Down Expand Up @@ -78,7 +78,7 @@ func deletePath(path string, noconfirm bool) (bool, error) {
deleted := false
var err error
if !noconfirm {
fmt.Printf("Do you want to delete %s? (y/n): ", path)
fmt.Printf("\nDo you want to delete %s? (y/n): ", path)
fmt.Scanln(&response)
} else {
response = "y"
Expand All @@ -97,22 +97,28 @@ func deletePath(path string, noconfirm bool) (bool, error) {
return deleted, err
}

func removeDuplicates(db *sql.DB, seenChecksums map[string]struct{}, directory string, allFiles int, noconfirm bool, exclude []string) (int, int, error) {
func removeDuplicates(db *sql.DB, seenChecksums map[string]struct{}, directory string, allFiles int, noConfirm bool, exclude []string, deleteEmpty bool, listMode bool) (int, int, error) {
cnt := 0
dbl := 0
pct := 0
fileCnt := 0
if !listMode {
printOverwrite(fmt.Sprintf("%d/%d (%d%%) - %d files deleted ...", fileCnt, allFiles, pct, cnt))
}

printOverwrite(fmt.Sprintf("%d/%d (%d%%) - %d files deleted ...", fileCnt, allFiles, pct, cnt))
deleteSizeLimit := int64(10)
if deleteEmpty {
deleteSizeLimit = int64(-1)
}

err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() && !containsSubstring(path, exclude) {
if !info.IsDir() && !containsSubstring(path, exclude) && info.Size() > deleteSizeLimit {
fileCnt++
newPct := int(float64(fileCnt*100) / float64(allFiles))
if newPct > pct {
if newPct > pct && !listMode {
pct = newPct
printOverwrite(fmt.Sprintf("%d/%d (%d%%) - %d files deleted ...", fileCnt, allFiles, pct, cnt))
}
Expand All @@ -138,9 +144,13 @@ func removeDuplicates(db *sql.DB, seenChecksums map[string]struct{}, directory s
fmt.Printf("Failed to query database for file %s: %v\n", path, err)
return nil
} else {
deleted, _ := deletePath(path, noconfirm)
if deleted {
cnt++
if listMode {
fmt.Println(path)
} else {
deleted, _ := deletePath(path, noConfirm)
if deleted {
cnt++
}
}
dbl++
}
Expand Down Expand Up @@ -196,23 +206,34 @@ func main() {
var directory string
var mode string
var excludeParam string
fmt.Printf("CopyCure %v (written by Sammy Fischer)\n", version)
flag.StringVar(&directory, "i", "", "Directory to scan for duplicates")
flag.StringVar(&mode, "m", "sql", "Mode: 'sql' or 'mem'")
_ = flag.Bool("y", false, "Delete files without asking")
flag.StringVar(&excludeParam, "e", "", "Comma separated list of partial filenames to exclude (e.g. -e .venv/,.git/)")
_ = flag.Bool("e", false, "Delete empty files too. Empty files all look the same to copycure. The Default is to ignore them")
_ = flag.Bool("l", false, "List the duplicates instead of deleting them. -y is automatically assumed to be set")
flag.StringVar(&excludeParam, "x", "", "Comma separated list of partial filenames to exclude (e.g. -e .venv/,.git/)")
flag.Parse()
exclude := strings.Split(excludeParam, ",")

noconfirm := isFlagPassed("y")
noConfirm := isFlagPassed("y")
deleteEmpty := isFlagPassed("e")
listMode := isFlagPassed("l")

if directory == "" {
fmt.Println("Usage: copycure -i /path/to/your/directory [-m sql|mem] [-c] [-e aaa,bbb]\n" +
" -m : method to store known checksums\n" +
fmt.Printf("CopyCure %v (written by Sammy Fischer)\n", version)
fmt.Println("Usage: copycure -i /path/to/your/directory [-m sql|mem] [-y] [-x aaa,bbb] [-e] [-l]\n" +
" -m {sql|mem} : method to store known checksums. \n\tsql: use a temporary sqllite database mem: store in and array in RAM\n" +
" -y : remove files without asking\n" +
" -e : comma separated list of partial filenames to exclude (e.g. -e .venv,.git )")
" -x {comma separated list} : exclude any file whose path contains one of the list values\n\t(e.g. -e .venv,.git ignores any path containing .venv or .git)}\n" +
" -e : remove duplicate empty files (size==0). Default is to ignore them.\n" +
" -l : only list the full path to the duplicates found without deleting them.\n")
os.Exit(1)
}

if !listMode {
fmt.Printf("CopyCure %v (written by Sammy Fischer)\n", version)
}

var db *sql.DB
var seenChecksums map[string]struct{}

Expand All @@ -238,11 +259,13 @@ func main() {
os.Exit(1)
}

cnt, dbl, err := removeDuplicates(db, seenChecksums, directory, totalFiles, noconfirm, exclude)
cnt, dbl, err := removeDuplicates(db, seenChecksums, directory, totalFiles, noConfirm, exclude, deleteEmpty, listMode)
if err != nil {
fmt.Printf("Error removing duplicates: %v\n", err)
os.Exit(1)
}

fmt.Printf("%d duplicates found, %d files were removed.\n", dbl, cnt)
if !listMode {
fmt.Printf("%d duplicates found, %d files were removed.\n", dbl, cnt)
}
}

0 comments on commit 7611bbe

Please sign in to comment.