From d863a0f0300f453cc446af33edc78466cd150291 Mon Sep 17 00:00:00 2001 From: rlogwood Date: Fri, 7 Jun 2024 16:36:38 -0400 Subject: [PATCH] delete all duplicates for a hash, not just the first. Control execution with options. Add usage message when no options specified. Make deletion optional and backup mandatory when performing deletion --- windows/frd.ps1 | 95 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 17 deletions(-) diff --git a/windows/frd.ps1 b/windows/frd.ps1 index 02bbe24..f14fe83 100755 --- a/windows/frd.ps1 +++ b/windows/frd.ps1 @@ -2,15 +2,66 @@ # This is the directory we'll check for duplicates # TODO: make this a parameter -$check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles" -$backup_dir = "C:\Users\richa\tmp\backups" +#$check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles" +#$backup_dir = "C:\Users\richa\tmp\backups3" +# = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles", + # = "C:\Users\richa\tmp\backups3" +param ( + [string]$check_dir, -# Methodolgy -# A hash value is computed for each file, files with the same hash are considered duplicate -# Sort the duplicate files found for a hash by their creation date in ascending order -# Deletion canidate will be the first file in the list + [string]$backup_dir, + + [switch]$show_dups, + + [switch]$delete +) + +function ShowUsage { + $message = @" + +Find and Remove Duplicates usage: + frd.ps1 -check_dir [-delete -backup_dir (backup location)] [-show_dups] + -check_dir (where to look for duplicates) + -backup_dir (location to backup files before deletion) + -delete # will delete the duplicates + -show_dups # will show the duplicates +"@ + + Write-Output $message + exit 1 +} + +function CheckUsage { + + if (-not $delete -and -not $show_dups) { + Write-Output "`n*** ERROR: No work requested, bye" + ShowUsage + } + + if (-not $check_dir) { + Write-Output "`n*** ERROR: -check_dir option not specified" +
ShowUsage + } + + if ($delete -and -not $backup_dir) { + Write-Output "`n*** ERROR: When -delete is specified, you need to also specify the -backup_dir option" + ShowUsage + } + + Write-Output "> Checking for duplicates in: $check_dir" + if ($delete) { + Write-Output "> Looking for duplicates to delete..." + } + if ($show_dups) { + Write-Output "> Showing Duplicates..." + } + if ($backup_dir) { + Write-Output "> Backing up to: $backup_dir" + } + +} class FoundFile { @@ -134,7 +185,7 @@ function Get-Duplicates { $dup_files = $entry.Value if ($dup_files.Count -gt 1) { - $dups_hash[$entry.key] = $dup_files | Sort-Object -Property Creation + $dups_hash[$entry.key] = $dup_files | Sort-Object -Property Creation -Descending #$dup_files = $dups_hash[$entry.key] } } @@ -229,7 +280,7 @@ function DeleteDuplicateFile { BackupDuplicateFile -file $File -backupDirectory $backup_dir # Force delete a read-only or protected file Write-Output " ... would deleting: $($File.FullName)" - #Remove-Item -Path $File.FullName -Force + Remove-Item -Path $File.FullName -Force Write-Output " - removed from: $($File.Directory)" } @@ -252,12 +303,17 @@ function ProcessDuplicates { if ($dup_files.Count -lt 2) { throw "duplicates collection contains non duplicate for entry: $($entry.Key))" } + + # the first file is the newest, array is sorted in descending order by creation date + # delete all the duplicates older than the first file, element 0 + for ($i = 1; $i -lt $dup_files.Count; $i++) { + $file_to_delete = $dup_files[$i] + DeleteDuplicateFile -File $file_to_delete + $total_deleted_size += $file_to_delete.Size + $num_deleted += 1 + + } - # delete the first duplicate in the array of dups, they are sorted by creation date ascending - $oldest_dup = $dup_files[0] - DeleteDuplicateFile -File $oldest_dup - $total_deleted_size += $oldest_dup.Size - $num_deleted += 1 } Write-Output "" Write-Output "Deleted $num_deleted duplicate files" @@ -265,12 +321,17 @@ function ProcessDuplicates { } - -
+CheckUsage $files = Get-AllFilesByHash($check_dir) | Select-Object -First 10 $dups = Get-Duplicates $files -Show-Duplicates $dups -ProcessDuplicates $dups + +if ($show_dups) { + Show-Duplicates $dups +} + +if ($delete) { + ProcessDuplicates $dups +}