delete all duplicates for a hash, not just the first. Control execution with options. Add a usage message when no options are specified. Make deletion optional and backup mandatory when performing deletion

rlogwood
2024-06-07 16:36:38 -04:00
parent d08f28b391
commit d863a0f030
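For quick reference, these are example invocations implied by the new options and the usage message added below; the paths are placeholders, not values taken from the commit:

# report duplicates only
.\frd.ps1 -check_dir "C:\some\dir" -show_dups

# delete duplicates; -backup_dir is required whenever -delete is given
.\frd.ps1 -check_dir "C:\some\dir" -delete -backup_dir "C:\some\backup"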


@@ -2,15 +2,66 @@
# This is the directory we'll check for duplicates
# TODO: make this a parameter
$check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
$backup_dir = "C:\Users\richa\tmp\backups"
#$check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
#$backup_dir = "C:\Users\richa\tmp\backups3"
# = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles",
# = "C:\Users\richa\tmp\backups3"
param (
[string]$check_dir,
# Methodology
# A hash value is computed for each file; files with the same hash are considered duplicates
# Sort the duplicate files found for a hash by their creation date in ascending order
# Deletion candidate will be the first file in the list
[string]$backup_dir,
[switch]$show_dups,
[switch]$delete
)
function ShowUsage {
$message = @"
Find and Remove Duplicates usage:
frd.ps1 -check_dir [-delete -backup_dir (backup location)] [-show_dups]
-check_dir (where to look for duplicates)
-backup_dir (location to backup files before deletion)
-delete # will delete the duplicates
-show_dups # will show the duplicates
"@
Write-Output $message
exit 1
}
function CheckUsage {
if (-not $delete -and -not $show_dups) {
Write-Output "`n*** ERROR: No work requested, bye"
ShowUsage
}
if (-not $check_dir) {
Write-Output "`n*** ERROR: -check_dir option not specified"
ShowUsage
}
if ($delete -and -not $backup_dir) {
Write-Output "`n*** ERROR: When -delete is specified, you need to also specify the -backup_dir option"
ShowUsage
}
Write-Output "> Checking for duplicates in: $check_dir"
if ($delete) {
Write-Output "> Looking for duplicates to delete..."
}
if ($show_dups) {
Write-Output "> Showing Duplicates..."
}
if ($backup_dir) {
Write-Output "> Backing up to: $backup_dir"
}
}
class FoundFile {
@@ -134,7 +185,7 @@ function Get-Duplicates {
$dup_files = $entry.Value
if ($dup_files.Count -gt 1) {
$dups_hash[$entry.key] = $dup_files | Sort-Object -Property Creation
$dups_hash[$entry.key] = $dup_files | Sort-Object -Property Creation -Descending
#$dup_files = $dups_hash[$entry.key]
}
}
@@ -229,7 +280,7 @@ function DeleteDuplicateFile {
BackupDuplicateFile -file $File -backupDirectory $backup_dir
# Force delete a read-only or protected file
Write-Output " ... would deleting: $($File.FullName)"
#Remove-Item -Path $File.FullName -Force
Remove-Item -Path $File.FullName -Force
Write-Output " - removed from: $($File.Directory)"
}
@@ -252,12 +303,17 @@ function ProcessDuplicates {
if ($dup_files.Count -lt 2) {
throw "duplicates collection contains non duplicate for entry: $($entry.Key))"
}
# the first file is the newest, array is sorted in descending order by creation date
# delete all the duplicates older than the first file, element 0
for ($i = 1; $i -lt $dup_files.Count; $i++) {
$file_to_delete = $dup_files[$i]
DeleteDuplicateFile -File $file_to_delete
$total_deleted_size += $file_to_delete.Size
$num_deleted += 1
}
# delete the first duplicate in the array of dups, they are sorted by creation date ascending
$oldest_dup = $dup_files[0]
DeleteDuplicateFile -File $oldest_dup
$total_deleted_size += $oldest_dup.Size
$num_deleted += 1
}
Write-Output ""
Write-Output "Deleted $num_deleted duplicate files"
@@ -265,12 +321,17 @@ function ProcessDuplicates {
}
CheckUsage
$files = Get-AllFilesByHash($check_dir) | Select-Object -First 10
$dups = Get-Duplicates $files
Show-Duplicates $dups
ProcessDuplicates $dups
if ($show_dups) {
Show-Duplicates $dups
}
if ($delete) {
ProcessDuplicates $dups
}
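The flow described in the Methodology comment above (hash each file, group files that share a hash, sort each group by creation date, keep one copy and remove the rest) can be sketched with stock PowerShell cmdlets. This is a simplified illustration only, not the script's Get-AllFilesByHash / FoundFile implementation: it works on raw FileInfo objects, assumes $check_dir already holds the directory to scan, and uses -WhatIf so nothing is actually deleted.

# group files by SHA-256 hash; any group with more than one member holds duplicates
$dup_groups = Get-ChildItem -Path $check_dir -File |
    Group-Object -Property { (Get-FileHash -Path $_.FullName).Hash } |
    Where-Object { $_.Count -gt 1 }

foreach ($group in $dup_groups) {
    # newest first, mirroring the new -Descending sort; element 0 is the copy that is kept
    $sorted = $group.Group | Sort-Object -Property CreationTime -Descending
    $sorted | Select-Object -Skip 1 | ForEach-Object {
        # the real script backs each file up before removal; -WhatIf keeps this sketch a dry run
        Remove-Item -Path $_.FullName -Force -WhatIf
    }
}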