From d08f28b391725831cfeaf402a83fcf7f8fcbd997 Mon Sep 17 00:00:00 2001
From: rlogwood
Date: Fri, 7 Jun 2024 14:33:26 -0400
Subject: [PATCH] testing deletion and backup process

---
 windows/frd.ps1 | 140 ++++++++++++++++++++++++++++++------------------
 1 file changed, 89 insertions(+), 51 deletions(-)

diff --git a/windows/frd.ps1 b/windows/frd.ps1
index bbebfae..02bbe24 100755
--- a/windows/frd.ps1
+++ b/windows/frd.ps1
@@ -3,6 +3,9 @@
 # This is the directory we'll check for duplicates
 # TODO: make this a parameter
 $check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
+$backup_dir = "C:\Users\richa\tmp\backups"
+
+
 
 # Methodolgy
 # A hash value is computed for each file, files with the same hash are considered duplicate
@@ -12,6 +15,7 @@ $check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
 
 class FoundFile {
     [string]$Name
+    [string]$FullName
     [string]$Directory
     [int]$Size
     [double]$fileSizeInMB
@@ -21,6 +25,7 @@ class FoundFile {
 
     FoundFile([System.IO.FileInfo] $file, [string]$hash) {
         $this.Name = $file.Name
+        $this.FullName = $file.FullName
         $this.Directory = $file.DirectoryName
         $this.Size = $file.Length
         $this.fileSizeInMB = [math]::Round($this.Size / 1MB, 4)
@@ -143,7 +148,6 @@ function Show-Duplicates {
         [hashtable] $files
     )
 
-    #$hash_num = 0
     $num_dups = 0
     $total_dup_size = 0
 
@@ -160,72 +164,104 @@ function Show-Duplicates {
             for ($i = 0; $i -lt $dup_files.Count; $i++) {
                 $total_dup_size += $dup_files[$i].Size
                 Write-Output "$($hash_num): $($dup_files[$i].ToString())"
-            }
-
-            #$hash_num += 1
+            }
         }
     }
 
     Write-Output ""
     Write-Output "Found $num_dups duplicate file hashes."
     ShowSizes "Duplicate Files Size" $total_dup_size
-    #Write-Output "total dup size = $(Format-NumberWithCommas $total_dup_size) Bytes, $(Format-NumberWithCommas $total_dup_mb) MB"
 }
 
-function Show-Duplicates-v1 {
+
+function BackupDuplicateFile {
+    param (
+        [Parameter(Mandatory = $true)]
+        [FoundFile] $file,
+
+        [Parameter(Mandatory = $true)]
+        [string] $backupDirectory
+    )
+
+    # Define the source file path
+    $sourceFilePath = Join-Path -Path $file.Directory -ChildPath $file.Name
+
+    # Get the FileInfo object for the source file
+    $fileInfo = Get-Item -Path $sourceFilePath
+
+    # Show the backup root
+    Write-Output "backup directory is: $backupDirectory"
+
+    # Ensure the backup directory exists (Out-Null keeps New-Item's output out of the pipeline)
+    if (-not (Test-Path -Path $backupDirectory)) {
+        New-Item -Path $backupDirectory -ItemType Directory | Out-Null
+    }
+
+    # Remove the drive letter using Split-Path
+    $pathWithoutDrive = Split-Path -Path $file.Directory -NoQualifier
+
+    # Construct the full backup directory path, mirroring the source tree
+    #$fullBackupDirectory = Join-Path -Path $backupDirectory -ChildPath $file.Directory
+    $fullBackupDirectory = Join-Path -Path $backupDirectory -ChildPath $pathWithoutDrive
+
+    if (-not (Test-Path -Path $fullBackupDirectory)) {
+        New-Item -Path $fullBackupDirectory -ItemType Directory | Out-Null
+    }
+
+    # Copy the file to the backup directory, overwriting any previous backup
+    $fullDestinationPath = Join-Path -Path $fullBackupDirectory -ChildPath $file.Name
+    $fileInfo.CopyTo($fullDestinationPath, $true)
+
+    # Output the result
+    # Write-Output "File '$($fileInfo.FullName)'"
+    Write-Output " + backed up to: $fullBackupDirectory"
+}
+
+function DeleteDuplicateFile {
+    param (
+        [Parameter(Mandatory = $true)]
+        [FoundFile] $File
+    )
+
+    Write-Output ""
+    Write-Output "Deleting File: $($File.Name) $(Format-NumberWithCommas $File.fileSizeInKB) KB"
+    BackupDuplicateFile -file $File -backupDirectory $backup_dir
+    # Force delete a read-only or protected file (disabled while testing)
+    Write-Output " ... would delete: $($File.FullName)"
+    #Remove-Item -Path $File.FullName -Force
+    Write-Output " - removed from: $($File.Directory)"
+}
+
+
+
+
+# NOTE: Duplicates are pre-sorted by creation date
+function ProcessDuplicates {
     param (
         [Parameter(Mandatory = $true)]
         [hashtable] $files
     )
 
-    $hash_num = 0
-    $total_dup_size = 0
-    $num_dups = 0
+    $num_deleted = 0
+    $total_deleted_size = 0
+
+    Write-Output ""
 
     foreach ($entry in $files.GetEnumerator()) {
         $dup_files = $entry.Value
-
-        if ($dup_files.Count -gt 1) {
-            $num_dups++
-            Write-Output ""
-            Write-Output "Found duplicate $($num_dups): $($entry.Key)"
-            for ($i = 0; $i -lt $dup_files.Count; $i++) {
-                $total_dup_size += $dup_files[$i].Size
-                Write-Output "$($hash_num): $($dup_files[$i].ToString())"
-            }
+        if ($dup_files.Count -lt 2) {
+            throw "duplicates collection contains non-duplicate for entry: $($entry.Key)"
         }
-        $hash_num += 1
-    }
-    #$total_dup_mb = [math]::Round($total_dup_size / 1MB, 4)
+        # delete the first duplicate in the array of dups; they are sorted by creation date ascending
+        $oldest_dup = $dup_files[0]
+        DeleteDuplicateFile -File $oldest_dup
+        $total_deleted_size += $oldest_dup.Size
+        $num_deleted += 1
+    }
 
     Write-Output ""
-    Write-Output "Found $num_dups duplicate file hashes."
-    ShowSizes "Duplicate Files Size" $total_dup_size
-    #Write-Output "total dup size = $(Format-NumberWithCommas $total_dup_size) Bytes, $(Format-NumberWithCommas $total_dup_mb) MB"
-
-}
-
-function TBD_Get-Duplicates {
-    throw "not implemented"
-    # need to sort the dups by date before deleteing
-
-    $dupsTable = @{}
-
-    $total_dup_size = 0
-
-    # Find and remove duplicate files
-    foreach ($hash in $hashTable.Keys) {
-        $fileGroup = $hashTable[$hash]
-        if ($fileGroup.Count -gt 1) {
-            # Keep the first file and delete the rest
-            $fileGroup[1..($fileGroup.Count - 1)] | ForEach-Object {
-                Write-Output "Would Delete duplicate file: $_"
-                $file = Get-Item -Path $_
-                $fileSize = $file.Length
-                $fileName = $file.Name
-                $total_dup_size += $fileSize
-                $dupsTable[$fileName] = $fileSize
-                # Remove-Item -Path $_ -Force
-            }
-        }
-    }
+    Write-Output "Deleted $num_deleted duplicate files"
+    ShowSizes "Recovered Size" $total_deleted_size
 }
@@ -235,4 +271,6 @@ function TBD_Get-Duplicates {
 
 $files = Get-AllFilesByHash($check_dir) | Select-Object -First 10
 $dups = Get-Duplicates $files
 Show-Duplicates $dups
+ProcessDuplicates $dups
+
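
Reviewer note (sketch, not part of the patch): BackupDuplicateFile mirrors the
source tree under $backup_dir by stripping the drive qualifier with
Split-Path -NoQualifier and re-rooting the remainder with Join-Path. A minimal
stand-alone check of that path logic, reusing the two directories the script
defines above:

    # Sketch only - verifies the backup-path mirroring used by BackupDuplicateFile
    $backupDirectory = "C:\Users\richa\tmp\backups"
    $sourceDirectory = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"

    # -NoQualifier drops "C:", leaving "\Program Files\...\PackageFiles"
    $pathWithoutDrive = Split-Path -Path $sourceDirectory -NoQualifier

    # Join-Path normalizes the leading backslash on the child path
    $fullBackupDirectory = Join-Path -Path $backupDirectory -ChildPath $pathWithoutDrive

    # Expected:
    # C:\Users\richa\tmp\backups\Program Files\Microsoft Office\Updates\Download\PackageFiles
    Write-Output $fullBackupDirectory

The bodies of Get-AllFilesByHash and Get-Duplicates are outside this diff. For
reference, the stock PowerShell idiom for the same hash-based duplicate
detection (assumed roughly equivalent, not necessarily the script's actual
implementation) is:

    # Group files by content hash; groups with more than one member are duplicates
    Get-ChildItem -Path $sourceDirectory -Recurse -File |
        Get-FileHash -Algorithm SHA256 |
        Group-Object -Property Hash |
        Where-Object { $_.Count -gt 1 }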