testing deletion and backup process

rlogwood
2024-06-07 14:33:26 -04:00
parent 68d6b160f9
commit d08f28b391


@@ -3,6 +3,9 @@
# This is the directory we'll check for duplicates
# TODO: make this a parameter
$check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
$backup_dir = "C:\Users\richa\tmp\backups"
# Methodology
# A hash value is computed for each file; files with the same hash are considered duplicates
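
The script's own Get-AllFilesByHash and Get-Duplicates functions are outside this hunk, but the methodology described above can be sketched with stock cmdlets (illustration only; the $dupGroups name and the SHA-256 choice are assumptions, not the script's own code):

# Sketch of the hash-based duplicate detection described above (not part of the commit):
# hash every file under $check_dir, group by hash value, keep groups with more than one file.
$dupGroups = Get-ChildItem -Path $check_dir -File -Recurse |
    Get-FileHash -Algorithm SHA256 |
    Group-Object -Property Hash |
    Where-Object { $_.Count -gt 1 }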
@@ -12,6 +15,7 @@ $check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
class FoundFile {
    [string]$Name
    [string]$FullName
    [string]$Directory
    [int]$Size
    [double]$fileSizeInMB
@@ -21,6 +25,7 @@ class FoundFile {
    FoundFile([System.IO.FileInfo] $file, [string]$hash) {
        $this.Name = $file.Name
        $this.FullName = $file.FullName
        $this.Directory = $file.DirectoryName
        $this.Size = $file.Length
        $this.fileSizeInMB = [math]::Round($this.Size / 1MB, 4)
@@ -143,7 +148,6 @@ function Show-Duplicates {
        [hashtable] $files
    )

    $num_dups = 0
    $total_dup_size = 0
@@ -160,72 +164,104 @@ function Show-Duplicates {
        for ($i = 0; $i -lt $dup_files.Count; $i++) {
            $total_dup_size += $dup_files[$i].Size
            Write-Output "$($hash_num): $($dup_files[$i].ToString())"
        }
    }

    Write-Output ""
    Write-Output "Found $num_dups duplicate file hashes."
    ShowSizes "Duplicate Files Size" $total_dup_size
}
function BackupDuplicateFile {
    param (
        [Parameter(Mandatory = $true)]
        [FoundFile] $file,
        [Parameter(Mandatory = $true)]
        [string] $backupDirectory
    )

    # Define the source file path
    $sourceFilePath = Join-Path -Path $file.Directory -ChildPath $file.Name

    # Get the FileInfo object for the source file
    $fileInfo = Get-Item -Path $sourceFilePath

    # Define the backup directory
    Write-Output "backup directory is: $backupDirectory"

    # Ensure the backup directory exists
    if (-not (Test-Path -Path $backupDirectory)) {
        New-Item -Path $backupDirectory -ItemType Directory
    }

    # Remove the drive letter using Split-Path
    $pathWithoutDrive = Split-Path -Path $File.Directory -NoQualifier

    # Construct the full backup file path
    #$fullBackupDirectory = Join-Path -Path $backupDirectory -ChildPath $File.Directory
    $fullBackupDirectory = Join-Path -Path $backupDirectory -ChildPath $pathWithoutDrive
    if (-not (Test-Path -Path $fullBackupDirectory)) {
        New-Item -Path $fullBackupDirectory -ItemType Directory
    }

    # Copy the file to the backup directory
    $fullDestinationPath = Join-Path -Path $fullBackupDirectory -ChildPath $file.Name
    $fileInfo.CopyTo($fullDestinationPath, $true)

    # Output the result
    # Write-Output "File '$($fileInfo.FullName)'"
    Write-Output " + backed up to: $fullBackupDirectory"
}
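
For reference, the Split-Path/Join-Path handling above mirrors the source directory structure, minus the drive letter, underneath the backup root. With the $check_dir and $backup_dir values defined at the top of the script, the result looks like this (illustration only):

# Illustration of the backup path construction (not part of the commit):
Split-Path -Path "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles" -NoQualifier
#   -> \Program Files\Microsoft Office\Updates\Download\PackageFiles
Join-Path -Path "C:\Users\richa\tmp\backups" -ChildPath "\Program Files\Microsoft Office\Updates\Download\PackageFiles"
#   -> C:\Users\richa\tmp\backups\Program Files\Microsoft Office\Updates\Download\PackageFiles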
function DeleteDuplicateFile {
    param (
        [Parameter(Mandatory = $true)]
        [FoundFile] $File
    )

    Write-Output ""
    Write-Output "Deleting File: $($File.Name) $(Format-NumberWithCommas $File.fileSizeInKB) KB"

    BackupDuplicateFile -file $File -backupDirectory $backup_dir

    # Force delete a read-only or protected file
    Write-Output " ... would delete: $($File.FullName)"
    #Remove-Item -Path $File.FullName -Force
    Write-Output " - removed from: $($File.Directory)"
}
# NOTE: Duplicates are pre-sorted by creation date
function ProcessDuplicates {
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    $num_deleted = 0
    $total_deleted_size = 0

    foreach ($entry in $files.GetEnumerator()) {
        $dup_files = $entry.Value
        if ($dup_files.Count -lt 2) {
            throw "duplicates collection contains a non-duplicate for entry: $($entry.Key)"
        }

        # Delete the first duplicate in the array of dups; they are sorted by creation date, ascending
        $oldest_dup = $dup_files[0]
        DeleteDuplicateFile -File $oldest_dup
        $total_deleted_size += $oldest_dup.Size
        $num_deleted += 1
    }

    Write-Output ""
    Write-Output "Deleted $num_deleted duplicate files"
    ShowSizes "Recovered Size" $total_deleted_size
}
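
ProcessDuplicates assumes Get-Duplicates (not shown in this diff) hands it each group already sorted by creation date ascending, so $dup_files[0] is the oldest copy. A minimal sketch of that ordering, assuming FoundFile exposes a CreationTime property (the property name is an assumption; it does not appear in these hunks):

# Sketch only: order a duplicate group oldest-first so index 0 is the file that gets removed.
# CreationTime is an assumed property name, not confirmed by this diff.
$dup_files = @($dup_files | Sort-Object -Property CreationTime)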
function TBD_Get-Duplicates {
    throw "not implemented"

    # need to sort the dups by date before deleting
    $dupsTable = @{}
    $total_dup_size = 0

    # Find and remove duplicate files
    foreach ($hash in $hashTable.Keys) {
        $fileGroup = $hashTable[$hash]
        if ($fileGroup.Count -gt 1) {
            # Keep the first file and delete the rest
            $fileGroup[1..($fileGroup.Count - 1)] | ForEach-Object {
                Write-Output "Would Delete duplicate file: $_"
                $file = Get-Item -Path $_
                $fileSize = $file.Length
                $fileName = $file.Name
                $total_dup_size += $fileSize
                $dupsTable[$fileName] = $fileSize
                # Remove-Item -Path $_ -Force
            }
        }
    }
}
@@ -235,4 +271,6 @@ function TBD_Get-Duplicates {
$files = Get-AllFilesByHash($check_dir) | Select-Object -First 10
$dups = Get-Duplicates $files
Show-Duplicates $dups
ProcessDuplicates $dups
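
Per the commit message this is still a test pass: the pipeline above only takes the first 10 file records, and DeleteDuplicateFile leaves the actual Remove-Item commented out. Once testing is done, a full run would presumably drop the limit (sketch only, not part of the commit):

# Presumed full run after testing (illustration only): process every file under $check_dir.
$files = Get-AllFilesByHash $check_dir
$dups = Get-Duplicates $files
Show-Duplicates $dups
ProcessDuplicates $dups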