340 lines
9.4 KiB
PowerShell
Executable File
340 lines
9.4 KiB
PowerShell
Executable File
# Find and Remove Duplicates
|
|
|
|
# Command-line parameters. None is mandatory at bind time; the combination
# is validated by CheckUsage before any work is done.
param (
    # Directory that is scanned (recursively) for duplicate files.
    [string] $check_dir,

    # Directory that receives a copy of every file before it is deleted.
    [string] $backup_dir,

    # List every group of duplicate files that was found.
    [switch] $show_dups,

    # Delete the duplicates (requires -backup_dir).
    [switch] $delete
)
|
|
|
|
function ShowUsage {
    # Prints the command-line help text to the output stream and then
    # terminates the script with exit code 1. Because of the 'exit', this
    # function never returns to its caller.
    #
    # A single-quoted here-string is used so that nothing in the help text
    # is ever subject to variable expansion. The closing '@ must stay at
    # the very start of its line.
    $message = @'

Find and Remove Duplicates usage:

** Use with caution **

frd.ps1 -check_dir (directory) [-delete -backup_dir (backup location)] [-show_dups]
    -check_dir  (where to look for duplicates)
    -backup_dir (location to backup files before deletion)
    -delete     # will delete the duplicates
    -show_dups  # will show the duplicates

** A typical directory to check would be the Office downloads directory:
    -check_dir 'C:\Program Files\Microsoft Office\Updates\Download\PackageFiles'

** The backup directory can be any writable location, the destination directory is created:
    -backup_dir 'C:\tmp\backups'

** Files can only be deleted if you have the needed privileges
'@

    Write-Output $message
    exit 1
}
|
|
|
|
function CheckUsage {
    # Validates the combination of script parameters ($delete, $show_dups,
    # $check_dir, $backup_dir are read from the script scope). For every
    # violated rule the error text is written and ShowUsage is invoked.
    # When all rules pass, a short summary of the requested work is written.

    # Rules are evaluated in this order; each entry pairs a "violated" flag
    # with the message that is reported for it.
    $rules = @(
        @{
            Violated = (-not ($delete -or $show_dups))
            Text     = "`n*** ERROR: No work requested, bye"
        }
        @{
            Violated = (-not $check_dir)
            Text     = "`n*** ERROR: -check_dir option not specified"
        }
        @{
            Violated = ($delete -and -not $backup_dir)
            Text     = "`n*** ERROR: When -delete is specified, you need to also specify the -backup_dir option"
        }
    )

    foreach ($rule in $rules) {
        if ($rule.Violated) {
            Write-Output $rule.Text
            ShowUsage
        }
    }

    # Summary of what is about to happen, one line per active option.
    $summary = @("> Checking for duplicates in: $check_dir")
    if ($delete) {
        $summary += "> Looking for duplicates to delete..."
    }
    if ($show_dups) {
        $summary += "> Showing Duplicates..."
    }
    if ($backup_dir) {
        $summary += "> Backing up to: $backup_dir"
    }
    Write-Output $summary
}
|
|
|
|
|
|
# Snapshot of one file found during the scan, together with its content hash.
class FoundFile {
    # File name without directory.
    [string]$Name
    # Full path of the file.
    [string]$FullName
    # Directory that contains the file.
    [string]$Directory
    # Size in bytes. Declared as 64-bit because FileInfo.Length is an Int64;
    # a 32-bit [int] cannot hold the size of files of 2 GiB or more.
    [long]$Size
    # Size in megabytes, rounded to 4 decimals.
    [double]$fileSizeInMB
    # Size in kilobytes, rounded to 2 decimals.
    [double]$fileSizeInKB
    # Creation time of the file (UTC).
    [datetime]$Creation
    # Content hash supplied by the caller.
    [string]$Hash

    # Builds the snapshot from a FileInfo and an already computed hash string.
    FoundFile([System.IO.FileInfo] $file, [string]$hash) {
        $this.Name = $file.Name
        $this.FullName = $file.FullName
        $this.Directory = $file.DirectoryName
        $this.Size = $file.Length
        $this.fileSizeInMB = [math]::Round($this.Size / 1MB, 4)
        $this.fileSizeInKB = [math]::Round($this.Size / 1KB, 2)
        $this.Creation = $file.CreationTimeUtc
        $this.Hash = $hash
    }

    # Method to display information.
    # Placeholder implementation.
    # NOTE(review): output written with Write-Output inside a [void] class
    # method is not expected to reach the pipeline - confirm before relying on it.
    [void]DisplayInfo() {
        Write-Output "hello"
    }

    # One-line description: directory, name, size in KB and creation time.
    # Relies on the script function Format-NumberWithCommas being defined
    # at the time ToString() is called.
    [string]ToString() {
        return "$($this.Directory) $($this.Name) $(Format-NumberWithCommas $this.fileSizeInKB) KB $($this.Creation)"
    }
}
|
|
|
|
function ShowSizes() {
    # Writes a blank line followed by four lines that report a byte count
    # in bytes, KB, MB and GB, each prefixed with $description.
    #
    # Parameters:
    #   description - text placed in front of every reported value
    #   sizeInBytes - the size to report, in bytes
    param (
        [Parameter(Mandatory = $true, Position = 0)]
        [string]$description,

        [Parameter(Mandatory = $true, Position = 1)]
        [Int64]$sizeInBytes
    )

    # Convert the size to kilobytes, megabytes and gigabytes
    $sizeInKB = $sizeInBytes / 1KB
    $sizeInMB = $sizeInBytes / 1MB
    $sizeInGB = $sizeInBytes / 1GB

    # Display the size in different units.
    # The number of decimals must be passed as its own parameter: writing
    # "-Number $value, 2" creates a two-element array that is bound to
    # -Number, which leaves -NumDecimals at its default of 0.
    Write-Output ""
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInBytes) bytes."
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInKB -NumDecimals 2) KB."
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInMB -NumDecimals 2) MB."
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInGB -NumDecimals 4) GB."
}
|
|
|
|
|
|
function Format-NumberWithCommas {
    # Formats a number with the .NET "N" (number) format specifier, which
    # adds group separators, using the requested count of decimal places.
    #
    # Parameters:
    #   Number      - the value to format (position 0)
    #   NumDecimals - digits after the decimal point (position 1, default 0)
    #
    # Calling convention: pass the decimals as a separate argument
    # ("Format-NumberWithCommas $x 2" or "-Number $x -NumDecimals 2").
    # Writing "-Number $x, 2" builds an array that is bound to -Number as
    # a whole, so -NumDecimals keeps its default.
    param (
        [Parameter(Mandatory = $true, Position = 0)]
        # Left untyped on purpose so that any value the caller supplies is
        # handed to the format operator unchanged.
        [System.Object]$Number,

        [Parameter(Position = 1)]
        [int]$NumDecimals = 0
    )

    # Assemble the composite format string, e.g. "{0:N2}", then apply it.
    $numberFormat = '{0:N' + $NumDecimals + '}'
    $numberFormat -f $Number
}
|
|
|
|
# Computes the SHA-256 hash of a file and returns it as an upper-case
# hexadecimal string without separators.
#
# NOTE: this function has the same name as the built-in Get-FileHash cmdlet
# and therefore takes its place inside this script; it returns a plain
# string rather than an object.
#
# Parameters:
#   path - path of the file to hash
function Get-FileHash ($path) {
    $stream = $null
    $sha256 = $null
    try {
        $stream = [System.IO.File]::OpenRead($path)
        $sha256 = [System.Security.Cryptography.SHA256]::Create()
        $hash = $sha256.ComputeHash($stream)
        # BitConverter yields "AA-BB-CC..."; strip the dashes.
        return [BitConverter]::ToString($hash) -replace '-', ''
    }
    finally {
        # Always release the hash object and the file handle, even when
        # opening or reading the file failed part-way.
        if ($null -ne $sha256) { $sha256.Dispose() }
        if ($null -ne $stream) { $stream.Dispose() }
    }
}
|
|
|
|
|
|
function Get-AllFilesByHash {
    # Scans a directory tree and groups the files it contains by content hash.
    #
    # Parameters:
    #   Dirname - directory to scan; sub-directories are included
    #
    # Returns:
    #   A hashtable whose keys are hash strings and whose values are arrays
    #   of FoundFile objects sharing that hash.
    #
    # Files that cannot be hashed or described (for example because they
    # cannot be opened) are skipped with a warning instead of being recorded.
    param (
        [Parameter(Mandatory = $true)]
        [string]$Dirname
    )

    # Get all files in the directory
    $files = Get-ChildItem -Path $Dirname -File -Recurse

    # Create a hashtable to store file hashes
    $hashTable = @{}

    # Iterate through each file and compute hash
    foreach ($file in $files) {
        # Reset per iteration so a failure can never reuse the values that
        # belong to the previously processed file.
        $hash = $null
        $ff = $null
        try {
            $hash = Get-FileHash $file.FullName
            if ([string]::IsNullOrEmpty($hash)) {
                throw "no hash was produced"
            }
            $ff = [FoundFile]::new($file, $hash)
        }
        catch {
            Write-Warning "Skipping '$($file.FullName)': $_"
            continue
        }

        # initialize array for this hash if needed
        if (!$hashTable.ContainsKey($hash)) {
            $hashTable[$hash] = @()
        }
        $hashTable[$hash] += $ff
    }
    $hashTable
}
|
|
|
|
|
|
function Get-Duplicates {
    # Reduces a hash -> files table to the entries that hold more than one
    # file, i.e. the groups of duplicates.
    #
    # Parameters:
    #   files - hashtable mapping a hash to the collection of files with that hash
    #
    # Returns:
    #   A new hashtable with the same keys for the multi-file entries only;
    #   each value is sorted by its Creation property, newest first.
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    $duplicateGroups = @{}

    $files.GetEnumerator() |
        Where-Object { $_.Value.Count -gt 1 } |
        ForEach-Object {
            $duplicateGroups[$_.Key] = $_.Value | Sort-Object -Property Creation -Descending
        }

    $duplicateGroups
}
|
|
|
|
|
|
function Show-Duplicates {
    # Writes a report of all duplicate groups: one header line per hash,
    # one numbered line per file in the group, then the number of groups
    # and the combined size of all listed files (via ShowSizes).
    #
    # Parameters:
    #   files - hashtable mapping a hash to a collection of two or more files
    #
    # Throws when an entry holds fewer than two files.
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    $num_dups = 0
    $total_dup_size = [long]0

    foreach ($entry in $files.GetEnumerator()) {
        $dup_files = $entry.Value
        $num_dups += 1

        if ($dup_files.Count -lt 2) {
            throw "duplicates collection contains non duplicate for entry: $($entry.Key)"
        }

        Write-Output ""
        Write-Output "Found duplicate $($num_dups): $($entry.Key)"
        for ($i = 0; $i -lt $dup_files.Count; $i++) {
            $total_dup_size += $dup_files[$i].Size
            # Prefix each file with its 1-based position inside the group.
            # (The previous prefix used a variable that was never assigned,
            # so it always rendered as an empty string.)
            Write-Output "$($i + 1): $($dup_files[$i].ToString())"
        }
    }
    Write-Output ""
    Write-Output "Found $num_dups duplicate file hashes."
    ShowSizes "Duplicate Files Size" $total_dup_size
}
|
|
|
|
|
|
function BackupDuplicateFile {
    # Copies a file into the backup area before it is deleted. The file's
    # directory path (without the drive qualifier) is recreated below the
    # backup directory, and an existing backup copy is overwritten.
    #
    # Parameters:
    #   file            - the file to back up
    #   backupDirectory - root directory of the backup area; created if missing
    #
    # Any failure to create the target directory or to copy the file is
    # raised as a terminating error, so that a caller never continues as if
    # the backup had succeeded.
    param (
        [Parameter(Mandatory = $true)]
        [FoundFile] $file,

        [Parameter(Mandatory = $true)]
        [string] $backupDirectory
    )

    # Define the source file path
    $sourceFilePath = Join-Path -Path $file.Directory -ChildPath $file.Name

    Write-Output "backup directory is: $backupDirectory"

    # Remove the drive letter using Split-Path
    $pathWithoutDrive = Split-Path -Path $file.Directory -NoQualifier

    # Construct the full backup directory path
    $fullBackupDirectory = Join-Path -Path $backupDirectory -ChildPath $pathWithoutDrive

    # Ensure the target directory exists. Creating the full path also
    # creates the backup root when it is missing. The created directory
    # object is discarded so it does not end up in the script's output.
    if (-not (Test-Path -LiteralPath $fullBackupDirectory -PathType Container)) {
        New-Item -Path $fullBackupDirectory -ItemType Directory -Force -ErrorAction Stop | Out-Null
    }

    # Copy the file to the backup directory.
    # -LiteralPath keeps characters such as [ and ] in the file name from
    # being interpreted as wildcards; using the cmdlet for the copy means
    # the destination is resolved the same way as the directory created above.
    $fullDestinationPath = Join-Path -Path $fullBackupDirectory -ChildPath $file.Name
    Copy-Item -LiteralPath $sourceFilePath -Destination $fullDestinationPath -Force -ErrorAction Stop

    # Output the result
    Write-Output " + backed up to: $fullBackupDirectory"
}
|
|
|
|
function DeleteDuplicateFile {
    # Backs up one duplicate file and then deletes it.
    #
    # Parameters:
    #   File - the file to remove
    #
    # The backup location is taken from the script parameter $backup_dir.
    # The file is only deleted after the backup step completed without
    # error; a failed backup or a failed deletion raises a terminating error.
    param (
        [Parameter(Mandatory = $true)]
        [FoundFile] $File
    )

    Write-Output ""
    Write-Output "Deleting File: $($File.Name) $(Format-NumberWithCommas $File.fileSizeInKB) KB"

    # -ErrorAction Stop turns every error inside the backup step into a
    # terminating one, so it reaches the catch block and deletion is skipped.
    try {
        BackupDuplicateFile -file $File -backupDirectory $backup_dir -ErrorAction Stop
    }
    catch {
        throw "Backup of '$($File.FullName)' failed, the file was NOT deleted: $_"
    }

    # Force delete a read-only or protected file.
    # -LiteralPath ensures that exactly this file is removed even when its
    # name contains wildcard characters such as [ or ].
    Write-Output " ... deleting: $($File.FullName)"
    Remove-Item -LiteralPath $File.FullName -Force -ErrorAction Stop
    Write-Output " - removed from: $($File.Directory)"
}
|
|
|
|
|
|
|
|
|
|
# NOTE: Each group of duplicates is expected to be pre-sorted by creation date, newest first (see Get-Duplicates)
|
|
function ProcessDuplicates {
    # Deletes the redundant files of every duplicate group and reports how
    # many files were removed and how many bytes they occupied.
    #
    # Parameters:
    #   files - hashtable mapping a hash to a collection of two or more
    #           files; element 0 of each collection is kept, all following
    #           elements are passed to DeleteDuplicateFile
    #
    # Throws when an entry holds fewer than two files.
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    $removedCount = 0
    $removedBytes = 0

    Write-Output ""
    foreach ($pair in $files.GetEnumerator()) {
        $members = $pair.Value
        if ($members.Count -lt 2) {
            throw "duplicates collection contains non duplicate for entry: $($pair.Key))"
        }

        # Element 0 is the one to keep; everything from index 1 onwards goes.
        foreach ($victim in $members[1..($members.Count - 1)]) {
            DeleteDuplicateFile -File $victim
            $removedBytes += $victim.Size
            $removedCount += 1
        }
    }

    Write-Output ""
    Write-Output "Deleted $removedCount duplicate files"
    ShowSizes "Recovered Size" $removedBytes
}
|
|
|
|
|
|
# --- Script entry point ---------------------------------------------------

# Validate the command line; usage errors are reported by CheckUsage.
CheckUsage

# Hash every file below the directory to check, then keep only the groups
# that contain more than one file.
$files = Get-AllFilesByHash -Dirname $check_dir
$dups = Get-Duplicates -files $files

# Report first, so the listing still shows the files that are about to go.
if ($show_dups) { Show-Duplicates -files $dups }

if ($delete) { ProcessDuplicates -files $dups }
|
|
|