first commit
This commit is contained in:
6
windows/README.md
Executable file
6
windows/README.md
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
|
||||||
|
|
||||||
|
## `frd.ps1` - find and remove duplicates **WIP**
|
||||||
|
- Edit the directory in the file and run it to get a report of files to be removed
|
||||||
|
- Removal step is pending further testing
|
||||||
|
|
||||||
238
windows/frd.ps1
Executable file
238
windows/frd.ps1
Executable file
@ -0,0 +1,238 @@
|
|||||||
|
# Find and Remove Duplicates
|
||||||
|
|
||||||
|
# Directory that is scanned (recursively) for duplicate files.
# Supply -check_dir on the command line to scan a different location; when it
# is omitted the script falls back to the directory it was originally written
# against, so existing invocations keep working unchanged.
param (
    [string]$check_dir = "C:\Program Files\Microsoft Office\Updates\Download\PackageFiles"
)
|
||||||
|
|
||||||
|
# Methodology
|
||||||
|
# A hash value is computed for each file, files with the same hash are considered duplicate
|
||||||
|
# Sort the duplicate files found for a hash by their creation date in ascending order
|
||||||
|
# Deletion candidate will be the first file in the list
|
||||||
|
|
||||||
|
|
||||||
|
# Describes one file found during the scan, together with its content hash.
class FoundFile {
    # File name without the directory part (FileInfo.Name).
    [string]$Name
    # Full path of the containing directory (FileInfo.DirectoryName).
    [string]$Directory
    # Size in bytes. Declared as [long] because FileInfo.Length is a 64-bit
    # value; an [int] property fails to convert for files of 2 GiB or more.
    [long]$Size
    # Size expressed in MB, rounded to 4 decimals.
    [double]$fileSizeInMB
    # Size expressed in KB, rounded to 2 decimals.
    [double]$fileSizeInKB
    # Creation timestamp taken from FileInfo.CreationTimeUtc.
    [datetime]$Creation
    # Hash string supplied by the caller; this class does not compute it.
    [string]$Hash

    # Builds the record from a FileInfo and a hash computed by the caller.
    FoundFile([System.IO.FileInfo] $file, [string]$hash) {
        $this.Name = $file.Name
        $this.Directory = $file.DirectoryName
        $this.Size = $file.Length
        $this.fileSizeInMB = [math]::Round($this.Size / 1MB, 4)
        $this.fileSizeInKB = [math]::Round($this.Size / 1KB, 2)
        $this.Creation = $file.CreationTimeUtc
        $this.Hash = $hash
    }

    # Prints the one-line summary of this file to the host.
    # Class methods only emit what they `return`, so Write-Output inside a
    # [void] method produces nothing; Write-Host is used to make it visible.
    [void]DisplayInfo() {
        Write-Host $this.ToString()
    }

    # One-line summary: directory, name, size in KB (thousands-separated) and
    # creation time. Relies on the script's Format-NumberWithCommas function.
    [string]ToString() {
        return "$($this.Directory) $($this.Name) $(Format-NumberWithCommas $this.fileSizeInKB) KB $($this.Creation)"
    }
}
|
||||||
|
|
||||||
|
function ShowSizes {
    <#
    .SYNOPSIS
    Writes a byte count in bytes, KB, MB and GB, each prefixed by a description.

    .PARAMETER description
    Text placed in front of every output line.

    .PARAMETER sizeInBytes
    The size to report, in bytes. 64-bit so totals of 2 GiB and above are accepted.

    .OUTPUTS
    Five strings: an empty line followed by one line per unit.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0)]
        [string]$description,

        [Parameter(Mandatory = $true, Position = 1)]
        [long]$sizeInBytes
    )

    # Same quantity expressed in the larger units.
    $sizeInKB = $sizeInBytes / 1KB
    $sizeInMB = $sizeInBytes / 1MB
    $sizeInGB = $sizeInBytes / 1GB

    # -NumDecimals is passed by name on purpose: writing `-Number $x, 2` would
    # bind the array ($x, 2) to -Number and leave the decimals at their default.
    Write-Output ""
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInBytes) bytes."
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInKB -NumDecimals 2) KB."
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInMB -NumDecimals 2) MB."
    Write-Output "$description $(Format-NumberWithCommas -Number $sizeInGB -NumDecimals 4) GB."
}
|
||||||
|
|
||||||
|
|
||||||
|
function Format-NumberWithCommas {
    <#
    .SYNOPSIS
    Formats a number with the "N" (thousands-separated) numeric format.

    .PARAMETER Number
    The value to format. Left as System.Object so it is handed to the -f
    operator exactly as received.
    NOTE(review): the original author reported that typing this as [double]
    did not work; the reason was not recorded.

    .PARAMETER NumDecimals
    Number of digits after the decimal separator (default 0).

    .NOTES
    A call written as `-Number $x, 2` binds the two-element array to -Number
    and leaves -NumDecimals at its default; pass -NumDecimals explicitly (or
    positionally without a comma) to control the decimals.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0)]
        [System.Object]$Number,

        [Parameter(Position = 1)]
        [int]$NumDecimals = 0
    )

    # Assemble the composite format item first, e.g. '{0:N2}', then apply it.
    $template = '{0:N' + $NumDecimals + '}'
    return $template -f $Number
}
|
||||||
|
|
||||||
|
# Function to compute file hash
|
||||||
|
function Get-FileHash ($path) {
    # Computes the SHA-256 digest of the file at $path and returns it as an
    # upper-case hex string without separators.
    #
    # Note: a function with this name takes precedence over the built-in
    # Get-FileHash cmdlet; unlike the cmdlet it returns the bare hash string.
    #
    # The stream and the hasher are released in `finally`, so a read error does
    # not leave the file handle open. Failures to open or read the file surface
    # as errors instead of yielding a bogus hash.
    $stream = $null
    $sha256 = $null
    try {
        $stream = [System.IO.File]::OpenRead($path)
        # SHA256.Create() is the supported factory; SHA256Managed is obsolete
        # on current .NET versions and yields the same digest.
        $sha256 = [System.Security.Cryptography.SHA256]::Create()
        $hash = $sha256.ComputeHash($stream)
    }
    finally {
        if ($null -ne $sha256) { $sha256.Dispose() }
        if ($null -ne $stream) { $stream.Dispose() }
    }
    return [BitConverter]::ToString($hash) -replace '-', ''
}
|
||||||
|
|
||||||
|
|
||||||
|
function Get-AllFilesByHash {
    <#
    .SYNOPSIS
    Groups every file below a directory by its content hash.

    .PARAMETER Dirname
    Directory to scan; sub-directories are included.

    .OUTPUTS
    A hashtable keyed by hash string. Each value is an array of FoundFile
    objects that share that hash (a single-element array for unique files).
    #>
    param (
        [Parameter(Mandatory = $true)]
        [string]$Dirname
    )

    $byHash = @{}

    foreach ($item in (Get-ChildItem -Path $Dirname -File -Recurse)) {
        $digest = Get-FileHash $item.FullName
        $record = [FoundFile]::new($item, $digest)

        if ($byHash.ContainsKey($digest)) {
            # Hash already seen: append to the existing group.
            $byHash[$digest] += $record
        }
        else {
            # First file with this hash: start a new group.
            $byHash[$digest] = @($record)
        }
    }

    $byHash
}
|
||||||
|
|
||||||
|
|
||||||
|
function Get-Duplicates {
    <#
    .SYNOPSIS
    Keeps only the hash groups that contain more than one file.

    .PARAMETER files
    Hashtable mapping a hash to the collection of files that have it.

    .OUTPUTS
    A new hashtable holding just the groups with two or more files, each group
    sorted by its Creation property in ascending order.
    #>
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    $duplicateGroups = @{}

    foreach ($pair in $files.GetEnumerator()) {
        $group = $pair.Value

        # A group of one (or none) is not a duplicate; skip it.
        if ($group.Count -le 1) {
            continue
        }

        $duplicateGroups[$pair.Key] = $group | Sort-Object -Property Creation
    }

    $duplicateGroups
}
|
||||||
|
|
||||||
|
|
||||||
|
function Show-Duplicates {
    <#
    .SYNOPSIS
    Prints every duplicate group and a size summary.

    .PARAMETER files
    Hashtable mapping a hash to its group of duplicate files. Every group must
    contain at least two files.

    .OUTPUTS
    Text lines: one header per hash, one line per file in the group, then the
    number of duplicate hashes and the combined size of all listed files.

    .NOTES
    Throws if a group with fewer than two files is encountered.
    #>
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    # Zero-based index of the current hash group; prefixes every file line.
    # It was previously left uninitialised, which printed an empty prefix.
    $hash_num = 0
    $num_dups = 0
    # 64-bit accumulator so large totals do not overflow an [int].
    [long]$total_dup_size = 0

    foreach ($entry in $files.GetEnumerator()) {
        $dup_files = $entry.Value
        $num_dups += 1

        if ($dup_files.Count -lt 2) {
            throw "duplicates collection contains non duplicate for entry: $($entry.Key)"
        }

        Write-Output ""
        Write-Output "Found duplicate $($num_dups): $($entry.Key)"
        for ($i = 0; $i -lt $dup_files.Count; $i++) {
            # Every file of the group is counted, including the one to keep.
            $total_dup_size += $dup_files[$i].Size
            Write-Output "$($hash_num): $($dup_files[$i].ToString())"
        }

        $hash_num += 1
    }

    Write-Output ""
    Write-Output "Found $num_dups duplicate file hashes."
    ShowSizes "Duplicate Files Size" $total_dup_size
}
|
||||||
|
|
||||||
|
function Show-Duplicates-v1 {
    <#
    .SYNOPSIS
    Earlier reporting variant that accepts the unfiltered hash table.

    .PARAMETER files
    Hashtable mapping a hash to the files that have it; groups with a single
    file are allowed and are silently skipped.

    .OUTPUTS
    Text lines: one header per hash with more than one file, one line per file
    in such a group (prefixed with the hash's position in the enumeration),
    then the number of duplicate hashes and the combined size of listed files.
    #>
    param (
        [Parameter(Mandatory = $true)]
        [hashtable] $files
    )

    $total_dup_size = 0
    $num_dups = 0
    $hash_num = 0

    foreach ($entry in $files.GetEnumerator()) {
        $dup_files = $entry.Value

        # Not a duplicate group: still advance the position counter, then skip.
        if ($dup_files.Count -le 1) {
            $hash_num += 1
            continue
        }

        $num_dups += 1
        Write-Output ""
        Write-Output "Found duplicate $($num_dups): $($entry.Key)"

        $i = 0
        while ($i -lt $dup_files.Count) {
            $total_dup_size += $dup_files[$i].Size
            Write-Output "$($hash_num): $($dup_files[$i].ToString())"
            $i++
        }

        $hash_num += 1
    }

    Write-Output ""
    Write-Output "Found $num_dups duplicate file hashes."
    ShowSizes "Duplicate Files Size" $total_dup_size
}
|
||||||
|
|
||||||
|
function TBD_Get-Duplicates {
    # Placeholder for the removal step: always throws "not implemented".
    # Everything after the throw is an unreachable draft kept for reference.
    throw "not implemented"

    # Draft notes:
    # - the duplicates need to be sorted by date before deleting
    # - $hashTable is not defined inside this function as written
    $total_dup_size = 0
    $dupsTable = @{}

    foreach ($hash in $hashTable.Keys) {
        $fileGroup = $hashTable[$hash]

        # Only groups with more than one file contain duplicates.
        if ($fileGroup.Count -le 1) {
            continue
        }

        # Keep the first file of the group and report the rest.
        $fileGroup | Select-Object -Skip 1 | ForEach-Object {
            Write-Output "Would Delete duplicate file: $_"
            $file = Get-Item -Path $_
            $total_dup_size += $file.Length
            $dupsTable[$file.Name] = $file.Length
            # Remove-Item -Path $_ -Force
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Entry point: hash every file under $check_dir, keep the hashes shared by
# two or more files, and print the report.
# The hash table travels through the pipeline as a single object, so a
# trailing `Select-Object -First 10` never limited the number of files; it has
# been dropped to avoid suggesting that only part of the directory is checked.
$files = Get-AllFilesByHash -Dirname $check_dir
$dups = Get-Duplicates $files
Show-Duplicates $dups
|
||||||
|
|
||||||
Reference in New Issue
Block a user