This commit is contained in:
2021-04-07 14:36:45 +03:00
parent 7f39ed6025
commit 80a42d9d0a
2 changed files with 659 additions and 75 deletions

396
Files/ByteSize.cs Normal file
View File

@@ -0,0 +1,396 @@
using System;
using System.Globalization;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
#nullable enable
namespace Files
{
/// <summary>
/// An immutable quantity of bytes stored as a signed 64-bit integer.
/// This part of the struct holds the arithmetic and comparison operators, the
/// conversions, the equality members and the <c>ToString</c> overloads.
/// </summary>
public readonly partial struct ByteSize : IEquatable<ByteSize>, IComparable<ByteSize>
{
    /// <summary>The raw number of bytes; may be negative.</summary>
    private readonly long _bytes;

    /// <summary>Wraps a raw byte count.</summary>
    /// <param name="bytes">Number of bytes to store.</param>
    private ByteSize(long bytes)
    {
        _bytes = bytes;
    }

    /// <summary>The smallest representable size (<see cref="long.MinValue"/> bytes).</summary>
    public static readonly ByteSize MinValue = new ByteSize(long.MinValue);

    /// <summary>The largest representable size (<see cref="long.MaxValue"/> bytes).</summary>
    public static readonly ByteSize MaxValue = new ByteSize(long.MaxValue);

    /// <summary>A size of zero bytes.</summary>
    public static readonly ByteSize ZeroValue = new ByteSize(0);

    // ----- Addition ---------------------------------------------------------

    /// <summary>Adds two sizes.</summary>
    public static ByteSize operator +(ByteSize b1, ByteSize b2)
    {
        return b1.Add(b2);
    }

    /// <summary>Adds a raw byte count to a size.</summary>
    public static ByteSize operator +(ByteSize b1, long b2)
    {
        return b1.AddBytes(b2);
    }

    /// <summary>Adds a raw byte count to a size.</summary>
    public static ByteSize operator +(ByteSize b1, int b2)
    {
        return b1.AddBytes(b2);
    }

    /// <summary>Increments a size by one byte.</summary>
    public static ByteSize operator ++(ByteSize b)
    {
        return b.AddBytes(1);
    }

    // ----- Subtraction and negation -----------------------------------------

    /// <summary>Negates a size.</summary>
    public static ByteSize operator -(ByteSize b)
    {
        return new ByteSize(-b._bytes);
    }

    /// <summary>Subtracts one size from another.</summary>
    public static ByteSize operator -(ByteSize b1, ByteSize b2)
    {
        return new ByteSize(b1._bytes - b2._bytes);
    }

    /// <summary>Subtracts a raw byte count from a size.</summary>
    public static ByteSize operator -(ByteSize b1, long b2)
    {
        return new ByteSize(b1._bytes - b2);
    }

    /// <summary>Subtracts a raw byte count from a size.</summary>
    public static ByteSize operator -(ByteSize b1, int b2)
    {
        return new ByteSize(b1._bytes - b2);
    }

    /// <summary>Decrements a size by one byte.</summary>
    public static ByteSize operator --(ByteSize b)
    {
        return new ByteSize(b._bytes - 1);
    }

    // ----- Comparison -------------------------------------------------------

    /// <summary>Returns <c>true</c> when both sizes hold the same byte count.</summary>
    public static bool operator ==(ByteSize b1, ByteSize b2)
    {
        return b1.Equals(b2);
    }

    /// <summary>Returns <c>true</c> when the sizes hold different byte counts.</summary>
    public static bool operator !=(ByteSize b1, ByteSize b2)
    {
        return !b1.Equals(b2);
    }

    /// <summary>Returns <c>true</c> when <paramref name="b1"/> is smaller than <paramref name="b2"/>.</summary>
    public static bool operator <(ByteSize b1, ByteSize b2)
    {
        return b1.CompareTo(b2) < 0;
    }

    /// <summary>Returns <c>true</c> when <paramref name="b1"/> is smaller than or equal to <paramref name="b2"/>.</summary>
    public static bool operator <=(ByteSize b1, ByteSize b2)
    {
        return b1.CompareTo(b2) <= 0;
    }

    /// <summary>Returns <c>true</c> when <paramref name="b1"/> is larger than <paramref name="b2"/>.</summary>
    public static bool operator >(ByteSize b1, ByteSize b2)
    {
        return b1.CompareTo(b2) > 0;
    }

    /// <summary>Returns <c>true</c> when <paramref name="b1"/> is larger than or equal to <paramref name="b2"/>.</summary>
    public static bool operator >=(ByteSize b1, ByteSize b2)
    {
        return b1.CompareTo(b2) >= 0;
    }

    // ----- Conversions ------------------------------------------------------

    /// <summary>Wraps a raw byte count.</summary>
    public static implicit operator ByteSize(long bytes)
    {
        return new ByteSize(bytes);
    }

    /// <summary>Wraps a raw byte count.</summary>
    public static implicit operator ByteSize(int bytes)
    {
        return new ByteSize(bytes);
    }

    /// <summary>
    /// Parses <paramref name="text"/> with the current culture by delegating to
    /// <c>Parse</c>; whatever <c>Parse</c> throws propagates to the caller.
    /// </summary>
    public static implicit operator ByteSize(string text)
    {
        return Parse(text, CultureInfo.CurrentCulture);
    }

    /// <summary>Unwraps the raw byte count.</summary>
    public static implicit operator long(ByteSize bytes)
    {
        return bytes._bytes;
    }

    /// <summary>Narrows the raw byte count to <see cref="int"/>.</summary>
    public static explicit operator int(ByteSize bytes)
    {
        return (int)bytes._bytes;
    }

    /// <summary>Formats the size exactly like the parameterless <see cref="ToString()"/>.</summary>
    public static implicit operator string(ByteSize bytes)
    {
        return bytes.ToString();
    }

    // ----- Instance arithmetic ----------------------------------------------

    /// <summary>Returns the sum of this size and <paramref name="bs"/>.</summary>
    public ByteSize Add(ByteSize bs)
    {
        return new ByteSize(_bytes + bs._bytes);
    }

    /// <summary>Returns this size increased by <paramref name="value"/> bytes.</summary>
    public ByteSize AddBytes(long value)
    {
        return new ByteSize(_bytes + value);
    }

    // ----- Equality and ordering --------------------------------------------

    /// <inheritdoc/>
    public int CompareTo(ByteSize other)
    {
        return _bytes.CompareTo(other._bytes);
    }

    /// <inheritdoc/>
    public bool Equals(ByteSize other)
    {
        return _bytes == other._bytes;
    }

    /// <inheritdoc/>
    public override bool Equals(object? obj)
    {
        if (obj is ByteSize other)
        {
            return Equals(other);
        }

        return false;
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        return _bytes.GetHashCode();
    }

    // ----- Formatting -------------------------------------------------------

    /// <summary>Formats the size with decimal (power-of-1000) units using the numeric format <c>"0.##"</c>.</summary>
    public override string ToString()
    {
        return ToStringWithDecimalPrefixedUnitName("0.##");
    }

    /// <summary>Formats the size with decimal units using the given numeric format.</summary>
    /// <param name="format">Numeric format string, or <c>null</c> for the default numeric formatting.</param>
    public string ToString(string? format)
    {
        return ToStringWithDecimalPrefixedUnitName(format);
    }

    /// <summary>Formats the size with decimal units using the given format provider and no explicit numeric format.</summary>
    /// <param name="provider">Format provider; the formatter falls back to the current culture when <c>null</c>.</param>
    public string ToString(IFormatProvider? provider)
    {
        return ToStringWithDecimalPrefixedUnitName(null, provider);
    }

    /// <summary>Formats the size with decimal units using the given numeric format and provider.</summary>
    public string ToString(string? format, IFormatProvider? provider)
    {
        return ToStringWithDecimalPrefixedUnitName(format, provider);
    }

    /// <summary>Formats the size, choosing between binary (power-of-1024) and decimal (power-of-1000) units.</summary>
    /// <param name="format">Numeric format string, or <c>null</c>.</param>
    /// <param name="provider">Format provider, or <c>null</c>.</param>
    /// <param name="useBinaryUnitNamePrefix"><c>true</c> for KiB/MiB/…, <c>false</c> for kB/MB/….</param>
    public string ToString(string? format, IFormatProvider? provider, bool useBinaryUnitNamePrefix)
    {
        if (useBinaryUnitNamePrefix)
        {
            return ToStringWithBinaryPrefixedUnitName(format, provider);
        }

        return ToStringWithDecimalPrefixedUnitName(format, provider);
    }

    /// <summary>Formats the size, choosing the unit family and between short and long unit names.</summary>
    /// <param name="format">Numeric format string, or <c>null</c>.</param>
    /// <param name="provider">Format provider, or <c>null</c>.</param>
    /// <param name="useBinaryUnitNamePrefix"><c>true</c> for binary units, <c>false</c> for decimal units.</param>
    /// <param name="useShortUnitName"><c>true</c> for abbreviations, <c>false</c> for spelled-out unit names.</param>
    public string ToString(string? format, IFormatProvider? provider, bool useBinaryUnitNamePrefix, bool useShortUnitName)
    {
        if (useBinaryUnitNamePrefix)
        {
            return ToStringWithBinaryPrefixedUnitName(format, provider, useShortUnitName);
        }

        return ToStringWithDecimalPrefixedUnitName(format, provider, useShortUnitName);
    }
}
/// <summary>
/// Binary (IEC, power-of-1024) unit support for <see cref="ByteSize"/>:
/// factories, adders, fractional accessors and formatting.
/// </summary>
public readonly partial struct ByteSize
{
    // Each binary unit is 2^10 times the previous one.
    private const long _oneKibiByte = 1L << 10;
    private const long _oneMebiByte = 1L << 20;
    private const long _oneGibiByte = 1L << 30;
    private const long _oneTebiByte = 1L << 40;
    private const long _onePebiByte = 1L << 50;
    private const long _oneExbiByte = 1L << 60;

    /// <summary>Creates a size from a number of kibibytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromKibiBytes(double value)
    {
        long bytes = (long)(value * _oneKibiByte);
        return new ByteSize(bytes);
    }

    /// <summary>Creates a size from a number of mebibytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromMebiBytes(double value)
    {
        long bytes = (long)(value * _oneMebiByte);
        return new ByteSize(bytes);
    }

    /// <summary>Creates a size from a number of gibibytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromGibiBytes(double value)
    {
        long bytes = (long)(value * _oneGibiByte);
        return new ByteSize(bytes);
    }

    /// <summary>Creates a size from a number of tebibytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromTebiBytes(double value)
    {
        long bytes = (long)(value * _oneTebiByte);
        return new ByteSize(bytes);
    }

    /// <summary>Creates a size from a number of pebibytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromPebiBytes(double value)
    {
        long bytes = (long)(value * _onePebiByte);
        return new ByteSize(bytes);
    }

    /// <summary>Returns this size increased by <paramref name="value"/> kibibytes.</summary>
    public ByteSize AddKibiBytes(double value) => this + FromKibiBytes(value);

    /// <summary>Returns this size increased by <paramref name="value"/> mebibytes.</summary>
    public ByteSize AddMebiBytes(double value) => this + FromMebiBytes(value);

    /// <summary>Returns this size increased by <paramref name="value"/> gibibytes.</summary>
    public ByteSize AddGibiBytes(double value) => this + FromGibiBytes(value);

    /// <summary>Returns this size increased by <paramref name="value"/> tebibytes.</summary>
    public ByteSize AddTebiBytes(double value) => this + FromTebiBytes(value);

    /// <summary>Returns this size increased by <paramref name="value"/> pebibytes.</summary>
    public ByteSize AddPebiBytes(double value) => this + FromPebiBytes(value);

    /// <summary>The size expressed in kibibytes.</summary>
    public double AsKibiBytes => _bytes / (double)_oneKibiByte;

    /// <summary>The size expressed in mebibytes.</summary>
    public double AsMebiBytes => _bytes / (double)_oneMebiByte;

    /// <summary>The size expressed in gibibytes.</summary>
    public double AsGibiBytes => _bytes / (double)_oneGibiByte;

    /// <summary>The size expressed in tebibytes.</summary>
    public double AsTebiBytes => _bytes / (double)_oneTebiByte;

    /// <summary>The size expressed in pebibytes.</summary>
    public double AsPebiBytes => _bytes / (double)_onePebiByte;

    /// <summary>Formats the size with binary units and abbreviated unit names (e.g. "KiB").</summary>
    public string ToStringWithBinaryPrefixedShortUnitName(string? format = null, IFormatProvider? provider = null)
    {
        return ToStringWithBinaryPrefixedUnitName(format, provider, useShortUnitName: true);
    }

    /// <summary>Formats the size with binary units and spelled-out unit names (e.g. "kibibytes").</summary>
    public string ToStringWithBinaryPrefixedLongUnitName(string? format = null, IFormatProvider? provider = null)
    {
        return ToStringWithBinaryPrefixedUnitName(format, provider, useShortUnitName: false);
    }

    /// <summary>
    /// Formats the size using the largest binary unit that does not exceed it.
    /// </summary>
    /// <param name="format">Numeric format string passed to the number formatter, or <c>null</c>.</param>
    /// <param name="provider">Format provider; the current culture is used when <c>null</c>.</param>
    /// <param name="useShortUnitName">
    /// <c>true</c> for abbreviations; <c>false</c> for spelled-out names, which are singular only
    /// when the size is exactly one unit.
    /// </param>
    /// <returns>The formatted number, a space and the unit name.</returns>
    /// <remarks>
    /// Sizes below one kibibyte — including every negative size, since no threshold
    /// comparison succeeds for them — are rendered as a whole number of bytes.
    /// </remarks>
    public string ToStringWithBinaryPrefixedUnitName(string? format = null, IFormatProvider? provider = null, bool useShortUnitName = true)
    {
        IFormatProvider culture = provider ?? CultureInfo.CurrentCulture;
        long count = _bytes;

        // Renders the size scaled to the given unit, followed by the matching unit name.
        string Describe(long unit, string abbreviation, string singular, string plural)
        {
            string number = (count / (double)unit).ToString(format, culture);
            if (useShortUnitName)
            {
                return number + abbreviation;
            }

            return number + (count == unit ? singular : plural);
        }

        if (count >= _oneExbiByte)
        {
            return Describe(_oneExbiByte, " EiB", " exbibyte", " exbibytes");
        }

        if (count >= _onePebiByte)
        {
            return Describe(_onePebiByte, " PiB", " pebibyte", " pebibytes");
        }

        if (count >= _oneTebiByte)
        {
            return Describe(_oneTebiByte, " TiB", " tebibyte", " tebibytes");
        }

        if (count >= _oneGibiByte)
        {
            return Describe(_oneGibiByte, " GiB", " gibibyte", " gibibytes");
        }

        if (count >= _oneMebiByte)
        {
            return Describe(_oneMebiByte, " MiB", " mebibyte", " mebibytes");
        }

        if (count >= _oneKibiByte)
        {
            return Describe(_oneKibiByte, " KiB", " kibibyte", " kibibytes");
        }

        // Plain bytes are formatted as an integer, not as a scaled double.
        string byteSuffix = useShortUnitName ? " B" : count == 1 ? " byte" : " bytes";
        return count.ToString(format, culture) + byteSuffix;
    }
}
/// <summary>
/// Decimal (SI, power-of-1000) unit support for <see cref="ByteSize"/>:
/// factories, adders, fractional accessors and formatting.
/// </summary>
public readonly partial struct ByteSize
{
    // Each decimal unit is 1000 times the previous one.
    private const long _oneKiloByte = 1000;
    private const long _oneMegaByte = 1000 * _oneKiloByte;
    private const long _oneGigaByte = 1000 * _oneMegaByte;
    private const long _oneTeraByte = 1000 * _oneGigaByte;
    private const long _onePetaByte = 1000 * _oneTeraByte;
    private const long _oneExaByte = 1000 * _onePetaByte;

    /// <summary>Creates a size from a number of kilobytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromKiloBytes(double value) => new ByteSize((long)(value * _oneKiloByte));

    /// <summary>Creates a size from a number of megabytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromMegaBytes(double value) => new ByteSize((long)(value * _oneMegaByte));

    /// <summary>Creates a size from a number of gigabytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromGigaBytes(double value) => new ByteSize((long)(value * _oneGigaByte));

    /// <summary>Creates a size from a number of terabytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromTeraBytes(double value) => new ByteSize((long)(value * _oneTeraByte));

    /// <summary>Creates a size from a number of petabytes; the byte count is cast to <see cref="long"/>.</summary>
    public static ByteSize FromPetaBytes(double value) => new ByteSize((long)(value * _onePetaByte));

    /// <summary>Returns this size increased by <paramref name="value"/> kilobytes.</summary>
    public ByteSize AddKiloBytes(double value) => new ByteSize((long)(value * _oneKiloByte) + _bytes);

    /// <summary>Returns this size increased by <paramref name="value"/> megabytes.</summary>
    public ByteSize AddMegaBytes(double value) => new ByteSize((long)(value * _oneMegaByte) + _bytes);

    /// <summary>Returns this size increased by <paramref name="value"/> gigabytes.</summary>
    public ByteSize AddGigaBytes(double value) => new ByteSize((long)(value * _oneGigaByte) + _bytes);

    /// <summary>Returns this size increased by <paramref name="value"/> terabytes.</summary>
    public ByteSize AddTeraBytes(double value) => new ByteSize((long)(value * _oneTeraByte) + _bytes);

    /// <summary>Returns this size increased by <paramref name="value"/> petabytes.</summary>
    public ByteSize AddPetaBytes(double value) => new ByteSize((long)(value * _onePetaByte) + _bytes);

    /// <summary>The size expressed in kilobytes.</summary>
    public double AsKiloBytes => (double)_bytes / _oneKiloByte;

    /// <summary>The size expressed in megabytes.</summary>
    public double AsMegaBytes => (double)_bytes / _oneMegaByte;

    /// <summary>The size expressed in gigabytes.</summary>
    public double AsGigaBytes => (double)_bytes / _oneGigaByte;

    /// <summary>The size expressed in terabytes.</summary>
    public double AsTeraBytes => (double)_bytes / _oneTeraByte;

    /// <summary>The size expressed in petabytes.</summary>
    public double AsPetaBytes => (double)_bytes / _onePetaByte;

    /// <summary>Formats the size with decimal units and abbreviated unit names (e.g. "kB").</summary>
    public string ToStringWithDecimalPrefixedShortUnitName(string? format = null, IFormatProvider? provider = null) =>
        ToStringWithDecimalPrefixedUnitName(format, provider, true);

    /// <summary>Formats the size with decimal units and spelled-out unit names (e.g. "kilobytes").</summary>
    public string ToStringWithDecimalPrefixedLongUnitName(string? format = null, IFormatProvider? provider = null) =>
        // Long names require useShortUnitName == false; passing true here returned abbreviations.
        ToStringWithDecimalPrefixedUnitName(format, provider, false);

    /// <summary>
    /// Formats the size using the largest decimal unit that does not exceed it.
    /// </summary>
    /// <param name="format">Numeric format string passed to the number formatter, or <c>null</c>.</param>
    /// <param name="provider">Format provider; the current culture is used when <c>null</c>.</param>
    /// <param name="useShortUnitName">
    /// <c>true</c> for abbreviations; <c>false</c> for spelled-out names, which are singular only
    /// when the size is exactly one unit.
    /// </param>
    /// <returns>The formatted number, a space and the unit name.</returns>
    /// <remarks>
    /// Sizes below one kilobyte — including every negative size, since no threshold
    /// comparison succeeds for them — are rendered as a whole number of bytes.
    /// </remarks>
    public string ToStringWithDecimalPrefixedUnitName(string? format = null, IFormatProvider? provider = null, bool useShortUnitName = true)
    {
        provider ??= CultureInfo.CurrentCulture;
        return _bytes switch
        {
            var b when b >= _oneExaByte =>
                (b / (double)_oneExaByte).ToString(format, provider) + (useShortUnitName ? " EB" : b == _oneExaByte ? " exabyte" : " exabytes"),
            var b when b >= _onePetaByte =>
                (b / (double)_onePetaByte).ToString(format, provider) + (useShortUnitName ? " PB" : b == _onePetaByte ? " petabyte" : " petabytes"),
            var b when b >= _oneTeraByte =>
                (b / (double)_oneTeraByte).ToString(format, provider) + (useShortUnitName ? " TB" : b == _oneTeraByte ? " terabyte" : " terabytes"),
            var b when b >= _oneGigaByte =>
                (b / (double)_oneGigaByte).ToString(format, provider) + (useShortUnitName ? " GB" : b == _oneGigaByte ? " gigabyte" : " gigabytes"),
            var b when b >= _oneMegaByte =>
                (b / (double)_oneMegaByte).ToString(format, provider) + (useShortUnitName ? " MB" : b == _oneMegaByte ? " megabyte" : " megabytes"),
            var b when b >= _oneKiloByte =>
                (b / (double)_oneKiloByte).ToString(format, provider) + (useShortUnitName ? " kB" : b == _oneKiloByte ? " kilobyte" : " kilobytes"),
            // Plain bytes are formatted as an integer, not as a scaled double.
            var b =>
                b.ToString(format, provider) + (useShortUnitName ? " B" : b == 1 ? " byte" : " bytes")
        };
    }
}
/// <summary>
/// Text parsing for <see cref="ByteSize"/>. Accepted input is a number, optionally
/// followed by whitespace and a unit name, e.g. <c>"1.5 MiB"</c> or <c>"42"</c>.
/// </summary>
public readonly partial struct ByteSize
{
    /// <summary>
    /// Default, case-insensitive map from unit name to the number of bytes in one unit.
    /// Covers plain bytes plus every decimal and binary unit the formatters can emit,
    /// so formatted values can be parsed back.
    /// </summary>
    public static readonly Dictionary<string, long> DefaultMatchesForUnitsOfMeasure =
        new Dictionary<string, long>(StringComparer.OrdinalIgnoreCase)
        {
            // The formatters emit " B", " byte" and " bytes" for small values.
            ["b"] = 1,
            ["byte"] = 1,
            ["bytes"] = 1,
            ["kb"] = _oneKiloByte,
            ["kilobyte"] = _oneKiloByte,
            ["kilobytes"] = _oneKiloByte,
            ["kib"] = _oneKibiByte,
            ["kibibyte"] = _oneKibiByte,
            ["kibibytes"] = _oneKibiByte,
            ["mb"] = _oneMegaByte,
            ["megabyte"] = _oneMegaByte,
            ["megabytes"] = _oneMegaByte,
            ["mib"] = _oneMebiByte,
            ["mebibyte"] = _oneMebiByte,
            ["mebibytes"] = _oneMebiByte,
            ["gb"] = _oneGigaByte,
            ["gigabyte"] = _oneGigaByte,
            ["gigabytes"] = _oneGigaByte,
            ["gib"] = _oneGibiByte,
            ["gibibyte"] = _oneGibiByte,
            ["gibibytes"] = _oneGibiByte,
            ["tb"] = _oneTeraByte,
            ["terabyte"] = _oneTeraByte,
            ["terabytes"] = _oneTeraByte,
            ["tib"] = _oneTebiByte,
            ["tebibyte"] = _oneTebiByte,
            ["tebibytes"] = _oneTebiByte,
            ["pb"] = _onePetaByte,
            ["petabyte"] = _onePetaByte,
            ["petabytes"] = _onePetaByte,
            ["pib"] = _onePebiByte,
            ["pebibyte"] = _onePebiByte,
            ["pebibytes"] = _onePebiByte,
            ["eb"] = _oneExaByte,
            ["exabyte"] = _oneExaByte,
            ["exabytes"] = _oneExaByte,
            ["eib"] = _oneExbiByte,
            ["exbibyte"] = _oneExbiByte,
            ["exbibytes"] = _oneExbiByte,
        };

    /// <summary>Tries to parse <paramref name="s"/> using the current culture and the default units.</summary>
    /// <returns><c>true</c> on success; otherwise <c>false</c> with <paramref name="size"/> set to <see cref="ZeroValue"/>.</returns>
    public static bool TryParse(string s, out ByteSize size) =>
        TryParse(s.AsSpan(), CultureInfo.CurrentCulture, out size, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Tries to parse <paramref name="s"/> using the given provider and the default units.</summary>
    /// <returns><c>true</c> on success; otherwise <c>false</c> with <paramref name="size"/> set to <see cref="ZeroValue"/>.</returns>
    public static bool TryParse(string s, IFormatProvider provider, out ByteSize size) =>
        TryParse(s.AsSpan(), provider, out size, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Tries to parse <paramref name="s"/> using the given provider and unit table.</summary>
    /// <returns><c>true</c> on success; otherwise <c>false</c> with <paramref name="size"/> set to <see cref="ZeroValue"/>.</returns>
    public static bool TryParse(string s, IFormatProvider provider, out ByteSize size,
        IDictionary<string, long> unitsOfMeasure) =>
        TryParse(s.AsSpan(), provider, out size, unitsOfMeasure);

    /// <summary>
    /// Tries to parse a number with an optional unit.
    /// </summary>
    /// <param name="span">Text to parse. Leading whitespace is skipped; anything after the unit token is ignored.</param>
    /// <param name="provider">Culture information used to parse the number (with <see cref="NumberStyles.Any"/>).</param>
    /// <param name="size">The parsed size, or <see cref="ZeroValue"/> when parsing fails.</param>
    /// <param name="unitsOfMeasure">Map from unit name to bytes per unit. A missing unit means plain bytes.</param>
    /// <returns><c>true</c> when both the number and the unit were recognised.</returns>
    /// <remarks>
    /// The product of number and multiplier is cast to <see cref="long"/>; values outside the
    /// <see cref="long"/> range are not detected.
    /// </remarks>
    public static bool TryParse(ReadOnlySpan<char> span, IFormatProvider provider, out ByteSize size, IDictionary<string, long> unitsOfMeasure)
    {
        SplitNumberAndUnit(span, out ReadOnlySpan<char> numberPart, out ReadOnlySpan<char> unitPart);

        if (double.TryParse(numberPart, NumberStyles.Any, provider, out double number)
            && TryGetMultiplier(unitPart.ToString(), unitsOfMeasure, out long multiplier))
        {
            size = new ByteSize((long)(number * multiplier));
            return true;
        }

        size = ZeroValue;
        return false;
    }

    /// <summary>Parses <paramref name="text"/> using the current culture and the default units.</summary>
    /// <exception cref="FormatException">The numeric part is missing or malformed.</exception>
    /// <exception cref="ArgumentException">The unit is not recognised.</exception>
    public static ByteSize Parse(string text) =>
        Parse(text.AsSpan(), CultureInfo.CurrentCulture, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Parses <paramref name="text"/> using the given provider and the default units.</summary>
    /// <exception cref="FormatException">The numeric part is missing or malformed.</exception>
    /// <exception cref="ArgumentException">The unit is not recognised.</exception>
    public static ByteSize Parse(string text, IFormatProvider provider) =>
        Parse(text.AsSpan(), provider, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Parses <paramref name="text"/> using the given provider and unit table.</summary>
    /// <exception cref="FormatException">The numeric part is missing or malformed.</exception>
    /// <exception cref="ArgumentException">The unit is not recognised.</exception>
    public static ByteSize Parse(string text, IFormatProvider provider, IDictionary<string, long> unitsOfMeasure) =>
        Parse(text.AsSpan(), provider, unitsOfMeasure);

    /// <summary>
    /// Parses a number with an optional unit, throwing on failure.
    /// </summary>
    /// <param name="span">Text to parse. Leading whitespace is skipped; anything after the unit token is ignored.</param>
    /// <param name="provider">Culture information used to parse the number.</param>
    /// <param name="unitsOfMeasure">Map from unit name to bytes per unit. A missing unit means plain bytes.</param>
    /// <returns>The parsed size.</returns>
    /// <exception cref="FormatException">The numeric part is missing or malformed.</exception>
    /// <exception cref="ArgumentException">The unit is not present in <paramref name="unitsOfMeasure"/>.</exception>
    public static ByteSize Parse(ReadOnlySpan<char> span, IFormatProvider provider, IDictionary<string, long> unitsOfMeasure)
    {
        SplitNumberAndUnit(span, out ReadOnlySpan<char> numberPart, out ReadOnlySpan<char> unitPart);

        // Same number styles as TryParse, so both entry points accept the same inputs.
        double number = double.Parse(numberPart, NumberStyles.Any, provider);

        if (!TryGetMultiplier(unitPart.ToString(), unitsOfMeasure, out long multiplier))
        {
            throw new ArgumentException("Unknown unit.", nameof(span));
        }

        return new ByteSize((long)(number * multiplier));
    }

    /// <summary>Splits the input into its first whitespace-delimited token (the number) and its second (the unit).</summary>
    private static void SplitNumberAndUnit(ReadOnlySpan<char> span, out ReadOnlySpan<char> numberPart, out ReadOnlySpan<char> unitPart)
    {
        numberPart = TakeUntilWhitespace(SkipWhitespace(span), out ReadOnlySpan<char> afterNumber);
        unitPart = TakeUntilWhitespace(SkipWhitespace(afterNumber), out _);
    }

    /// <summary>
    /// Resolves a unit name to its byte multiplier. The table is consulted first;
    /// an empty unit that the table does not define means plain bytes.
    /// </summary>
    private static bool TryGetMultiplier(string unit, IDictionary<string, long> unitsOfMeasure, out long multiplier)
    {
        if (unitsOfMeasure.TryGetValue(unit, out multiplier))
        {
            return true;
        }

        if (unit.Length == 0)
        {
            multiplier = 1;
            return true;
        }

        multiplier = 0;
        return false;
    }

    /// <summary>Returns <paramref name="chars"/> without its leading whitespace.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ReadOnlySpan<char> SkipWhitespace(ReadOnlySpan<char> chars)
    {
        int start = 0;
        while (start < chars.Length && char.IsWhiteSpace(chars[start]))
        {
            start++;
        }

        return chars.Slice(start);
    }

    /// <summary>
    /// Returns the characters before the first whitespace character.
    /// </summary>
    /// <param name="chars">Text to split.</param>
    /// <param name="rest">The remainder, starting at the whitespace character; empty when there is none.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ReadOnlySpan<char> TakeUntilWhitespace(ReadOnlySpan<char> chars, out ReadOnlySpan<char> rest)
    {
        for (int i = 0; i < chars.Length; i++)
        {
            if (char.IsWhiteSpace(chars[i]))
            {
                rest = chars.Slice(i);
                return chars.Slice(0, i);
            }
        }

        rest = ReadOnlySpan<char>.Empty;
        return chars;
    }
}
}
#nullable restore

View File

@@ -10,83 +10,116 @@ using System.CommandLine.Invocation;
using System.CommandLine.Parsing;
using System.Threading.Tasks;
using System.Linq;
using System.Reflection;
using System.Reflection.Emit;
using Dapper;
using System.Security.Cryptography;
namespace Files {
class Program {
private static async Task IndexFiles(bool isVerbose, DirectoryInfo startDirectory, CancellationToken ct) {
await AnsiConsole.Status()
.StartAsync("Thinking...", async ctx => {
using var connection = new SqliteConnection("Data Source=db.db");
connection.Open();
await using var transaction = await connection.BeginTransactionAsync();
var cnt = connection.ExecuteScalar<int>("SELECT count(*) FROM sqlite_master WHERE type='table' AND name=@tableName;", new { tableName = "files" });
if (cnt == 0)
namespace Files
{
connection.Execute("CREATE TABLE IF NOT EXISTS files (name TEXT, size INTEGER, inode INTEGER);");
}
class Program
{
private static async Task IndexFiles(Configuration configuration, CancellationToken ct)
{
await AnsiConsole.Status()
.StartAsync("Thinking...", async ctx =>
{
await using var connection = new SqliteConnection("Data Source=db.db");
connection.Open();
await using var transaction = await connection.BeginTransactionAsync(ct);
Queue<string> directoriesQueue = new Queue<string>();
directoriesQueue.Enqueue(startDirectory?.ToString() ?? ".");
await InitializeDb(connection);
try {
while (directoriesQueue.TryDequeue(out string peekedDir)) {
ctx.Status(peekedDir.Replace("[", "[[").Replace("]", "]]"));
if (!configuration.SkipFileScanning)
{
Stack<string> directoriesStack = new Stack<string>();
directoriesStack.Push(configuration.InitialDirectory?.ToString() ?? ".");
try
{
while (directoriesStack.TryPop(out string peekedDir))
{
string safePeekedDir = peekedDir.Replace("[", "[[").Replace("]", "]]");
ctx.Status(safePeekedDir);
UnixDirectoryInfo dirInfo = new(peekedDir);
if (!dirInfo.CanAccess(Mono.Unix.Native.AccessModes.R_OK)
|| !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK)) {
|| !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK))
{
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {dirInfo.ToString().Replace("[", "[[").Replace("]", "]]")}");
return;
}
UnixFileSystemInfo[] entries = dirInfo.GetFileSystemEntries();
foreach (UnixFileSystemInfo entry in entries) {
if (configuration.BeVerbose)
AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] :file_folder: {safePeekedDir}");
var entries = dirInfo.GetFileSystemEntries().OrderByDescending(e => e.Name);
foreach (UnixFileSystemInfo entry in entries)
{
string relativePath = Path.Combine(peekedDir, entry.Name);
if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK)) {
string safeRelativePath = relativePath.Replace("[", "[[").Replace("]", "]]");
if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK))
{
if (entry.IsDirectory)
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {relativePath.Replace("[", "[[").Replace("]", "]]")}");
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {safeRelativePath}");
else if (entry.IsRegularFile)
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {relativePath.Replace("[", "[[").Replace("]", "]]")}");
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {safeRelativePath}");
continue;
}
if (entry.IsDirectory) {
directoriesQueue.Enqueue(relativePath);
if (entry.IsDirectory)
{
directoriesStack.Push(relativePath);
continue;
}
connection.Execute("INSERT INTO files (name, size, inode) VALUES (@name, @Length, @Inode);", new { name = relativePath, entry.Length, entry.Inode });
if (!entry.IsRegularFile)
{
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_FILE:[/] :red_exclamation_mark: {safeRelativePath}");
continue;
}
if (isVerbose)
AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] {relativePath.Replace("[", "[[").Replace("]", "]]")}");
await connection.ExecuteAsync("INSERT OR REPLACE INTO files (name, size, inode) VALUES (@name, @Length, @Inode);", new { name = relativePath, entry.Length, entry.Inode });
if (configuration.BeVerbose)
AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] :page_facing_up: {safeRelativePath}");
if (ct.IsCancellationRequested)
return;
}
}
transaction.Commit();
} catch (Exception exception) {
await transaction.RollbackAsync();
await transaction.CommitAsync(ct);
}
catch (Exception exception)
{
await transaction.RollbackAsync(ct);
AnsiConsole.WriteException(exception);
return;
}
if (configuration.BeVerbose) AnsiConsole.WriteLine();
}
ctx.Status("Finding duplicates...");
ctx.Spinner(Spinner.Known.Aesthetic);
var potential = connection.Query<(int cnt, long size)>("SELECT COUNT(*) cnt, size FROM files WHERE size != 0 GROUP BY size HAVING cnt > 1 ORDER BY size * cnt DESC;");
foreach (var potentialFile in potential) {
foreach (var potentialFile in potential)
{
if (ct.IsCancellationRequested)
return;
var sameSize = connection.Query<DbRecord>("SELECT name, size, inode FROM files WHERE size = @size",
new { potentialFile.size }).ToList();
var unporocessable = sameSize
var recordsWithErrors = sameSize
.Where(r => !r.Hash.HasValue);
foreach (var dbRecord in unporocessable)
foreach (var dbRecord in recordsWithErrors)
{
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {dbRecord.Name.Replace("[", "[[").Replace("]", "]]")}");
}
@@ -97,20 +130,94 @@ namespace Files {
.Where(g => g.Count() > 1)
.ToList();
foreach (var grp in equalGrouped) {
var root = new Tree(":double_exclamation_mark: " + grp.Key);
foreach (var item in grp) {
root.AddNode(item.Name);
foreach (var grp in equalGrouped)
{
if (ct.IsCancellationRequested)
return;
var records = grp.OrderByDescending(r => r.FileInfo.LinkCount).ToList();
DbRecord head = records.First();
var tail = records.Skip(1).Where(r => r.Inode != head.Inode).ToList();
var tailWithDuplicates = records.Skip(1).Where(r => r.Inode == head.Inode).ToList();
ByteSize totalSize = records.Distinct(new DbRecordEqualityComparerByINode()).Sum(a => a.Size) - head.Size;
var root = new Tree((head.Size + totalSize).ToStringWithDecimalPrefixedShortUnitName() + " total.");
root.AddNode(((ByteSize)head.Size).ToStringWithDecimalPrefixedShortUnitName() + " " + head.Name.Replace("[", "[[").Replace("]", "]]"));
foreach (var item in tail)
{
if (configuration.EnableLinking)
{
try
{
// First rename
string tempFileName = item.FileInfo.FullName + ".to_hardlink";
File.Move(item.FileInfo.FullName, tempFileName);
try
{
// Then hardlink
head.FileInfo.CreateLink(item.FileInfo.FullName);
// Then delete
File.Delete(tempFileName);
root.AddNode("[green]:check_mark:[/] " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
catch (Exception)
{
File.Move(tempFileName, item.FileInfo.FullName);
throw;
}
}
catch (Exception exception)
{
AnsiConsole.WriteException(exception, ExceptionFormats.ShortenEverything);
root.AddNode("[red]:cross_mark:[/] " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
}
else
{
root.AddNode(((ByteSize)item.Size).ToStringWithDecimalPrefixedShortUnitName() + " " + item.Name.Replace("[", "[[").Replace("]", "]]"));
}
}
if (configuration.BeVerbose)
foreach (var duplicate in tailWithDuplicates)
{
root.AddNode("[white]:link:[/] 0B " +
duplicate.Name.Replace("[", "[[").Replace("]", "]]"));
}
if (tail.Any() || configuration.BeVerbose)
{
AnsiConsole.Render(root);
AnsiConsole.WriteLine();
}
}
}
});
}
private static async Task Main(string[] args) {
/// <summary>
/// Creates the <c>files</c> table and the index on its <c>size</c> column when they do not exist yet.
/// </summary>
/// <param name="connection">Connection on which the two DDL statements are executed, in order.</param>
private static async Task InitializeDb(SqliteConnection connection)
{
    const string createTableSql =
        "CREATE TABLE IF NOT EXISTS files (name TEXT PRIMARY KEY, size INTEGER NOT NULL, inode INTEGER NOT NULL);";
    const string createSizeIndexSql =
        "CREATE INDEX IF NOT EXISTS idx_files_size ON files(size);";

    // The table must exist before the index that refers to it.
    foreach (string statement in new[] { createTableSql, createSizeIndexSql })
    {
        await connection.ExecuteAsync(statement);
    }
}
private static async Task Main(string[] args)
{
var verboseOption = new Option<bool>(new[] { "--verbose", "-v" }, "Verbose");
var hardlinkOption = new Option<bool>(new[] { "--hardlink", "-l" }, "Hardlink duplicates");
var databaseOption = new Option<bool>(new[] { "--keep", "-k" }, () => true, "Keep database.");
var scanOption = new Option<bool>(new[] { "--no-scan" }, "Do not scan file system. Reuse database.");
var directoryArgument = new Argument<DirectoryInfo>(
result => new DirectoryInfo("./"), isDefault: true)
{
@@ -119,46 +226,127 @@ namespace Files {
Arity = ArgumentArity.ZeroOrOne,
}.ExistingOnly();
var rootCommand = new RootCommand("$ File -v false ./")
var rootCommand = new RootCommand("Find duplicate files.")
{
verboseOption,
hardlinkOption,
databaseOption,
scanOption,
directoryArgument,
};
ParseResult result = rootCommand.Parse(args);
ArgumentResult dirResult = result.FindResultFor(directoryArgument);
var dir = new DirectoryInfo(
dirResult.Tokens.FirstOrDefault()?.Value
?? dirResult.Argument.GetDefaultValue()?.ToString());
rootCommand.Handler = CommandHandler.Create<bool, CancellationToken>(
async (verbose, ct) => await IndexFiles(verbose, dir, ct));
var config = new Configuration
{
BeVerbose = result.ValueForOption(verboseOption),
EnableLinking = result.ValueForOption(hardlinkOption),
InitialDirectory = dir,
KeepDatabase = result.ValueForOption(databaseOption),
SkipFileScanning = result.ValueForOption(scanOption),
};
rootCommand.Handler = CommandHandler.Create<CancellationToken>(
async ct =>
{
await IndexFiles(config, ct);
});
await rootCommand.InvokeAsync(args);
}
}
public class DbRecord {
private readonly Lazy<Guid?> _guid;
/// <summary>
/// Options for one run of the tool, filled from the parsed command line.
/// </summary>
class Configuration
{
    /// <summary>Directory where scanning starts.</summary>
    public DirectoryInfo InitialDirectory { get; set; }

    /// <summary>When set, the file system is not scanned and the existing database content is reused.</summary>
    public bool SkipFileScanning { get; set; }

    /// <summary>When set, duplicate files are replaced by hard links.</summary>
    public bool EnableLinking { get; set; }

    /// <summary>
    /// Value of the "keep database" option.
    /// NOTE(review): no visible code reads this flag — confirm where it is consumed.
    /// </summary>
    public bool KeepDatabase { get; set; }

    /// <summary>When set, additional per-directory and per-file progress lines are printed.</summary>
    public bool BeVerbose { get; set; }
}
public DbRecord() {
public class DbRecord
{
private readonly Lazy<Guid?> _guid;
private readonly Lazy<UnixFileInfo> _fileInfo;
public DbRecord()
{
_guid = new Lazy<Guid?>(GetHash);
_fileInfo = new Lazy<UnixFileInfo>(GetFileInfo);
}
public DbRecord(UnixFileInfo fileInfo)
{
_guid = new Lazy<Guid?>(GetHash);
_fileInfo = new Lazy<UnixFileInfo>(fileInfo);
Name = fileInfo.GetOriginalPath();
Size = fileInfo.Length;
Inode = fileInfo.Inode;
}
public string Name { get; set; }
public long Size { get; set; }
public long Inode { get; set; }
public Guid? Hash => _guid.Value;
public Guid? GetHash() {
try {
public Guid? Hash => _guid.Value;
public UnixFileInfo FileInfo => _fileInfo.Value;
private UnixFileInfo GetFileInfo() => new(Name);
private Guid? GetHash()
{
try
{
using FileStream stream = File.OpenRead(Name);
var md5 = MD5.Create();
var bytes = md5.ComputeHash(stream);
return new Guid(bytes);
} catch {
}
catch
{
return null;
}
}
}
/// <summary>
/// Compares <see cref="DbRecord"/> instances by inode number only, so that
/// records pointing at the same inode are treated as one item.
/// </summary>
public class DbRecordEqualityComparerByINode : EqualityComparer<DbRecord>
{
    /// <summary>
    /// Two records are equal when their inode numbers match. Two <c>null</c>
    /// references are equal to each other; <c>null</c> never equals a record.
    /// </summary>
    public override bool Equals(DbRecord x, DbRecord y)
    {
        if (x is null || y is null)
        {
            return x is null && y is null;
        }

        return x.Inode == y.Inode;
    }

    /// <summary>Hashes a record by its inode number, consistent with <see cref="Equals(DbRecord, DbRecord)"/>.</summary>
    public override int GetHashCode(DbRecord obj)
    {
        long inode = obj.Inode;
        return inode.GetHashCode();
    }
}
/// <summary>
/// Extension helpers for <see cref="UnixFileSystemInfo"/>.
/// </summary>
static class OriginalPathUnixFileSystemInfo
{
    // stat(2) reports st_blocks in 512-byte units, independent of st_blksize.
    // NOTE(review): assumes BlocksAllocated exposes st_blocks and BlockSize exposes
    // st_blksize — confirm against the Mono.Posix version in use.
    private const long StatBlockUnitInBytes = 512;

    /// <summary>Compiled accessor for the <c>OriginalPath</c> property, which is looked up via reflection.</summary>
    private static readonly Func<UnixFileSystemInfo, string> GetOriginalPathFunc;

    /// <summary>
    /// Builds the accessor once. The property is resolved with non-public binding flags and
    /// wrapped in a dynamic method so that later calls do not pay the reflection cost.
    /// </summary>
    /// <exception cref="MissingMemberException">
    /// The property or its getter cannot be found (surfaces wrapped in a
    /// <see cref="TypeInitializationException"/>).
    /// </exception>
    static OriginalPathUnixFileSystemInfo()
    {
        // Fail with a descriptive error instead of a NullReferenceException when the
        // library no longer exposes the property.
        MethodInfo getter = typeof(UnixFileSystemInfo)
            .GetProperty("OriginalPath", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic)
            ?.GetGetMethod(true)
            ?? throw new MissingMemberException(nameof(UnixFileSystemInfo), "OriginalPath");

        // Owner type + skipVisibility = true lets the generated code call a non-public getter.
        var method = new DynamicMethod("cheat", typeof(string), new[] { typeof(UnixFileSystemInfo) }, typeof(UnixFileSystemInfo), true);
        var il = method.GetILGenerator();
        il.Emit(OpCodes.Ldarg_0);
        il.Emit(OpCodes.Castclass, typeof(UnixFileSystemInfo));
        il.Emit(OpCodes.Callvirt, getter);
        il.Emit(OpCodes.Ret);
        GetOriginalPathFunc = (Func<UnixFileSystemInfo, string>)method.CreateDelegate(typeof(Func<UnixFileSystemInfo, string>));
    }

    /// <summary>Returns the value of the <c>OriginalPath</c> property of <paramref name="info"/>.</summary>
    public static string GetOriginalPath(this UnixFileSystemInfo info) => GetOriginalPathFunc(info);

    /// <summary>
    /// Returns the number of bytes allocated on disk for <paramref name="info"/>:
    /// the allocated block count times the 512-byte stat block unit.
    /// </summary>
    public static long GetSizeOnDisk(this UnixFileSystemInfo info) => info.BlocksAllocated * StatBlockUnitInBytes;
}
}