From 80a42d9d0a4c00cfb82b658abc7b899ddabffaf6 Mon Sep 17 00:00:00 2001
From: Kiril Markov
Date: Wed, 7 Apr 2021 14:36:45 +0300
Subject: [PATCH] wip

---
 Files/ByteSize.cs | 412 ++++++++++++++++++++++++++++++++++++++++++++++
 Files/Program.cs  | 338 ++++++++++++++++++++++++++++++---------
 2 files changed, 675 insertions(+), 75 deletions(-)
 create mode 100644 Files/ByteSize.cs

diff --git a/Files/ByteSize.cs b/Files/ByteSize.cs
new file mode 100644
index 0000000..0d33b4a
--- /dev/null
+++ b/Files/ByteSize.cs
@@ -0,0 +1,412 @@
+using System;
+using System.Globalization;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+#nullable enable
+
+namespace Files
+{
+    /// <summary>
+    /// Immutable size expressed in bytes. Provides arithmetic and comparison operators,
+    /// conversions to decimal (kB, MB, ...) and binary (KiB, MiB, ...) units, formatting and parsing.
+    /// </summary>
+    public readonly partial struct ByteSize : IEquatable<ByteSize>, IComparable<ByteSize>
+    {
+        public static readonly ByteSize MinValue = new ByteSize(long.MinValue);
+        public static readonly ByteSize MaxValue = new ByteSize(long.MaxValue);
+        public static readonly ByteSize ZeroValue = new ByteSize(0);
+
+        public static ByteSize operator +(ByteSize b1, ByteSize b2) => new ByteSize(b1._bytes + b2._bytes);
+        public static ByteSize operator +(ByteSize b1, long b2) => new ByteSize(b1._bytes + b2);
+        public static ByteSize operator +(ByteSize b1, int b2) => new ByteSize(b1._bytes + b2);
+        public static ByteSize operator ++(ByteSize b) => new ByteSize(b._bytes + 1);
+        public static ByteSize operator -(ByteSize b) => new ByteSize(-b._bytes);
+        public static ByteSize operator -(ByteSize b1, ByteSize b2) => new ByteSize(b1._bytes - b2._bytes);
+        public static ByteSize operator -(ByteSize b1, long b2) => new ByteSize(b1._bytes - b2);
+        public static ByteSize operator -(ByteSize b1, int b2) => new ByteSize(b1._bytes - b2);
+        public static ByteSize operator --(ByteSize b) => new ByteSize(b._bytes - 1);
+        public static bool operator ==(ByteSize b1, ByteSize b2) => b1._bytes == b2._bytes;
+        public static bool operator !=(ByteSize b1, ByteSize b2) => b1._bytes != b2._bytes;
+        public static bool operator <(ByteSize b1, ByteSize b2) => b1._bytes < b2._bytes;
+        public static bool operator <=(ByteSize b1, ByteSize b2) => b1._bytes <= b2._bytes;
+        public static bool operator >(ByteSize b1, ByteSize b2) => b1._bytes > b2._bytes;
+        public static bool operator >=(ByteSize b1, ByteSize b2) => b1._bytes >= b2._bytes;
+        public static implicit operator ByteSize(long bytes) => new ByteSize(bytes);
+        public static implicit operator ByteSize(int bytes) => new ByteSize(bytes);
+        public static implicit operator ByteSize(string text) => Parse(text, CultureInfo.CurrentCulture); // NOTE(review): implicit conversion that can throw on malformed text
+        public static implicit operator long(ByteSize bytes) => bytes._bytes;
+        public static explicit operator int(ByteSize bytes) => (int)bytes._bytes;
+        public static implicit operator string(ByteSize bytes) => bytes.ToString();
+
+        private readonly long _bytes;
+
+        private ByteSize(long bytes)
+        {
+            _bytes = bytes;
+        }
+
+        public ByteSize Add(ByteSize bs) => new ByteSize(_bytes + bs._bytes);
+
+        public ByteSize AddBytes(long value) => new ByteSize(_bytes + value);
+
+        public int CompareTo(ByteSize other) => _bytes.CompareTo(other._bytes);
+
+        public bool Equals(ByteSize other) => _bytes == other._bytes;
+
+        public override bool Equals(object? obj) => obj is ByteSize other && Equals(other);
+
+        public override int GetHashCode() => _bytes.GetHashCode();
+
+        public override string ToString() =>
+            ToStringWithDecimalPrefixedUnitName("0.##");
+
+        public string ToString(string? format) =>
+            ToStringWithDecimalPrefixedUnitName(format);
+
+        public string ToString(IFormatProvider? provider) =>
+            ToStringWithDecimalPrefixedUnitName(provider: provider);
+
+        public string ToString(string? format, IFormatProvider? provider) =>
+            ToStringWithDecimalPrefixedUnitName(format, provider);
+
+        public string ToString(string? format, IFormatProvider? provider, bool useBinaryUnitNamePrefix)
+        {
+            return useBinaryUnitNamePrefix
+                ? ToStringWithBinaryPrefixedUnitName(format, provider)
+                : ToStringWithDecimalPrefixedUnitName(format, provider);
+        }
+
+        public string ToString(string? format, IFormatProvider? provider, bool useBinaryUnitNamePrefix, bool useShortUnitName)
+        {
+            return useBinaryUnitNamePrefix
+                ? ToStringWithBinaryPrefixedUnitName(format, provider, useShortUnitName)
+                : ToStringWithDecimalPrefixedUnitName(format, provider, useShortUnitName);
+        }
+    }
+
+    // Binary (1024-based) units: factories, adders, conversions and formatting.
+    public readonly partial struct ByteSize
+    {
+        private const long _oneKibiByte = 1024;
+        private const long _oneMebiByte = 1024 * _oneKibiByte;
+        private const long _oneGibiByte = 1024 * _oneMebiByte;
+        private const long _oneTebiByte = 1024 * _oneGibiByte;
+        private const long _onePebiByte = 1024 * _oneTebiByte;
+        private const long _oneExbiByte = 1024 * _onePebiByte;
+
+        public static ByteSize FromKibiBytes(double value) => new ByteSize((long)(value * _oneKibiByte));
+
+        public static ByteSize FromMebiBytes(double value) => new ByteSize((long)(value * _oneMebiByte));
+
+        public static ByteSize FromGibiBytes(double value) => new ByteSize((long)(value * _oneGibiByte));
+
+        public static ByteSize FromTebiBytes(double value) => new ByteSize((long)(value * _oneTebiByte));
+
+        public static ByteSize FromPebiBytes(double value) => new ByteSize((long)(value * _onePebiByte));
+
+        public ByteSize AddKibiBytes(double value) => new ByteSize((long)(value * _oneKibiByte) + _bytes);
+
+        public ByteSize AddMebiBytes(double value) => new ByteSize((long)(value * _oneMebiByte) + _bytes);
+
+        public ByteSize AddGibiBytes(double value) => new ByteSize((long)(value * _oneGibiByte) + _bytes);
+
+        public ByteSize AddTebiBytes(double value) => new ByteSize((long)(value * _oneTebiByte) + _bytes);
+
+        public ByteSize AddPebiBytes(double value) => new ByteSize((long)(value * _onePebiByte) + _bytes);
+
+        public double AsKibiBytes => (double)_bytes / _oneKibiByte;
+
+        public double AsMebiBytes => (double)_bytes / _oneMebiByte;
+
+        public double AsGibiBytes => (double)_bytes / _oneGibiByte;
+
+        public double AsTebiBytes => (double)_bytes / _oneTebiByte;
+
+        public double AsPebiBytes => (double)_bytes / _onePebiByte;
+
+        public string ToStringWithBinaryPrefixedShortUnitName(string? format = null, IFormatProvider? provider = null) =>
+            ToStringWithBinaryPrefixedUnitName(format, provider, true);
+
+        public string ToStringWithBinaryPrefixedLongUnitName(string? format = null, IFormatProvider? provider = null) =>
+            ToStringWithBinaryPrefixedUnitName(format, provider, false);
+
+        /// <summary>Formats the size in the largest binary unit it reaches; values below 1 KiB (negative ones included) are printed in bytes.</summary>
+        public string ToStringWithBinaryPrefixedUnitName(string? format = null, IFormatProvider? provider = null, bool useShortUnitName = true)
+        {
+            provider ??= CultureInfo.CurrentCulture;
+
+            return _bytes switch
+            {
+                var b when b >= _oneExbiByte =>
+                    (b / (double)_oneExbiByte).ToString(format, provider) + (useShortUnitName ? " EiB" : b == _oneExbiByte ? " exbibyte" : " exbibytes"),
+                var b when b >= _onePebiByte =>
+                    (b / (double)_onePebiByte).ToString(format, provider) + (useShortUnitName ? " PiB" : b == _onePebiByte ? " pebibyte" : " pebibytes"),
+                var b when b >= _oneTebiByte =>
+                    (b / (double)_oneTebiByte).ToString(format, provider) + (useShortUnitName ? " TiB" : b == _oneTebiByte ? " tebibyte" : " tebibytes"),
+                var b when b >= _oneGibiByte =>
+                    (b / (double)_oneGibiByte).ToString(format, provider) + (useShortUnitName ? " GiB" : b == _oneGibiByte ? " gibibyte" : " gibibytes"),
+                var b when b >= _oneMebiByte =>
+                    (b / (double)_oneMebiByte).ToString(format, provider) + (useShortUnitName ? " MiB" : b == _oneMebiByte ? " mebibyte" : " mebibytes"),
+                var b when b >= _oneKibiByte =>
+                    (b / (double)_oneKibiByte).ToString(format, provider) + (useShortUnitName ? " KiB" : b == _oneKibiByte ? " kibibyte" : " kibibytes"),
+                var b =>
+                    b.ToString(format, provider) + (useShortUnitName ? " B" : b == 1 ? " byte" : " bytes")
+            };
+        }
+    }
+
+    // Decimal (1000-based) units: factories, adders, conversions and formatting.
+    public readonly partial struct ByteSize
+    {
+        private const long _oneKiloByte = 1000;
+        private const long _oneMegaByte = 1000 * _oneKiloByte;
+        private const long _oneGigaByte = 1000 * _oneMegaByte;
+        private const long _oneTeraByte = 1000 * _oneGigaByte;
+        private const long _onePetaByte = 1000 * _oneTeraByte;
+        private const long _oneExaByte = 1000 * _onePetaByte;
+
+        public static ByteSize FromKiloBytes(double value) => new ByteSize((long)(value * _oneKiloByte));
+
+        public static ByteSize FromMegaBytes(double value) => new ByteSize((long)(value * _oneMegaByte));
+
+        public static ByteSize FromGigaBytes(double value) => new ByteSize((long)(value * _oneGigaByte));
+
+        public static ByteSize FromTeraBytes(double value) => new ByteSize((long)(value * _oneTeraByte));
+
+        public static ByteSize FromPetaBytes(double value) => new ByteSize((long)(value * _onePetaByte));
+
+        public ByteSize AddKiloBytes(double value) => new ByteSize((long)(value * _oneKiloByte) + _bytes);
+
+        public ByteSize AddMegaBytes(double value) => new ByteSize((long)(value * _oneMegaByte) + _bytes);
+
+        public ByteSize AddGigaBytes(double value) => new ByteSize((long)(value * _oneGigaByte) + _bytes);
+
+        public ByteSize AddTeraBytes(double value) => new ByteSize((long)(value * _oneTeraByte) + _bytes);
+
+        public ByteSize AddPetaBytes(double value) => new ByteSize((long)(value * _onePetaByte) + _bytes);
+
+        public double AsKiloBytes => (double)_bytes / _oneKiloByte;
+
+        public double AsMegaBytes => (double)_bytes / _oneMegaByte;
+
+        public double AsGigaBytes => (double)_bytes / _oneGigaByte;
+
+        public double AsTeraBytes => (double)_bytes / _oneTeraByte;
+
+        public double AsPetaBytes => (double)_bytes / _onePetaByte;
+
+        public string ToStringWithDecimalPrefixedShortUnitName(string? format = null, IFormatProvider? provider = null) =>
+            ToStringWithDecimalPrefixedUnitName(format, provider, true);
+
+        /// <summary>Formats with decimal prefixes and long unit names, e.g. "2 kilobytes".</summary>
+        public string ToStringWithDecimalPrefixedLongUnitName(string? format = null, IFormatProvider? provider = null) =>
+            ToStringWithDecimalPrefixedUnitName(format, provider, false); // false = long names; 'true' here made this an alias of the short variant
+
+        /// <summary>Formats the size in the largest decimal unit it reaches; values below 1 kB (negative ones included) are printed in bytes.</summary>
+        public string ToStringWithDecimalPrefixedUnitName(string? format = null, IFormatProvider? provider = null, bool useShortUnitName = true)
+        {
+            provider ??= CultureInfo.CurrentCulture;
+
+            return _bytes switch
+            {
+                var b when b >= _oneExaByte =>
+                    (b / (double)_oneExaByte).ToString(format, provider) + (useShortUnitName ? " EB" : b == _oneExaByte ? " exabyte" : " exabytes"),
+                var b when b >= _onePetaByte =>
+                    (b / (double)_onePetaByte).ToString(format, provider) + (useShortUnitName ? " PB" : b == _onePetaByte ? " petabyte" : " petabytes"),
+                var b when b >= _oneTeraByte =>
+                    (b / (double)_oneTeraByte).ToString(format, provider) + (useShortUnitName ? " TB" : b == _oneTeraByte ? " terabyte" : " terabytes"),
+                var b when b >= _oneGigaByte =>
+                    (b / (double)_oneGigaByte).ToString(format, provider) + (useShortUnitName ? " GB" : b == _oneGigaByte ? " gigabyte" : " gigabytes"),
+                var b when b >= _oneMegaByte =>
+                    (b / (double)_oneMegaByte).ToString(format, provider) + (useShortUnitName ? " MB" : b == _oneMegaByte ? " megabyte" : " megabytes"),
+                var b when b >= _oneKiloByte =>
+                    (b / (double)_oneKiloByte).ToString(format, provider) + (useShortUnitName ? " kB" : b == _oneKiloByte ? " kilobyte" : " kilobytes"),
+                var b =>
+                    b.ToString(format, provider) + (useShortUnitName ? " B" : b == 1 ? " byte" : " bytes")
+            };
+        }
+    }
+
+    // Parsing of "number [unit]" text.
+    public readonly partial struct ByteSize
+    {
+        /// <summary>Case-insensitive map from unit name to its multiplier in bytes, used by the parsers by default.</summary>
+        public static readonly Dictionary<string, long> DefaultMatchesForUnitsOfMeasure =
+            new Dictionary<string, long>(StringComparer.OrdinalIgnoreCase)
+            {
+                ["kb"] = _oneKiloByte,
+                ["kilobyte"] = _oneKiloByte,
+                ["kilobytes"] = _oneKiloByte,
+
+                ["kib"] = _oneKibiByte,
+                ["kibibyte"] = _oneKibiByte,
+                ["kibibytes"] = _oneKibiByte,
+
+                ["mb"] = _oneMegaByte,
+                ["megabyte"] = _oneMegaByte,
+                ["megabytes"] = _oneMegaByte,
+
+                ["mib"] = _oneMebiByte,
+                ["mebibyte"] = _oneMebiByte,
+                ["mebibytes"] = _oneMebiByte,
+
+                ["gb"] = _oneGigaByte,
+                ["gigabyte"] = _oneGigaByte,
+                ["gigabytes"] = _oneGigaByte,
+
+                ["gib"] = _oneGibiByte,
+                ["gibibyte"] = _oneGibiByte,
+                ["gibibytes"] = _oneGibiByte,
+
+                ["tb"] = _oneTeraByte,
+                ["terabyte"] = _oneTeraByte,
+                ["terabytes"] = _oneTeraByte,
+
+                ["tib"] = _oneTebiByte,
+                ["tebibyte"] = _oneTebiByte,
+                ["tebibytes"] = _oneTebiByte,
+
+                ["pb"] = _onePetaByte,
+                ["petabyte"] = _onePetaByte,
+                ["petabytes"] = _onePetaByte,
+
+                ["pib"] = _onePebiByte,
+                ["pebibyte"] = _onePebiByte,
+                ["pebibytes"] = _onePebiByte,
+
+                ["eb"] = _oneExaByte,
+                ["exabyte"] = _oneExaByte,
+                ["exabytes"] = _oneExaByte,
+
+                ["eib"] = _oneExbiByte,
+                ["exbibyte"] = _oneExbiByte,
+                ["exbibytes"] = _oneExbiByte,
+            };
+
+        public static bool TryParse(string s, out ByteSize size) =>
+            TryParse(s.AsSpan(), CultureInfo.CurrentCulture, out size, DefaultMatchesForUnitsOfMeasure);
+
+        public static bool TryParse(string s, IFormatProvider provider, out ByteSize size) =>
+            TryParse(s.AsSpan(), provider, out size, DefaultMatchesForUnitsOfMeasure);
+
+        public static bool TryParse(string s, IFormatProvider provider, out ByteSize size,
+            IDictionary<string, long> unitsOfMeasure) =>
+            TryParse(s.AsSpan(), provider, out size, unitsOfMeasure);
+
+        /// <summary>Parses "number [unit]"; returns false and sets <paramref name="size"/> to <see cref="ZeroValue"/> when the number or the unit is not recognized.</summary>
+        public static bool TryParse(ReadOnlySpan<char> span, IFormatProvider provider, out ByteSize size, IDictionary<string, long> unitsOfMeasure)
+        {
+            ReadOnlySpan<char> trimmedFromStart = SkipWhitespace(span);
+            ReadOnlySpan<char> doublePart = TakeUntilWhitespace(trimmedFromStart, out ReadOnlySpan<char> afterDouble);
+
+            ReadOnlySpan<char> unitPart = TakeUntilWhitespace(SkipWhitespace(afterDouble), out _);
+
+            if (!double.TryParse(doublePart.ToString(), NumberStyles.Any, provider, out var parsedDouble))
+            {
+                size = ZeroValue;
+                return false;
+            }
+
+            string unit = unitPart.ToString();
+            long multiplier;
+
+            if (unitsOfMeasure.TryGetValue(unit, out long knownMultiplier)) // single lookup instead of ContainsKey + indexer
+            {
+                multiplier = knownMultiplier;
+            }
+            else if (string.IsNullOrEmpty(unit))
+            {
+                multiplier = 1; // no unit given: the number is already in bytes
+            }
+            else
+            {
+                size = ZeroValue;
+                return false;
+            }
+
+            size = (long)(parsedDouble * multiplier);
+            return true;
+        }
+
+
+        public static ByteSize Parse(string text) =>
+            Parse(text.AsSpan(), CultureInfo.CurrentCulture, DefaultMatchesForUnitsOfMeasure);
+
+        public static ByteSize Parse(string text, IFormatProvider provider) =>
+            Parse(text.AsSpan(), provider, DefaultMatchesForUnitsOfMeasure);
+
+        public static ByteSize Parse(string text, IFormatProvider provider, IDictionary<string, long> unitsOfMeasure) =>
+            Parse(text.AsSpan(), provider, unitsOfMeasure);
+
+        /// <summary>Parses "number [unit]"; failures of the numeric part surface as the exceptions thrown by <see cref="double.Parse(string, NumberStyles, IFormatProvider)"/>.</summary>
+        /// <exception cref="ArgumentException">The unit is not empty and not present in <paramref name="unitsOfMeasure"/>.</exception>
+        public static ByteSize Parse(ReadOnlySpan<char> span, IFormatProvider provider, IDictionary<string, long> unitsOfMeasure)
+        {
+            ReadOnlySpan<char> trimmedFromStart = SkipWhitespace(span);
+            ReadOnlySpan<char> doublePart = TakeUntilWhitespace(trimmedFromStart, out ReadOnlySpan<char> afterDouble);
+
+            var doubleValue = double.Parse(doublePart.ToString(), NumberStyles.Any, provider); // same styles as TryParse, so both accept the same input
+
+            ReadOnlySpan<char> unitPart = TakeUntilWhitespace(SkipWhitespace(afterDouble), out _);
+
+            string unit = unitPart.ToString();
+            long multiplier;
+
+            if (unitsOfMeasure.TryGetValue(unit, out long knownMultiplier)) // single lookup instead of ContainsKey + indexer
+            {
+                multiplier = knownMultiplier;
+            }
+            else if (string.IsNullOrEmpty(unit))
+            {
+                multiplier = 1; // no unit given: the number is already in bytes
+            }
+            else
+            {
+                throw new ArgumentException("Unknown unit.", nameof(span));
+            }
+
+            return (long)(doubleValue * multiplier);
+        }
+
+
+        /// <summary>Returns <paramref name="chars"/> without its leading white space (empty when it is all white space).</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ReadOnlySpan<char> SkipWhitespace(ReadOnlySpan<char> chars)
+        {
+            int start = 0;
+            for (int i = 0; i < chars.Length; i++)
+            {
+                if (char.IsWhiteSpace(chars[i]))
+                {
+                    start++;
+                }
+                else
+                {
+                    return chars.Slice(start, chars.Length - start);
+                }
+            }
+
+            return ReadOnlySpan<char>.Empty;
+        }
+
+        /// <summary>Returns the characters before the first white space; <paramref name="rest"/> receives the remainder starting at that white space.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ReadOnlySpan<char> TakeUntilWhitespace(ReadOnlySpan<char> chars, out ReadOnlySpan<char> rest)
+        {
+            for (int i = 0; i < chars.Length; i++)
+            {
+                if (char.IsWhiteSpace(chars[i]))
+                {
+                    rest = chars.Slice(i);
+                    return chars.Slice(0, i);
+                }
+            }
+
+            rest = ReadOnlySpan<char>.Empty;
+            return chars;
+        }
+    }
+}
+
+#nullable restore
\ No newline at end of file
diff --git a/Files/Program.cs b/Files/Program.cs
index d286feb..dc126f0 100644
--- a/Files/Program.cs
+++ b/Files/Program.cs
@@ -10,68 +10,97 @@ using System.CommandLine.Invocation;
 using System.CommandLine.Parsing;
 using System.Threading.Tasks;
 using System.Linq;
+using System.Reflection;
+using System.Reflection.Emit;
 using Dapper;
 using System.Security.Cryptography;
 
-namespace Files {
-    class Program {
-        private static async Task IndexFiles(bool isVerbose, DirectoryInfo startDirectory, CancellationToken ct) {
+namespace Files
+{
+    class Program
+    {
+        private static async Task IndexFiles(Configuration configuration, CancellationToken ct) // scans the tree into db.db (unless SkipFileScanning), then reports or hard-links duplicates
+        {
             await AnsiConsole.Status()
-                .StartAsync("Thinking...", async ctx => {
-                    using var connection = new SqliteConnection("Data Source=db.db");
+                .StartAsync("Thinking...", async ctx =>
+                {
+                    await using var connection = new SqliteConnection("Data Source=db.db");
                     connection.Open();
-                    await using var transaction = await connection.BeginTransactionAsync();
+                    await using var transaction = await connection.BeginTransactionAsync(ct);
 
-                    var cnt = connection.ExecuteScalar<int>("SELECT count(*) FROM sqlite_master WHERE type='table' AND name=@tableName;", new { tableName = "files" });
-                    if (cnt == 0)
+                    await InitializeDb(connection);
+
+                    if (!configuration.SkipFileScanning)
                     {
-                        connection.Execute("CREATE TABLE IF NOT EXISTS files (name TEXT, size INTEGER, inode INTEGER);");
-                    }
+                        Stack<string> directoriesStack = new Stack<string>();
+                        directoriesStack.Push(configuration.InitialDirectory?.ToString() ?? ".");
 
-                    Queue<string> directoriesQueue = new Queue<string>();
-                    directoriesQueue.Enqueue(startDirectory?.ToString() ?? ".");
+                        try
+                        {
+                            while (directoriesStack.TryPop(out string peekedDir))
+                            {
+                                string safePeekedDir = peekedDir.Replace("[", "[[").Replace("]", "]]"); // escape Spectre.Console markup brackets
+                                ctx.Status(safePeekedDir);
 
-                    try {
-                        while (directoriesQueue.TryDequeue(out string peekedDir)) {
-                            ctx.Status(peekedDir.Replace("[", "[[").Replace("]", "]]"));
-
-                            UnixDirectoryInfo dirInfo = new(peekedDir);
-                            if (!dirInfo.CanAccess(Mono.Unix.Native.AccessModes.R_OK)
-                                || !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK)) {
-                                AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {dirInfo.ToString().Replace("[", "[[").Replace("]", "]]")}");
-                                return;
-                            }
-                            UnixFileSystemInfo[] entries = dirInfo.GetFileSystemEntries();
-                            foreach (UnixFileSystemInfo entry in entries) {
-                                string relativePath = Path.Combine(peekedDir, entry.Name);
-                                if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK)) {
-                                    if (entry.IsDirectory)
-                                        AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {relativePath.Replace("[", "[[").Replace("]", "]]")}");
-                                    else if (entry.IsRegularFile)
-                                        AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {relativePath.Replace("[", "[[").Replace("]", "]]")}");
-                                    continue;
-                                }
-
-                                if (entry.IsDirectory) {
-                                    directoriesQueue.Enqueue(relativePath);
-                                    continue;
-                                }
-
-                                connection.Execute("INSERT INTO files (name, size, inode) VALUES (@name, @Length, @Inode);", new { name = relativePath, entry.Length, entry.Inode });
-
-                                if (isVerbose)
-                                    AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] {relativePath.Replace("[", "[[").Replace("]", "]]")}");
-
-                                if (ct.IsCancellationRequested)
+                                UnixDirectoryInfo dirInfo = new(peekedDir);
+                                if (!dirInfo.CanAccess(Mono.Unix.Native.AccessModes.R_OK)
+                                    || !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK))
+                                { // NOTE(review): the return below abandons the whole scan without committing; 'continue' may be intended for sub-directories
+                                    AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {dirInfo.ToString().Replace("[", "[[").Replace("]", "]]")}");
                                     return;
+                                }
+
+                                if (configuration.BeVerbose)
+                                    AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] :file_folder: {safePeekedDir}");
+
+                                var entries = dirInfo.GetFileSystemEntries().OrderByDescending(e => e.Name);
+                                foreach (UnixFileSystemInfo entry in entries)
+                                {
+                                    string relativePath = Path.Combine(peekedDir, entry.Name);
+                                    string safeRelativePath = relativePath.Replace("[", "[[").Replace("]", "]]");
+
+                                    if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK))
+                                    {
+                                        if (entry.IsDirectory)
+                                            AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {safeRelativePath}");
+                                        else if (entry.IsRegularFile)
+                                            AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {safeRelativePath}");
+                                        continue;
+                                    }
+
+                                    if (entry.IsDirectory)
+                                    {
+                                        directoriesStack.Push(relativePath);
+                                        continue;
+                                    }
+
+                                    if (!entry.IsRegularFile)
+                                    {
+                                        AnsiConsole.MarkupLine($"[red]:cross_mark: NO_FILE:[/] :red_exclamation_mark: {safeRelativePath}");
+                                        continue;
+                                    }
+
+                                    await connection.ExecuteAsync("INSERT OR REPLACE INTO files (name, size, inode) VALUES (@name, @Length, @Inode);", new { name = relativePath, entry.Length, entry.Inode });
+
+                                    if (configuration.BeVerbose)
+                                        AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] :page_facing_up: {safeRelativePath}");
+
+                                    if (ct.IsCancellationRequested)
+                                        return;
+                                }
                             }
+                            await transaction.CommitAsync(ct);
                         }
-                        transaction.Commit();
-                    } catch (Exception exception) {
-                        await transaction.RollbackAsync();
-                        AnsiConsole.WriteException(exception);
-
-                        return;
+                        catch (Exception exception)
+                        {
+                            await transaction.RollbackAsync(); // no token: the rollback must still run, and the error be reported, when ct is already cancelled
+                            AnsiConsole.WriteException(exception);
+
+                            return;
+                        }
+
+                        if (configuration.BeVerbose) AnsiConsole.WriteLine();
+
                     }
 
                     ctx.Status("Finding duplicates...");
@@ -79,86 +108,245 @@ namespace Files {
 
                     var potential = connection.Query<(int cnt, long size)>("SELECT COUNT(*) cnt, size FROM files WHERE size != 0 GROUP BY size HAVING cnt > 1 ORDER BY size * cnt DESC;");
 
-                    foreach (var potentialFile in potential) {
+                    foreach (var potentialFile in potential)
+                    {
+                        if (ct.IsCancellationRequested)
+                            return;
+
                         var sameSize = connection.Query<DbRecord>("SELECT name, size, inode FROM files WHERE size = @size", new { potentialFile.size }).ToList();
 
-                        var unporocessable = sameSize
+                        var recordsWithErrors = sameSize
                             .Where(r => !r.Hash.HasValue);
 
-                        foreach (var dbRecord in unporocessable)
+                        foreach (var dbRecord in recordsWithErrors)
                         {
                             AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {dbRecord.Name.Replace("[", "[[").Replace("]", "]]")}");
                         }
 
                         var equalGrouped = sameSize
                             .Where(r => r.Hash.HasValue)
-                            .GroupBy(r=>r.Hash)
-                            .Where(g=>g.Count() > 1)
+                            .GroupBy(r => r.Hash)
+                            .Where(g => g.Count() > 1)
                             .ToList();
 
-                        foreach (var grp in equalGrouped) {
-                            var root = new Tree(":double_exclamation_mark: " + grp.Key);
-                            foreach (var item in grp) {
-                                root.AddNode(item.Name);
+                        foreach (var grp in equalGrouped)
+                        {
+                            if (ct.IsCancellationRequested)
+                                return;
+
+                            var records = grp.OrderByDescending(r => r.FileInfo.LinkCount).ToList();
+
+                            DbRecord head = records.First(); // the record with the highest link count is kept as the link target
+                            var tail = records.Skip(1).Where(r => r.Inode != head.Inode).ToList(); // same content, different inode: linking candidates
+                            var tailWithDuplicates = records.Skip(1).Where(r => r.Inode == head.Inode).ToList(); // already share head's inode
+
+                            ByteSize totalSize = records.Distinct(new DbRecordEqualityComparerByINode()).Sum(a => a.Size) - head.Size;
+
+                            var root = new Tree((head.Size + totalSize).ToStringWithDecimalPrefixedShortUnitName() + " total.");
+                            root.AddNode(((ByteSize)head.Size).ToStringWithDecimalPrefixedShortUnitName() + " " + head.Name.Replace("[", "[[").Replace("]", "]]"));
+                            foreach (var item in tail)
+                            {
+                                if (configuration.EnableLinking)
+                                {
+
+                                    try
+                                    {
+                                        // First rename
+                                        string tempFileName = item.FileInfo.FullName + ".to_hardlink";
+                                        File.Move(item.FileInfo.FullName, tempFileName);
+
+                                        try
+                                        {
+                                            // Then hardlink
+                                            head.FileInfo.CreateLink(item.FileInfo.FullName);
+
+                                            // Then delete
+                                            File.Delete(tempFileName);
+
+                                            root.AddNode("[green]:check_mark:[/] " +
+                                                         item.Name.Replace("[", "[[").Replace("]", "]]"));
+                                        }
+                                        catch (Exception)
+                                        {
+                                            File.Move(tempFileName, item.FileInfo.FullName); // put the renamed file back before reporting the failure
+                                            throw;
+                                        }
+                                    }
+                                    catch (Exception exception)
+                                    {
+                                        AnsiConsole.WriteException(exception, ExceptionFormats.ShortenEverything);
+                                        root.AddNode("[red]:cross_mark:[/] " +
+                                                     item.Name.Replace("[", "[[").Replace("]", "]]"));
+                                    }
+                                }
+                                else
+                                {
+                                    root.AddNode(((ByteSize)item.Size).ToStringWithDecimalPrefixedShortUnitName() + " " + item.Name.Replace("[", "[[").Replace("]", "]]"));
+                                }
+                            }
+
+                            if (configuration.BeVerbose)
+                                foreach (var duplicate in tailWithDuplicates)
+                                {
+                                    root.AddNode("[white]:link:[/] 0B " +
+                                                 duplicate.Name.Replace("[", "[[").Replace("]", "]]"));
+                                }
+
+                            if (tail.Any() || configuration.BeVerbose)
+                            {
+                                AnsiConsole.Render(root);
+                                AnsiConsole.WriteLine();
                             }
-                            AnsiConsole.Render(root);
                         }
                     }
-            });
+                });
         }
 
-        private static async Task Main(string[] args) {
-            var verboseOption = new Option<bool>(new []{"--verbose", "-v"} ,"Verbose");
+        private static async Task InitializeDb(SqliteConnection connection) // creates the files table and its size index when they are missing
+        {
+            await connection.ExecuteAsync(
+                "CREATE TABLE IF NOT EXISTS files " +
+                "(name TEXT PRIMARY KEY, size INTEGER NOT NULL, inode INTEGER NOT NULL);");
+            await connection.ExecuteAsync("CREATE INDEX IF NOT EXISTS idx_files_size ON files(size);");
+        }
+
+        private static async Task Main(string[] args)
+        {
+            var verboseOption = new Option<bool>(new[] { "--verbose", "-v" }, "Verbose");
+            var hardlinkOption = new Option<bool>(new[] { "--hardlink", "-l" }, "Hardlink duplicates");
+            var databaseOption = new Option<bool>(new[] { "--keep", "-k" }, () => true, "Keep database.");
+            var scanOption = new Option<bool>(new[] { "--no-scan" }, "Do not scan file system. Reuse database.");
 
             var directoryArgument = new Argument<DirectoryInfo>(
                 result => new DirectoryInfo("./"),
                 isDefault: true)
-                {
+            {
                 Name = "directory",
                 Description = "Directory to scan.",
                 Arity = ArgumentArity.ZeroOrOne,
             }.ExistingOnly();
 
-            var rootCommand = new RootCommand("$ File -v false ./")
+            var rootCommand = new RootCommand("Find duplicate files.")
             {
                 verboseOption,
+                hardlinkOption,
+                databaseOption,
+                scanOption,
                 directoryArgument,
             };
 
             ParseResult result = rootCommand.Parse(args);
+
             ArgumentResult dirResult = result.FindResultFor(directoryArgument);
             var dir = new DirectoryInfo(
-                    dirResult.Tokens.FirstOrDefault()?.Value
+                dirResult.Tokens.FirstOrDefault()?.Value
                 ?? dirResult.Argument.GetDefaultValue()?.ToString());
 
-            rootCommand.Handler = CommandHandler.Create<bool, CancellationToken>(
-                async (verbose, ct) => await IndexFiles(verbose, dir, ct));
+            var config = new Configuration
+            {
+                BeVerbose = result.ValueForOption(verboseOption),
+                EnableLinking = result.ValueForOption(hardlinkOption),
+                InitialDirectory = dir,
+                KeepDatabase = result.ValueForOption(databaseOption),
+                SkipFileScanning = result.ValueForOption(scanOption),
+            };
+
+            rootCommand.Handler = CommandHandler.Create<CancellationToken>(
+                async ct =>
+                {
+                    await IndexFiles(config, ct);
+                });
 
             await rootCommand.InvokeAsync(args);
         }
     }
 
-    public class DbRecord {
-        private readonly Lazy<Guid?> _guid;
+    class Configuration // settings read from the command line in Main
+    {
+        public bool BeVerbose { get; set; }
+        public bool EnableLinking { get; set; }
+        public DirectoryInfo InitialDirectory { get; set; }
+        public bool KeepDatabase { get; set; }
+        public bool SkipFileScanning { get; set; }
+    }
 
-        public DbRecord() {
+    public class DbRecord
+    {
+        private readonly Lazy<Guid?> _guid; // MD5 of the file content, computed on first access; null when the file cannot be read
+        private readonly Lazy<UnixFileInfo> _fileInfo;
+
+        public DbRecord()
+        {
             _guid = new Lazy<Guid?>(GetHash);
+            _fileInfo = new Lazy<UnixFileInfo>(GetFileInfo);
+        }
+
+        public DbRecord(UnixFileInfo fileInfo)
+        {
+            _guid = new Lazy<Guid?>(GetHash);
+            _fileInfo = new Lazy<UnixFileInfo>(fileInfo);
+            Name = fileInfo.GetOriginalPath();
+            Size = fileInfo.Length;
+            Inode = fileInfo.Inode;
         }
 
         public string Name { get; set; }
         public long Size { get; set; }
         public long Inode { get; set; }
-        public Guid? Hash => _guid.Value;
 
-        public Guid? GetHash() {
-            try {
+        public Guid? Hash => _guid.Value;
+        public UnixFileInfo FileInfo => _fileInfo.Value;
+
+        private UnixFileInfo GetFileInfo() => new(Name);
+
+        private Guid? GetHash()
+        {
+            try
+            {
                 using FileStream stream = File.OpenRead(Name);
                 var md5 = MD5.Create();
                 var bytes = md5.ComputeHash(stream);
                 return new Guid(bytes);
-            } catch {
+            }
+            catch
+            {
                 return null;
             }
         }
     }
+
+    public class DbRecordEqualityComparerByINode : EqualityComparer<DbRecord>
+    {
+        public override bool Equals(DbRecord x, DbRecord y)
+        {
+            return x?.Inode == y?.Inode;
+        }
+
+        public override int GetHashCode(DbRecord obj)
+        {
+            return obj?.Inode.GetHashCode() ?? 0; // null-tolerant, like Equals above
+        }
+    }
+
+    static class OriginalPathUnixFileSystemInfo
+    {
+        private static readonly Func<UnixFileSystemInfo, string> GetOriginalPathFunc; // getter for the OriginalPath property, looked up with Public | NonPublic binding flags
+
+        static OriginalPathUnixFileSystemInfo()
+        {
+            var method = new DynamicMethod("cheat", typeof(string), new[] { typeof(UnixFileSystemInfo) }, typeof(UnixFileSystemInfo), true);
+            var il = method.GetILGenerator();
+            il.Emit(OpCodes.Ldarg_0);
+            il.Emit(OpCodes.Castclass, typeof(UnixFileSystemInfo));
+            il.Emit(OpCodes.Callvirt, typeof(UnixFileSystemInfo)
+                .GetProperty("OriginalPath", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic)
+                .GetGetMethod(true));
+            il.Emit(OpCodes.Ret);
+            GetOriginalPathFunc = (Func<UnixFileSystemInfo, string>)method.CreateDelegate(typeof(Func<UnixFileSystemInfo, string>));
+        }
+
+        public static string GetOriginalPath(this UnixFileSystemInfo info) => GetOriginalPathFunc(info);
+
+        public static long GetSizeOnDisk(this UnixFileSystemInfo info) => info.BlockSize * info.BlocksAllocated;
+    }
 }