Compare commits

...

4 Commits

Author SHA1 Message Date
cf67f0e59a Optimizing enumeration 2021-04-13 12:14:55 +03:00
1fab2e09e8 wip 2021-04-12 08:59:46 +03:00
80a42d9d0a wip 2021-04-07 14:36:45 +03:00
7f39ed6025 remove database from VC 2021-04-07 07:50:03 +03:00
7 changed files with 1385 additions and 86 deletions

2
.gitignore vendored
View File

@@ -361,3 +361,5 @@ MigrationBackup/
# Fody - auto-generated XML schema
FodyWeavers.xsd
/Files/db.db

396
Files/ByteSize.cs Normal file
View File

@@ -0,0 +1,396 @@
using System;
using System.Globalization;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
#nullable enable
namespace Files
{
/// <summary>
/// An immutable number of bytes stored in a single <see cref="long"/>.
/// This part declares the well-known values, the operators and conversions, the
/// equality/comparison members and the <c>ToString</c> overloads.
/// </summary>
public readonly partial struct ByteSize : IEquatable<ByteSize>, IComparable<ByteSize>
{
    /// <summary>The smallest representable size (<see cref="long.MinValue"/> bytes).</summary>
    public static readonly ByteSize MinValue = new ByteSize(long.MinValue);

    /// <summary>The largest representable size (<see cref="long.MaxValue"/> bytes).</summary>
    public static readonly ByteSize MaxValue = new ByteSize(long.MaxValue);

    /// <summary>A size of zero bytes.</summary>
    public static readonly ByteSize ZeroValue = new ByteSize(0);

    // Arithmetic operates directly on the raw byte counts; no range checks are applied here.
    public static ByteSize operator +(ByteSize b1, ByteSize b2) => new ByteSize(b1._bytes + b2._bytes);
    public static ByteSize operator +(ByteSize b1, long b2) => new ByteSize(b1._bytes + b2);
    public static ByteSize operator +(ByteSize b1, int b2) => new ByteSize(b1._bytes + b2);
    public static ByteSize operator ++(ByteSize b) => new ByteSize(b._bytes + 1);
    public static ByteSize operator -(ByteSize b) => new ByteSize(-b._bytes);
    public static ByteSize operator -(ByteSize b1, ByteSize b2) => new ByteSize(b1._bytes - b2._bytes);
    public static ByteSize operator -(ByteSize b1, long b2) => new ByteSize(b1._bytes - b2);
    public static ByteSize operator -(ByteSize b1, int b2) => new ByteSize(b1._bytes - b2);
    public static ByteSize operator --(ByteSize b) => new ByteSize(b._bytes - 1);

    // Comparisons are plain comparisons of the byte counts.
    public static bool operator ==(ByteSize b1, ByteSize b2) => b1._bytes == b2._bytes;
    public static bool operator !=(ByteSize b1, ByteSize b2) => b1._bytes != b2._bytes;
    public static bool operator <(ByteSize b1, ByteSize b2) => b1._bytes < b2._bytes;
    public static bool operator <=(ByteSize b1, ByteSize b2) => b1._bytes <= b2._bytes;
    public static bool operator >(ByteSize b1, ByteSize b2) => b1._bytes > b2._bytes;
    public static bool operator >=(ByteSize b1, ByteSize b2) => b1._bytes >= b2._bytes;

    /// <summary>Wraps a raw byte count.</summary>
    public static implicit operator ByteSize(long bytes) => new ByteSize(bytes);

    /// <summary>Wraps a raw byte count.</summary>
    public static implicit operator ByteSize(int bytes) => new ByteSize(bytes);

    /// <summary>
    /// Parses <paramref name="text"/> with the current culture by calling
    /// <see cref="Parse(string, IFormatProvider)"/>; any exception thrown by that call propagates.
    /// </summary>
    public static implicit operator ByteSize(string text) => Parse(text, CultureInfo.CurrentCulture);

    /// <summary>Returns the raw byte count.</summary>
    public static implicit operator long(ByteSize bytes) => bytes._bytes;

    /// <summary>Returns the raw byte count cast to <see cref="int"/>.</summary>
    public static explicit operator int(ByteSize bytes) => (int)bytes._bytes;

    /// <summary>Formats the size exactly like <see cref="ToString()"/>.</summary>
    public static implicit operator string(ByteSize bytes) => bytes.ToString();

    /// <summary>The number of bytes this value represents.</summary>
    private readonly long _bytes;

    private ByteSize(long bytes)
    {
        _bytes = bytes;
    }

    /// <summary>Returns the sum of this size and <paramref name="bs"/>.</summary>
    public ByteSize Add(ByteSize bs) => new ByteSize(_bytes + bs._bytes);

    /// <summary>Returns this size increased by <paramref name="value"/> bytes.</summary>
    public ByteSize AddBytes(long value) => new ByteSize(_bytes + value);

    /// <inheritdoc />
    public int CompareTo(ByteSize other) => _bytes.CompareTo(other._bytes);

    /// <inheritdoc />
    public bool Equals(ByteSize other) => _bytes == other._bytes;

    /// <inheritdoc />
    public override bool Equals(object? obj) => obj is ByteSize other && Equals(other);

    /// <inheritdoc />
    public override int GetHashCode() => _bytes.GetHashCode();

    /// <summary>
    /// Formats the size with a decimal (power-of-1000) short unit name, the current culture
    /// and the numeric format <c>"0.##"</c>.
    /// </summary>
    public override string ToString() =>
        ToStringWithDecimalPrefixedUnitName("0.##");

    /// <summary>Formats with a decimal short unit name using <paramref name="format"/> for the number.</summary>
    public string ToString(string? format) =>
        ToStringWithDecimalPrefixedUnitName(format);

    /// <summary>Formats with a decimal short unit name using <paramref name="provider"/> and no explicit numeric format.</summary>
    public string ToString(IFormatProvider? provider) =>
        ToStringWithDecimalPrefixedUnitName(provider: provider);

    /// <summary>Formats with a decimal short unit name using the given numeric format and provider.</summary>
    public string ToString(string? format, IFormatProvider? provider) =>
        ToStringWithDecimalPrefixedUnitName(format, provider);

    /// <summary>
    /// Formats with a short unit name, choosing binary (power-of-1024) units when
    /// <paramref name="useBinaryUnitNamePrefix"/> is true and decimal units otherwise.
    /// </summary>
    public string ToString(string? format, IFormatProvider? provider, bool useBinaryUnitNamePrefix)
    {
        return useBinaryUnitNamePrefix
            ? ToStringWithBinaryPrefixedUnitName(format, provider)
            : ToStringWithDecimalPrefixedUnitName(format, provider);
    }

    /// <summary>
    /// Formats the size, selecting both the unit family (binary or decimal) and the unit
    /// name length (short such as "kB", or long such as "kilobytes").
    /// </summary>
    public string ToString(string? format, IFormatProvider? provider, bool useBinaryUnitNamePrefix, bool useShortUnitName)
    {
        return useBinaryUnitNamePrefix
            ? ToStringWithBinaryPrefixedUnitName(format, provider, useShortUnitName)
            : ToStringWithDecimalPrefixedUnitName(format, provider, useShortUnitName);
    }
}

/// <summary>Binary (power-of-1024) units: factories, adders, accessors and formatting.</summary>
public readonly partial struct ByteSize
{
    private const long _oneKibiByte = 1024;
    private const long _oneMebiByte = 1024 * _oneKibiByte;
    private const long _oneGibiByte = 1024 * _oneMebiByte;
    private const long _oneTebiByte = 1024 * _oneGibiByte;
    private const long _onePebiByte = 1024 * _oneTebiByte;
    private const long _oneExbiByte = 1024 * _onePebiByte;

    // The factories multiply by the unit size and cast the product to long.
    public static ByteSize FromKibiBytes(double value) => new ByteSize((long)(value * _oneKibiByte));
    public static ByteSize FromMebiBytes(double value) => new ByteSize((long)(value * _oneMebiByte));
    public static ByteSize FromGibiBytes(double value) => new ByteSize((long)(value * _oneGibiByte));
    public static ByteSize FromTebiBytes(double value) => new ByteSize((long)(value * _oneTebiByte));
    public static ByteSize FromPebiBytes(double value) => new ByteSize((long)(value * _onePebiByte));

    // The adders convert the amount the same way and add it to the current byte count.
    public ByteSize AddKibiBytes(double value) => new ByteSize((long)(value * _oneKibiByte) + _bytes);
    public ByteSize AddMebiBytes(double value) => new ByteSize((long)(value * _oneMebiByte) + _bytes);
    public ByteSize AddGibiBytes(double value) => new ByteSize((long)(value * _oneGibiByte) + _bytes);
    public ByteSize AddTebiBytes(double value) => new ByteSize((long)(value * _oneTebiByte) + _bytes);
    public ByteSize AddPebiBytes(double value) => new ByteSize((long)(value * _onePebiByte) + _bytes);

    // The size expressed as a (possibly fractional) number of the named binary unit.
    public double AsKibiBytes => (double)_bytes / _oneKibiByte;
    public double AsMebiBytes => (double)_bytes / _oneMebiByte;
    public double AsGibiBytes => (double)_bytes / _oneGibiByte;
    public double AsTebiBytes => (double)_bytes / _oneTebiByte;
    public double AsPebiBytes => (double)_bytes / _onePebiByte;

    /// <summary>Formats with a short binary unit name such as "KiB".</summary>
    public string ToStringWithBinaryPrefixedShortUnitName(string? format = null, IFormatProvider? provider = null) =>
        ToStringWithBinaryPrefixedUnitName(format, provider, true);

    /// <summary>Formats with a long binary unit name such as "kibibytes".</summary>
    public string ToStringWithBinaryPrefixedLongUnitName(string? format = null, IFormatProvider? provider = null) =>
        ToStringWithBinaryPrefixedUnitName(format, provider, false);

    /// <summary>
    /// Formats the size using the largest binary unit whose size does not exceed the byte count.
    /// </summary>
    /// <param name="format">Numeric format string applied to the scaled number.</param>
    /// <param name="provider">Format provider; the current culture is used when null.</param>
    /// <param name="useShortUnitName">True for "KiB"-style names, false for "kibibyte(s)"-style names.</param>
    /// <returns>The number, a space and the unit name.</returns>
    /// <remarks>
    /// Every guard compares against a positive threshold, so counts below 1024 (including all
    /// negative counts) are printed as whole bytes. The singular long name is used only when the
    /// count equals exactly one unit.
    /// </remarks>
    public string ToStringWithBinaryPrefixedUnitName(string? format = null, IFormatProvider? provider = null, bool useShortUnitName = true)
    {
        provider ??= CultureInfo.CurrentCulture;
        return _bytes switch
        {
            var b when b >= _oneExbiByte =>
                (b / (double)_oneExbiByte).ToString(format, provider) + (useShortUnitName ? " EiB" : b == _oneExbiByte ? " exbibyte" : " exbibytes"),
            var b when b >= _onePebiByte =>
                (b / (double)_onePebiByte).ToString(format, provider) + (useShortUnitName ? " PiB" : b == _onePebiByte ? " pebibyte" : " pebibytes"),
            var b when b >= _oneTebiByte =>
                (b / (double)_oneTebiByte).ToString(format, provider) + (useShortUnitName ? " TiB" : b == _oneTebiByte ? " tebibyte" : " tebibytes"),
            var b when b >= _oneGibiByte =>
                (b / (double)_oneGibiByte).ToString(format, provider) + (useShortUnitName ? " GiB" : b == _oneGibiByte ? " gibibyte" : " gibibytes"),
            var b when b >= _oneMebiByte =>
                (b / (double)_oneMebiByte).ToString(format, provider) + (useShortUnitName ? " MiB" : b == _oneMebiByte ? " mebibyte" : " mebibytes"),
            var b when b >= _oneKibiByte =>
                (b / (double)_oneKibiByte).ToString(format, provider) + (useShortUnitName ? " KiB" : b == _oneKibiByte ? " kibibyte" : " kibibytes"),
            var b =>
                b.ToString(format, provider) + (useShortUnitName ? " B" : b == 1 ? " byte" : " bytes")
        };
    }
}

/// <summary>Decimal (power-of-1000) units: factories, adders, accessors and formatting.</summary>
public readonly partial struct ByteSize
{
    private const long _oneKiloByte = 1000;
    private const long _oneMegaByte = 1000 * _oneKiloByte;
    private const long _oneGigaByte = 1000 * _oneMegaByte;
    private const long _oneTeraByte = 1000 * _oneGigaByte;
    private const long _onePetaByte = 1000 * _oneTeraByte;
    private const long _oneExaByte = 1000 * _onePetaByte;

    // The factories multiply by the unit size and cast the product to long.
    public static ByteSize FromKiloBytes(double value) => new ByteSize((long)(value * _oneKiloByte));
    public static ByteSize FromMegaBytes(double value) => new ByteSize((long)(value * _oneMegaByte));
    public static ByteSize FromGigaBytes(double value) => new ByteSize((long)(value * _oneGigaByte));
    public static ByteSize FromTeraBytes(double value) => new ByteSize((long)(value * _oneTeraByte));
    public static ByteSize FromPetaBytes(double value) => new ByteSize((long)(value * _onePetaByte));

    // The adders convert the amount the same way and add it to the current byte count.
    public ByteSize AddKiloBytes(double value) => new ByteSize((long)(value * _oneKiloByte) + _bytes);
    public ByteSize AddMegaBytes(double value) => new ByteSize((long)(value * _oneMegaByte) + _bytes);
    public ByteSize AddGigaBytes(double value) => new ByteSize((long)(value * _oneGigaByte) + _bytes);
    public ByteSize AddTeraBytes(double value) => new ByteSize((long)(value * _oneTeraByte) + _bytes);
    public ByteSize AddPetaBytes(double value) => new ByteSize((long)(value * _onePetaByte) + _bytes);

    // The size expressed as a (possibly fractional) number of the named decimal unit.
    public double AsKiloBytes => (double)_bytes / _oneKiloByte;
    public double AsMegaBytes => (double)_bytes / _oneMegaByte;
    public double AsGigaBytes => (double)_bytes / _oneGigaByte;
    public double AsTeraBytes => (double)_bytes / _oneTeraByte;
    public double AsPetaBytes => (double)_bytes / _onePetaByte;

    /// <summary>Formats with a short decimal unit name such as "kB".</summary>
    public string ToStringWithDecimalPrefixedShortUnitName(string? format = null, IFormatProvider? provider = null) =>
        ToStringWithDecimalPrefixedUnitName(format, provider, true);

    /// <summary>Formats with a long decimal unit name such as "kilobytes".</summary>
    public string ToStringWithDecimalPrefixedLongUnitName(string? format = null, IFormatProvider? provider = null) =>
        // Must pass false: passing true here made the "long" variant emit short names.
        ToStringWithDecimalPrefixedUnitName(format, provider, false);

    /// <summary>
    /// Formats the size using the largest decimal unit whose size does not exceed the byte count.
    /// </summary>
    /// <param name="format">Numeric format string applied to the scaled number.</param>
    /// <param name="provider">Format provider; the current culture is used when null.</param>
    /// <param name="useShortUnitName">True for "kB"-style names, false for "kilobyte(s)"-style names.</param>
    /// <returns>The number, a space and the unit name.</returns>
    /// <remarks>
    /// Every guard compares against a positive threshold, so counts below 1000 (including all
    /// negative counts) are printed as whole bytes. The singular long name is used only when the
    /// count equals exactly one unit.
    /// </remarks>
    public string ToStringWithDecimalPrefixedUnitName(string? format = null, IFormatProvider? provider = null, bool useShortUnitName = true)
    {
        provider ??= CultureInfo.CurrentCulture;
        return _bytes switch
        {
            var b when b >= _oneExaByte =>
                (b / (double)_oneExaByte).ToString(format, provider) + (useShortUnitName ? " EB" : b == _oneExaByte ? " exabyte" : " exabytes"),
            var b when b >= _onePetaByte =>
                (b / (double)_onePetaByte).ToString(format, provider) + (useShortUnitName ? " PB" : b == _onePetaByte ? " petabyte" : " petabytes"),
            var b when b >= _oneTeraByte =>
                (b / (double)_oneTeraByte).ToString(format, provider) + (useShortUnitName ? " TB" : b == _oneTeraByte ? " terabyte" : " terabytes"),
            var b when b >= _oneGigaByte =>
                (b / (double)_oneGigaByte).ToString(format, provider) + (useShortUnitName ? " GB" : b == _oneGigaByte ? " gigabyte" : " gigabytes"),
            var b when b >= _oneMegaByte =>
                (b / (double)_oneMegaByte).ToString(format, provider) + (useShortUnitName ? " MB" : b == _oneMegaByte ? " megabyte" : " megabytes"),
            var b when b >= _oneKiloByte =>
                (b / (double)_oneKiloByte).ToString(format, provider) + (useShortUnitName ? " kB" : b == _oneKiloByte ? " kilobyte" : " kilobytes"),
            var b =>
                b.ToString(format, provider) + (useShortUnitName ? " B" : b == 1 ? " byte" : " bytes")
        };
    }
}

/// <summary>Parsing of text such as "1.5 GiB" into <see cref="ByteSize"/> values.</summary>
public readonly partial struct ByteSize
{
    /// <summary>
    /// Default unit-name to bytes-per-unit table, matched with ordinal case-insensitive comparison.
    /// The byte units are included so that text produced by the <c>ToString</c> overloads for
    /// sizes below one kilobyte ("512 B", "1 byte", "2 bytes") can be parsed back.
    /// </summary>
    public static readonly Dictionary<string, long> DefaultMatchesForUnitsOfMeasure =
        new Dictionary<string, long>(StringComparer.OrdinalIgnoreCase)
        {
            ["b"] = 1,
            ["byte"] = 1,
            ["bytes"] = 1,
            ["kb"] = _oneKiloByte,
            ["kilobyte"] = _oneKiloByte,
            ["kilobytes"] = _oneKiloByte,
            ["kib"] = _oneKibiByte,
            ["kibibyte"] = _oneKibiByte,
            ["kibibytes"] = _oneKibiByte,
            ["mb"] = _oneMegaByte,
            ["megabyte"] = _oneMegaByte,
            ["megabytes"] = _oneMegaByte,
            ["mib"] = _oneMebiByte,
            ["mebibyte"] = _oneMebiByte,
            ["mebibytes"] = _oneMebiByte,
            ["gb"] = _oneGigaByte,
            ["gigabyte"] = _oneGigaByte,
            ["gigabytes"] = _oneGigaByte,
            ["gib"] = _oneGibiByte,
            ["gibibyte"] = _oneGibiByte,
            ["gibibytes"] = _oneGibiByte,
            ["tb"] = _oneTeraByte,
            ["terabyte"] = _oneTeraByte,
            ["terabytes"] = _oneTeraByte,
            ["tib"] = _oneTebiByte,
            ["tebibyte"] = _oneTebiByte,
            ["tebibytes"] = _oneTebiByte,
            ["pb"] = _onePetaByte,
            ["petabyte"] = _onePetaByte,
            ["petabytes"] = _onePetaByte,
            ["pib"] = _onePebiByte,
            ["pebibyte"] = _onePebiByte,
            ["pebibytes"] = _onePebiByte,
            ["eb"] = _oneExaByte,
            ["exabyte"] = _oneExaByte,
            ["exabytes"] = _oneExaByte,
            ["eib"] = _oneExbiByte,
            ["exbibyte"] = _oneExbiByte,
            ["exbibytes"] = _oneExbiByte,
        };

    /// <summary>Tries to parse <paramref name="s"/> with the current culture and the default unit table.</summary>
    public static bool TryParse(string s, out ByteSize size) =>
        TryParse(s.AsSpan(), CultureInfo.CurrentCulture, out size, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Tries to parse <paramref name="s"/> with <paramref name="provider"/> and the default unit table.</summary>
    public static bool TryParse(string s, IFormatProvider provider, out ByteSize size) =>
        TryParse(s.AsSpan(), provider, out size, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Tries to parse <paramref name="s"/> with <paramref name="provider"/> and a caller-supplied unit table.</summary>
    public static bool TryParse(string s, IFormatProvider provider, out ByteSize size,
        IDictionary<string, long> unitsOfMeasure) =>
        TryParse(s.AsSpan(), provider, out size, unitsOfMeasure);

    /// <summary>
    /// Tries to parse text of the form "number [unit]", where the two tokens are separated by
    /// whitespace. Leading whitespace and anything after the unit token are ignored.
    /// </summary>
    /// <param name="span">Text to parse.</param>
    /// <param name="provider">Format provider used to parse the number (with <see cref="NumberStyles.Any"/>).</param>
    /// <param name="size">The parsed size, or <see cref="ZeroValue"/> on failure.</param>
    /// <param name="unitsOfMeasure">Unit name to bytes-per-unit table; a missing unit token means bytes.</param>
    /// <returns>
    /// False when the number is malformed, the unit is unknown, or the resulting byte count is
    /// NaN or does not fit in a <see cref="long"/>; true otherwise.
    /// </returns>
    public static bool TryParse(ReadOnlySpan<char> span, IFormatProvider provider, out ByteSize size, IDictionary<string, long> unitsOfMeasure)
    {
        SplitNumberAndUnit(span, out ReadOnlySpan<char> numberPart, out ReadOnlySpan<char> unitPart);

        if (double.TryParse(numberPart.ToString(), NumberStyles.Any, provider, out double number)
            && TryGetMultiplier(unitPart.ToString(), unitsOfMeasure, out long multiplier)
            && TryScaleToBytes(number, multiplier, out long bytes))
        {
            size = new ByteSize(bytes);
            return true;
        }

        size = ZeroValue;
        return false;
    }

    /// <summary>Parses <paramref name="text"/> with the current culture and the default unit table.</summary>
    public static ByteSize Parse(string text) =>
        Parse(text.AsSpan(), CultureInfo.CurrentCulture, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Parses <paramref name="text"/> with <paramref name="provider"/> and the default unit table.</summary>
    public static ByteSize Parse(string text, IFormatProvider provider) =>
        Parse(text.AsSpan(), provider, DefaultMatchesForUnitsOfMeasure);

    /// <summary>Parses <paramref name="text"/> with <paramref name="provider"/> and a caller-supplied unit table.</summary>
    public static ByteSize Parse(string text, IFormatProvider provider, IDictionary<string, long> unitsOfMeasure) =>
        Parse(text.AsSpan(), provider, unitsOfMeasure);

    /// <summary>
    /// Parses text of the form "number [unit]", where the two tokens are separated by whitespace.
    /// Leading whitespace and anything after the unit token are ignored.
    /// </summary>
    /// <param name="span">Text to parse.</param>
    /// <param name="provider">Format provider passed to <see cref="double.Parse(string, IFormatProvider)"/>.</param>
    /// <param name="unitsOfMeasure">Unit name to bytes-per-unit table; a missing unit token means bytes.</param>
    /// <returns>The parsed size.</returns>
    /// <exception cref="FormatException">The number token is malformed (thrown by <c>double.Parse</c>).</exception>
    /// <exception cref="ArgumentException">The unit token is not in <paramref name="unitsOfMeasure"/>.</exception>
    /// <exception cref="OverflowException">The byte count is NaN or does not fit in a <see cref="long"/>.</exception>
    /// <remarks>
    /// NOTE(review): this overload parses the number with the default number styles while
    /// <c>TryParse</c> uses <see cref="NumberStyles.Any"/>; confirm whether that difference is intended.
    /// </remarks>
    public static ByteSize Parse(ReadOnlySpan<char> span, IFormatProvider provider, IDictionary<string, long> unitsOfMeasure)
    {
        SplitNumberAndUnit(span, out ReadOnlySpan<char> numberPart, out ReadOnlySpan<char> unitPart);

        // The number is parsed first so that a malformed number is reported before an unknown unit.
        double number = double.Parse(numberPart.ToString(), provider);

        if (!TryGetMultiplier(unitPart.ToString(), unitsOfMeasure, out long multiplier))
        {
            throw new ArgumentException("Unknown unit.", nameof(span));
        }

        if (!TryScaleToBytes(number, multiplier, out long bytes))
        {
            throw new OverflowException("The value does not fit in a 64-bit byte count.");
        }

        return new ByteSize(bytes);
    }

    /// <summary>Splits the input into its first two whitespace-separated tokens.</summary>
    private static void SplitNumberAndUnit(ReadOnlySpan<char> span, out ReadOnlySpan<char> numberPart, out ReadOnlySpan<char> unitPart)
    {
        numberPart = TakeUntilWhitespace(SkipWhitespace(span), out ReadOnlySpan<char> afterNumber);
        unitPart = TakeUntilWhitespace(SkipWhitespace(afterNumber), out _);
    }

    /// <summary>Looks up the bytes-per-unit factor; an empty unit that is not in the table means 1.</summary>
    private static bool TryGetMultiplier(string unit, IDictionary<string, long> unitsOfMeasure, out long multiplier)
    {
        // Single lookup instead of ContainsKey followed by the indexer.
        if (unitsOfMeasure.TryGetValue(unit, out multiplier))
        {
            return true;
        }

        if (string.IsNullOrEmpty(unit))
        {
            multiplier = 1;
            return true;
        }

        multiplier = 0;
        return false;
    }

    /// <summary>Multiplies and converts to long, rejecting NaN and values outside the long range.</summary>
    private static bool TryScaleToBytes(double number, long multiplier, out long bytes)
    {
        double scaled = number * multiplier;

        // (double)long.MaxValue rounds up to 2^63, which is already out of range, hence ">=".
        if (double.IsNaN(scaled) || scaled < long.MinValue || scaled >= long.MaxValue)
        {
            bytes = 0;
            return false;
        }

        bytes = (long)scaled;
        return true;
    }

    /// <summary>Returns the input without its leading whitespace (empty when it is all whitespace).</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ReadOnlySpan<char> SkipWhitespace(ReadOnlySpan<char> chars)
    {
        for (int i = 0; i < chars.Length; i++)
        {
            if (!char.IsWhiteSpace(chars[i]))
            {
                return chars.Slice(i);
            }
        }

        return ReadOnlySpan<char>.Empty;
    }

    /// <summary>
    /// Returns the characters before the first whitespace; <paramref name="rest"/> receives the
    /// remainder starting at that whitespace, or an empty span when there is none.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ReadOnlySpan<char> TakeUntilWhitespace(ReadOnlySpan<char> chars, out ReadOnlySpan<char> rest)
    {
        for (int i = 0; i < chars.Length; i++)
        {
            if (char.IsWhiteSpace(chars[i]))
            {
                rest = chars.Slice(i);
                return chars.Slice(0, i);
            }
        }

        rest = ReadOnlySpan<char>.Empty;
        return chars;
    }
}
}
#nullable restore

View File

@@ -10,68 +10,97 @@ using System.CommandLine.Invocation;
using System.CommandLine.Parsing;
using System.Threading.Tasks;
using System.Linq;
using System.Reflection;
using System.Reflection.Emit;
using Dapper;
using System.Security.Cryptography;
using Mono.Unix.Native;
namespace Files {
class Program {
private static async Task IndexFiles(bool isVerbose, DirectoryInfo startDirectory, CancellationToken ct) {
namespace Files
{
class Program
{
private static async Task IndexFiles(Configuration configuration, CancellationToken ct)
{
await AnsiConsole.Status()
.StartAsync("Thinking...", async ctx => {
using var connection = new SqliteConnection("Data Source=db.db");
.StartAsync("Thinking...", async ctx =>
{
await using var connection = new SqliteConnection("Data Source=db.db");
connection.Open();
await using var transaction = await connection.BeginTransactionAsync();
await using var transaction = await connection.BeginTransactionAsync(ct);
var cnt = connection.ExecuteScalar<int>("SELECT count(*) FROM sqlite_master WHERE type='table' AND name=@tableName;", new { tableName = "files" });
if (cnt == 0)
await InitializeDb(connection);
if (!configuration.SkipFileScanning)
{
connection.Execute("CREATE TABLE IF NOT EXISTS files (name TEXT, size INTEGER, inode INTEGER);");
}
Stack<string> directoriesStack = new Stack<string>();
directoriesStack.Push(configuration.InitialDirectory?.ToString() ?? ".");
Queue<string> directoriesQueue = new Queue<string>();
directoriesQueue.Enqueue(startDirectory?.ToString() ?? ".");
try
{
while (directoriesStack.TryPop(out string peekedDir))
{
string safePeekedDir = peekedDir.Replace("[", "[[").Replace("]", "]]");
ctx.Status(safePeekedDir);
try {
while (directoriesQueue.TryDequeue(out string peekedDir)) {
ctx.Status(peekedDir.Replace("[", "[[").Replace("]", "]]"));
UnixDirectoryInfo dirInfo = new(peekedDir);
if (!dirInfo.CanAccess(Mono.Unix.Native.AccessModes.R_OK)
|| !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK)) {
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {dirInfo.ToString().Replace("[", "[[").Replace("]", "]]")}");
return;
}
UnixFileSystemInfo[] entries = dirInfo.GetFileSystemEntries();
foreach (UnixFileSystemInfo entry in entries) {
string relativePath = Path.Combine(peekedDir, entry.Name);
if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK)) {
if (entry.IsDirectory)
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {relativePath.Replace("[", "[[").Replace("]", "]]")}");
else if (entry.IsRegularFile)
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {relativePath.Replace("[", "[[").Replace("]", "]]")}");
continue;
}
if (entry.IsDirectory) {
directoriesQueue.Enqueue(relativePath);
continue;
}
connection.Execute("INSERT INTO files (name, size, inode) VALUES (@name, @Length, @Inode);", new { name = relativePath, entry.Length, entry.Inode });
if (isVerbose)
AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] {relativePath.Replace("[", "[[").Replace("]", "]]")}");
if (ct.IsCancellationRequested)
UnixDirectoryInfo dirInfo = new(peekedDir);
if (!dirInfo.CanAccess(Mono.Unix.Native.AccessModes.R_OK)
|| !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK))
{
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {dirInfo.ToString().Replace("[", "[[").Replace("]", "]]")}");
return;
}
}
transaction.Commit();
} catch (Exception exception) {
await transaction.RollbackAsync();
AnsiConsole.WriteException(exception);
}
if (configuration.BeVerbose)
AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] :file_folder: {safePeekedDir}");
var entries = dirInfo.GetFileSystemEntries().OrderByDescending(e => e.Name);
foreach (UnixFileSystemInfo entry in entries)
{
string relativePath = Path.Combine(peekedDir, entry.Name);
string safeRelativePath = relativePath.Replace("[", "[[").Replace("]", "]]");
if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK))
{
if (entry.IsDirectory)
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {safeRelativePath}");
else if (entry.IsRegularFile)
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {safeRelativePath}");
continue;
}
if (entry.IsDirectory)
{
directoriesStack.Push(relativePath);
continue;
}
if (!entry.IsRegularFile)
{
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_FILE:[/] :red_exclamation_mark: {safeRelativePath}");
continue;
}
await connection.ExecuteAsync("INSERT OR REPLACE INTO files (name, size, inode) VALUES (@name, @Length, @Inode);", new { name = relativePath, entry.Length, entry.Inode });
if (configuration.BeVerbose)
AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] :page_facing_up: {safeRelativePath}");
if (ct.IsCancellationRequested)
return;
}
}
await transaction.CommitAsync(ct);
}
catch (Exception exception)
{
await transaction.RollbackAsync(ct);
AnsiConsole.WriteException(exception);
return;
}
if (configuration.BeVerbose) AnsiConsole.WriteLine();
return;
}
ctx.Status("Finding duplicates...");
@@ -79,78 +108,474 @@ namespace Files {
var potential = connection.Query<(int cnt, long size)>("SELECT COUNT(*) cnt, size FROM files WHERE size != 0 GROUP BY size HAVING cnt > 1 ORDER BY size * cnt DESC;");
foreach (var potentialFile in potential) {
var sameSize = connection.Query<DbRecord>("SELECT name, size, inode FROM files WHERE size = @size",
foreach (var potentialFile in potential)
{
if (ct.IsCancellationRequested)
return;
var sameSize = connection.Query<UnixFileRecord>("SELECT name, size, inode FROM files WHERE size = @size",
new { potentialFile.size }).ToList();
sameSize.CalculateHashes();
var recordsWithErrors = sameSize
.Where(r => !r.Hash.HasValue);
foreach (var dbRecord in recordsWithErrors)
{
AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {dbRecord.Name.Replace("[", "[[").Replace("]", "]]")}");
}
var equalGrouped = sameSize
.Where(r => r.Hash.HasValue)
.GroupBy(r=>r.Hash)
.Where(g=>g.Count() > 1)
.GroupBy(r => r.Hash)
.Where(g => g.Count() > 1)
.ToList();
foreach (var grp in equalGrouped) {
var root = new Tree(":double_exclamation_mark: " + grp.Key);
foreach (var item in grp) {
root.AddNode(item.Name);
foreach (var grp in equalGrouped)
{
if (ct.IsCancellationRequested)
return;
var records = grp.OrderByDescending(r => r.FileInfo.LinkCount).ToList();
UnixFileRecord head = records.First();
var tail = records.Skip(1).Where(r => r.INode != head.INode).ToList();
var tailWithDuplicates = records.Skip(1).Where(r => r.INode == head.INode).ToList();
ByteSize totalSize = records.Distinct(new DbRecordEqualityComparerByINode()).Sum(a => a.Size) - head.Size;
var root = new Tree((head.Size + totalSize).ToStringWithDecimalPrefixedShortUnitName() + " total.");
root.AddNode(((ByteSize)head.Size).ToStringWithDecimalPrefixedShortUnitName() + " " + head.Name.Replace("[", "[[").Replace("]", "]]"));
foreach (var item in tail)
{
if (configuration.EnableLinking)
{
try
{
// First rename
string tempFileName = item.FileInfo.FullName + ".to_hardlink";
File.Move(item.FileInfo.FullName, tempFileName);
try
{
// Then hardlink
head.FileInfo.CreateLink(item.FileInfo.FullName);
// Then delete
File.Delete(tempFileName);
root.AddNode("[green]:check_mark:[/] " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
catch (Exception)
{
File.Move(tempFileName, item.FileInfo.FullName);
throw;
}
}
catch (Exception exception)
{
AnsiConsole.WriteException(exception, ExceptionFormats.ShortenEverything);
root.AddNode("[red]:cross_mark:[/] " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
}
else
{
root.AddNode(((ByteSize)item.Size).ToStringWithDecimalPrefixedShortUnitName() + " " + item.Name.Replace("[", "[[").Replace("]", "]]"));
}
}
if (configuration.BeVerbose)
foreach (var duplicate in tailWithDuplicates)
{
root.AddNode("[white]:link:[/] 0B " +
duplicate.Name.Replace("[", "[[").Replace("]", "]]"));
}
if (tail.Any() || configuration.BeVerbose)
{
AnsiConsole.Render(root);
AnsiConsole.WriteLine();
}
AnsiConsole.Render(root);
}
}
});
});
}
private static async Task Main(string[] args) {
var verboseOption = new Option<bool>(new []{"--verbose", "-v"} ,"Verbose");
/// <summary>
/// Creates the <c>files</c> table and its size/inode indexes when they do not exist yet.
/// The statements are executed one after another on <paramref name="connection"/>.
/// </summary>
/// <param name="connection">Connection the DDL statements are executed on.</param>
private static async Task InitializeDb(SqliteConnection connection)
{
    const string createFilesTable =
        "CREATE TABLE IF NOT EXISTS files (" +
        "name TEXT PRIMARY KEY, " +
        "size INTEGER NOT NULL, " +
        "inode INTEGER NOT NULL, " +
        "hash TEXT);";
    const string createSizeIndex = "CREATE INDEX IF NOT EXISTS idx_files_size ON files(size);";
    const string createInodeIndex = "CREATE INDEX IF NOT EXISTS idx_files_inode ON files(inode);";

    // Table first, then the indexes that refer to it.
    string[] schemaStatements = { createFilesTable, createSizeIndex, createInodeIndex };
    foreach (string statement in schemaStatements)
    {
        await connection.ExecuteAsync(statement);
    }
}
private static async Task Main(string[] args)
{
var verboseOption = new Option<bool>(new[] { "--verbose", "-v" }, "Verbose");
var hardlinkOption = new Option<bool>(new[] { "--hardlink", "-l" }, "Hardlink duplicates");
var databaseOption = new Option<bool>(new[] { "--keep", "-k" }, () => true, "Keep database.");
var scanOption = new Option<bool>(new[] { "--no-scan" }, "Do not scan file system. Reuse database.");
var dbFileOption = new Option<FileInfo>(new[] { "--database", "-db" }, "Store database in file.");
var directoryArgument = new Argument<DirectoryInfo>(
result => new DirectoryInfo("./"), isDefault: true)
{
{
Name = "directory",
Description = "Directory to scan.",
Arity = ArgumentArity.ZeroOrOne,
}.ExistingOnly();
var rootCommand = new RootCommand("$ File -v false ./")
var rootCommand = new RootCommand("Find duplicate files.")
{
verboseOption,
hardlinkOption,
databaseOption,
scanOption,
dbFileOption,
directoryArgument,
};
ParseResult result = rootCommand.Parse(args);
ArgumentResult dirResult = result.FindResultFor(directoryArgument);
var dir = new DirectoryInfo(
dirResult.Tokens.FirstOrDefault()?.Value
?? dirResult.Argument.GetDefaultValue()?.ToString());
rootCommand.Handler = CommandHandler.Create<bool, CancellationToken>(
async (verbose, ct) => await IndexFiles(verbose, dir, ct));
var config = new Configuration
{
BeVerbose = result.ValueForOption(verboseOption),
EnableLinking = result.ValueForOption(hardlinkOption),
InitialDirectory = dir,
KeepDatabase = result.ValueForOption(databaseOption),
SkipFileScanning = result.ValueForOption(scanOption),
DatabaseFile = result.ValueForOption(dbFileOption),
};
rootCommand.Handler = CommandHandler.Create<CancellationToken>(
async ct =>
{
//await IndexFiles(config, ct);
await Begin(config, ct);
});
await rootCommand.InvokeAsync(args);
}
}
public class DbRecord {
private readonly Lazy<Guid?> _guid;
private static async Task Begin(Configuration configuration, CancellationToken ct) =>
await AnsiConsole.Status()
.StartAsync("Initializing...", async ctx =>
{
string dbFileName = configuration.DatabaseFile?.FullName ?? ":memory:";
await using var connection = new SqliteConnection($"Data Source={dbFileName};");
connection.Open();
await using var transaction = await connection.BeginTransactionAsync(ct);
public DbRecord() {
_guid = new Lazy<Guid?>(GetHash);
try
{
await InitializeDb(connection);
if (!configuration.SkipFileScanning)
{
await ScanFiles(configuration, connection, ctx, ct);
}
FindDuplicates(configuration, connection, ctx, ct);
await transaction.CommitAsync(ct);
}
catch (OperationCanceledException)
{
await transaction.RollbackAsync();
AnsiConsole.WriteLine("Canceled!");
}
catch (Exception exception)
{
await transaction.RollbackAsync();
AnsiConsole.WriteException(exception);
}
});
/// <summary>
/// Enumerates the paths below <c>configuration.InitialDirectory</c> and stores every regular
/// file in the <c>files</c> table (name, size, inode), reporting progress and problems on the console.
/// </summary>
/// <param name="configuration">Supplies the start directory and the verbosity flag.</param>
/// <param name="sqliteConnection">Connection the file rows are written to.</param>
/// <param name="statusContext">Status line that is updated with each directory that is reached.</param>
/// <param name="ct">Passed to the enumerator; also polled after each processed entry to stop early.</param>
/// <remarks>
/// Entries reported with an error, entries of a non-target type and entries that fail
/// validation are printed (the non-target ones only when verbose) and skipped.
/// Symbolic links that pass validation are neither recorded nor reported.
/// </remarks>
private static async Task ScanFiles(Configuration configuration, SqliteConnection sqliteConnection,
    StatusContext statusContext, CancellationToken ct)
{
    // Doubles square brackets so that AnsiConsole markup prints them literally
    // instead of trying to interpret them as a style tag.
    static string Escape(string text) => text.Replace("[", "[[").Replace("]", "]]");

    // The filter accepts everything; filtering by entry type happens in the loop below.
    UnixFileSystemEnumerator.FilterEnumeratorDelegate filter = (directory, entry, entryType, errno) => true;
    var pathEnumerable = UnixFileSystemEnumerator.EnumeratePaths(
        configuration.InitialDirectory.ToString(),
        filter,
        ct);

    foreach ((string entryPath, byte entryType, Errno errno) in pathEnumerable)
    {
        if (errno != 0)
        {
            string errorDescription = UnixMarshal.GetErrorDescription(errno);
            AnsiConsole.MarkupLine($"[red]:cross_mark: {Escape(errorDescription)}:[/] :file_folder: {Escape(entryPath)}");
            continue;
        }

        string entryTypeEmoji = entryType switch
        {
            DirentType.DT_DIR => Emoji.Known.FileFolder,
            DirentType.DT_REG => Emoji.Known.PageFacingUp,
            DirentType.DT_LNK => Emoji.Known.Link,
            DirentType.DT_BLK => Emoji.Known.ComputerDisk,
            DirentType.DT_CHR => Emoji.Known.Keyboard,
            DirentType.DT_FIFO => Emoji.Known.PButton,
            DirentType.DT_SOCK => Emoji.Known.ElectricPlug,
            DirentType.DT_UNKNOWN => Emoji.Known.Potato,
            _ => Emoji.Known.PileOfPoo,
        };

        if (!UnixFileSystemEnumerator.IsOfTarget(entryType, SearchTarget.DirectoriesAndFiles))
        {
            if (!configuration.BeVerbose)
            {
                continue;
            }

            string fileType = entryType switch
            {
                DirentType.DT_DIR => "Directory",
                DirentType.DT_REG => "Regular file",
                DirentType.DT_LNK => "Symbolic link",
                DirentType.DT_BLK => "Block device",
                DirentType.DT_CHR => "Character device",
                DirentType.DT_FIFO => "Named pipe",
                DirentType.DT_SOCK => "Socket",
                DirentType.DT_UNKNOWN => "UNKNOWN",
                _ => "WRONG",
            };
            AnsiConsole.MarkupLine($"[yellow]{Emoji.Known.FastForwardButton} {fileType}:[/] {entryTypeEmoji} {Escape(entryPath)}");
            continue;
        }

        UnixFileSystemInfo entry = entryType switch
        {
            DirentType.DT_REG => new UnixFileInfo(entryPath),
            DirentType.DT_DIR => new UnixDirectoryInfo(entryPath),
            DirentType.DT_LNK => new UnixSymbolicLinkInfo(entryPath),
            _ => throw new FileLoadException($"Cannot scan {entryTypeEmoji} {entryPath}"),
        };

        if (!entry.GetValid())
        {
            string errorDescription = UnixMarshal.GetErrorDescription(Stdlib.GetLastError());
            // The description is escaped here as well, matching the errno branch above;
            // an unescaped '[' would otherwise be read as markup.
            AnsiConsole.MarkupLine($"[red]:cross_mark: {Escape(errorDescription)}:[/] {entryTypeEmoji} {Escape(entryPath)}");
            continue;
        }

        if (entry.GetType() == typeof(UnixFileInfo)) // Faster than "is"
        {
            var record = new UnixFileRecord((UnixFileInfo)entry);
            await sqliteConnection.ExecuteAsync("INSERT OR REPLACE INTO files (name, size, inode) VALUES (@Name, @Size, @INode);", record);
            if (configuration.BeVerbose)
            {
                AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] {entryTypeEmoji} {Escape(entryPath)}");
            }
        }
        else if (entry.GetType() == typeof(UnixDirectoryInfo)) // Faster than "is"
        {
            // Directories are only shown in the status line; they are not stored.
            statusContext.Status(Escape(entryPath));
        }

        if (ct.IsCancellationRequested)
        {
            return;
        }
    }
}
public string Name { get; set; }
public long Size { get; set; }
public long Inode { get; set; }
public Guid? Hash => _guid.Value;
private static void FindDuplicates(Configuration configuration,
SqliteConnection connection, StatusContext ctx,
CancellationToken ct)
{
ctx.Status("Finding duplicates...");
ctx.Spinner(Spinner.Known.Aesthetic);
public Guid? GetHash() {
try {
using FileStream stream = File.OpenRead(Name);
var md5 = MD5.Create();
var bytes = md5.ComputeHash(stream);
return new Guid(bytes);
} catch {
return null;
var potential = connection.Query<(int cnt, long size)>(
"SELECT COUNT(*) cnt, size FROM files WHERE size != 0 GROUP BY size HAVING cnt > 1 ORDER BY size * cnt DESC;");
foreach (var potentialFile in potential)
{
ct.ThrowIfCancellationRequested();
var sameSize = connection.Query<UnixFileRecord>("SELECT name, size, inode FROM files WHERE size = @size",
new { potentialFile.size }).ToList();
var recordsWithErrors = sameSize
.Where(r => !r.Hash.HasValue);
foreach (var dbRecord in recordsWithErrors)
{
AnsiConsole.MarkupLine(
$"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {dbRecord.Name.Replace("[", "[[").Replace("]", "]]")}");
}
var equalGrouped = sameSize
.Where(r => r.Hash.HasValue)
.GroupBy(r => r.Hash)
.Where(g => g.Count() > 1)
.ToList();
foreach (var grp in equalGrouped)
{
ct.ThrowIfCancellationRequested();
var records = grp.OrderByDescending(r => r.FileInfo.LinkCount).ToList();
UnixFileRecord head = records.First();
var tail = records.Skip(1).Where(r => r.INode != head.INode).ToList();
var tailWithDuplicates = records.Skip(1).Where(r => r.INode == head.INode).ToList();
ByteSize totalSize = records.Distinct(new DbRecordEqualityComparerByINode()).Sum(a => a.Size) - head.Size;
var root = new Tree((head.Size + totalSize).ToStringWithDecimalPrefixedShortUnitName() + " total.");
root.AddNode(((ByteSize)head.Size).ToStringWithDecimalPrefixedShortUnitName() + " " +
head.Name.Replace("[", "[[").Replace("]", "]]"));
foreach (var item in tail)
{
if (configuration.EnableLinking)
{
try
{
// First rename
string tempFileName = item.FileInfo.FullName + ".to_hardlink";
File.Move(item.FileInfo.FullName, tempFileName);
try
{
// Then hardlink
head.FileInfo.CreateLink(item.FileInfo.FullName);
// Then delete
File.Delete(tempFileName);
root.AddNode("[green]:check_mark:[/] " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
catch (Exception)
{
File.Move(tempFileName, item.FileInfo.FullName);
throw;
}
}
catch (Exception exception)
{
AnsiConsole.WriteException(exception, ExceptionFormats.ShortenEverything);
root.AddNode("[red]:cross_mark:[/] " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
}
else
{
root.AddNode(((ByteSize)item.Size).ToStringWithDecimalPrefixedShortUnitName() + " " +
item.Name.Replace("[", "[[").Replace("]", "]]"));
}
}
if (configuration.BeVerbose)
foreach (var duplicate in tailWithDuplicates)
{
root.AddNode("[white]:anchor:[/] 0B " +
duplicate.Name.Replace("[", "[[").Replace("]", "]]"));
}
if (tail.Any() || configuration.BeVerbose)
{
AnsiConsole.Render(root);
AnsiConsole.WriteLine();
}
}
}
}
}
/// <summary>
/// Option bag for one run of the tool. Every member is a plain read/write auto-property;
/// this class performs no validation.
/// </summary>
class Configuration
{
/// <summary>
/// When set, the duplicate report in this file also lists records that share the inode of
/// the group's first record, and renders a group even when it has nothing left to link.
/// </summary>
public bool BeVerbose { get; set; }
/// <summary>
/// When set, the duplicate report in this file replaces each duplicate with a hard link to
/// the group's first record; otherwise duplicates are only listed.
/// </summary>
public bool EnableLinking { get; set; }
/// <summary>
/// NOTE(review): presumably the directory where scanning starts; its use is not visible here.
/// </summary>
public DirectoryInfo InitialDirectory { get; set; }
/// <summary>
/// NOTE(review): presumably keeps the database file after the run; its use is not visible here.
/// </summary>
public bool KeepDatabase { get; set; }
/// <summary>
/// NOTE(review): presumably reuses an existing database instead of rescanning; its use is not visible here.
/// </summary>
public bool SkipFileScanning { get; set; }
/// <summary>
/// NOTE(review): presumably the location of the SQLite database file; its use is not visible here.
/// </summary>
public FileInfo DatabaseFile { get; set; }
}
/// <summary>
/// Compares <see cref="UnixFileRecord"/> instances by their <c>INode</c> value only.
/// </summary>
public class DbRecordEqualityComparerByINode : EqualityComparer<UnixFileRecord>
{
    /// <summary>
    /// Two records are equal when both are null, or when both are non-null and carry the
    /// same <c>INode</c>. A null record never equals a non-null one.
    /// </summary>
    public override bool Equals(UnixFileRecord x, UnixFileRecord y)
    {
        if (x is null || y is null)
        {
            return x is null && y is null;
        }

        return x.INode == y.INode;
    }

    /// <summary>
    /// Hash code of the record's <c>INode</c>, so records equal under
    /// <see cref="Equals(UnixFileRecord, UnixFileRecord)"/> hash identically.
    /// </summary>
    public override int GetHashCode(UnixFileRecord obj) => obj.INode.GetHashCode();
}
/// <summary>
/// Extension methods over <see cref="UnixFileSystemInfo"/>. Two of them read members looked
/// up by reflection (the <c>OriginalPath</c> property and the non-public <c>valid</c> field)
/// through delegates compiled once, in the static constructor, from emitted IL.
/// </summary>
static class OriginalPathUnixFileSystemInfo
{
    // Multiplier applied to BlocksAllocated by GetSizeOnDisk.
    private const long BytesPerBlock = 512;

    private static readonly Func<UnixFileSystemInfo, string> GetOriginalPathFunc;
    private static readonly Func<UnixFileSystemInfo, bool> GetValidFunc;

    static OriginalPathUnixFileSystemInfo()
    {
        Type infoType = typeof(UnixFileSystemInfo);

        // Accessor 1: load the argument, call the OriginalPath getter, return its string.
        // The dynamic method is owned by UnixFileSystemInfo and skips visibility checks.
        var pathAccessor = new DynamicMethod("cheat", typeof(string), new[] { infoType }, infoType, true);
        ILGenerator pathIl = pathAccessor.GetILGenerator();
        pathIl.Emit(OpCodes.Ldarg_0);
        MethodInfo originalPathGetter = infoType
            .GetProperty("OriginalPath", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic)
            .GetGetMethod(true);
        pathIl.Emit(OpCodes.Callvirt, originalPathGetter);
        pathIl.Emit(OpCodes.Ret);
        GetOriginalPathFunc =
            (Func<UnixFileSystemInfo, string>)pathAccessor.CreateDelegate(typeof(Func<UnixFileSystemInfo, string>));

        // Accessor 2: load the argument, read the non-public instance field "valid", return it.
        var validAccessor = new DynamicMethod("cheat2", typeof(bool), new[] { infoType }, infoType, true);
        ILGenerator validIl = validAccessor.GetILGenerator();
        validIl.Emit(OpCodes.Ldarg_0);
        FieldInfo validField = infoType.GetField("valid", BindingFlags.Instance | BindingFlags.NonPublic);
        validIl.Emit(OpCodes.Ldfld, validField);
        validIl.Emit(OpCodes.Ret);
        GetValidFunc =
            (Func<UnixFileSystemInfo, bool>)validAccessor.CreateDelegate(typeof(Func<UnixFileSystemInfo, bool>));
    }

    /// <summary>Returns the value of the <c>OriginalPath</c> property of <paramref name="info"/>.</summary>
    public static string GetOriginalPath(this UnixFileSystemInfo info)
    {
        return GetOriginalPathFunc(info);
    }

    /// <summary>Returns the value of the non-public <c>valid</c> field of <paramref name="info"/>.</summary>
    public static bool GetValid(this UnixFileSystemInfo info)
    {
        return GetValidFunc(info);
    }

    /// <summary>
    /// Returns <c>BlocksAllocated</c> multiplied by 512.
    /// NOTE(review): assumes BlocksAllocated counts 512-byte units - confirm against Mono.Unix.
    /// </summary>
    public static long GetSizeOnDisk(this UnixFileSystemInfo info)
    {
        return info.BlocksAllocated * BytesPerBlock;
    }
}
}

View File

@@ -2,6 +2,11 @@
"profiles": {
"Files": {
"commandName": "Project"
},
"WSL 2": {
"commandName": "WSL2",
"environmentVariables": {},
"distributionName": ""
}
}
}

224
Files/UnixFileRecord.cs Normal file
View File

@@ -0,0 +1,224 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using Mono.Unix;
namespace Files
{
/// <summary>
/// Bulk helpers for sequences of <see cref="UnixFileRecord"/>.
/// </summary>
public static class UnixFileRecordExtensions
{
    /// <summary>
    /// Hashes each distinct inode once and shares the result with every record of that inode.
    /// Records are grouped by <c>INode</c>; the first record of a group is hashed through its
    /// <c>FileInfo</c>, and the hash is pushed to all group members with <c>SetHash</c>.
    /// Groups whose hash could not be computed are left untouched.
    /// </summary>
    /// <param name="records">Records to process; enumerated once by the grouping.</param>
    public static void CalculateHashes(this IEnumerable<UnixFileRecord> records)
    {
        foreach (var sameInode in records.GroupBy(record => record.INode))
        {
            UnixFileRecord representative = sameInode.First();
            if (UnixFileRecord.GetHash(representative.FileInfo) is not Guid digest)
            {
                continue;
            }

            foreach (UnixFileRecord member in sameInode)
            {
                member.SetHash(digest);
            }
        }
    }
}
/// <summary>
/// One file described by path (<see cref="Name"/>), <see cref="Size"/> and <see cref="INode"/>.
/// The <see cref="UnixFileInfo"/> and the MD5 content hash (stored as a <see cref="Guid"/>)
/// are produced lazily on first access.
/// </summary>
public class UnixFileRecord
{
    private readonly Lazy<Guid?> _guid;
    private readonly Lazy<UnixFileInfo> _fileInfo;
    private Guid? _preCalculatedHash = null;

    /// <summary>
    /// Creates an empty record; <see cref="Name"/>, <see cref="Size"/> and <see cref="INode"/>
    /// are expected to be supplied through their init accessors.
    /// </summary>
    public UnixFileRecord()
    {
        _guid = new Lazy<Guid?>(GetHash);
        _fileInfo = new Lazy<UnixFileInfo>(GetFileInfo);
    }

    /// <summary>Creates a record from already known path, size and inode number.</summary>
    public UnixFileRecord(string filePath, long size, long iNode)
    {
        Name = filePath;
        Size = size;
        INode = iNode;
        _guid = new Lazy<Guid?>(GetHash);
        _fileInfo = new Lazy<UnixFileInfo>(GetFileInfo);
    }

    /// <summary>Creates a record from <paramref name="fileInfo"/>; the hash is computed on demand.</summary>
    public UnixFileRecord(UnixFileInfo fileInfo)
    {
        _guid = new Lazy<Guid?>(GetHash);
        _fileInfo = new Lazy<UnixFileInfo>(fileInfo);
        Name = fileInfo.GetOriginalPath();
        Size = fileInfo.Length;
        INode = fileInfo.Inode;
    }

    /// <summary>Creates a record from <paramref name="fileInfo"/> with an already known hash.</summary>
    public UnixFileRecord(UnixFileInfo fileInfo, Guid hash)
    {
        _guid = new Lazy<Guid?>(hash);
        _fileInfo = new Lazy<UnixFileInfo>(fileInfo);
        Name = fileInfo.GetOriginalPath();
        Size = fileInfo.Length;
        INode = fileInfo.Inode;
    }

    /// <summary>Path of the file.</summary>
    public string Name { get; init; }

    /// <summary>File length as reported when the record was created.</summary>
    public long Size { get; init; }

    /// <summary>Inode number of the file.</summary>
    public long INode { get; init; }

    /// <summary>
    /// MD5 of the file content, or null when the file could not be read.
    /// A value supplied through <see cref="SetHash"/> always wins, even if the lazily
    /// computed value was already materialised (previously such a call was silently ignored).
    /// </summary>
    public Guid? Hash => _preCalculatedHash ?? _guid.Value;

    /// <summary>File info for <see cref="Name"/>, created on first access unless supplied to the constructor.</summary>
    public UnixFileInfo FileInfo => _fileInfo.Value;

    /// <summary>Stores an externally computed hash so this record does not have to read the file.</summary>
    public void SetHash(Guid hash) => _preCalculatedHash = hash;

    private UnixFileInfo GetFileInfo() => new(Name);

    private Guid? GetHash() => _preCalculatedHash ??= GetHash(Name);

    private async Task<Guid?> GetHashAsync(CancellationToken ct = default) => await GetHashAsync(Name, ct);

    private Guid? GetHash2(CancellationToken ct = default) => GetHash2(Name, ct);

    private async Task<Guid?> GetHash2Async(CancellationToken ct = default) => await GetHash2Async(Name, ct);

    /// <summary>Computes the MD5 of the file at <paramref name="filePath"/>.</summary>
    /// <returns>The 16 hash bytes as a <see cref="Guid"/>, or null on any failure (best effort).</returns>
    public static Guid? GetHash(string filePath)
    {
        try
        {
            using FileStream stream = File.OpenRead(filePath);
            return ComputeMd5(stream);
        }
        catch
        {
            // Deliberate best effort: callers treat null as "file could not be hashed".
            return null;
        }
    }

    /// <summary>Computes the MD5 of <paramref name="file"/>.</summary>
    /// <returns>The 16 hash bytes as a <see cref="Guid"/>, or null on any failure (best effort).</returns>
    public static Guid? GetHash(UnixFileInfo file)
    {
        try
        {
            // Hashing only reads, so request read access explicitly.
            // NOTE(review): Open(FileMode) alone appears to request read/write access in
            // Mono.Unix, which would fail on read-only files - confirm against the library.
            using UnixStream stream = file.Open(FileMode.Open, FileAccess.Read);
            return ComputeMd5(stream);
        }
        catch
        {
            return null;
        }
    }

    /// <summary>Asynchronous counterpart of <see cref="GetHash(string)"/>.</summary>
    /// <returns>The hash, or null on any failure, including cancellation through <paramref name="ct"/>.</returns>
    public static async Task<Guid?> GetHashAsync(string filePath, CancellationToken ct = default)
    {
        try
        {
            await using FileStream stream = File.OpenRead(filePath);
            return await ComputeMd5Async(stream, ct);
        }
        catch
        {
            return null;
        }
    }

    /// <summary>Asynchronous counterpart of <see cref="GetHash(UnixFileInfo)"/>.</summary>
    /// <returns>The hash, or null on any failure, including cancellation through <paramref name="ct"/>.</returns>
    public static async Task<Guid?> GetHashAsync(UnixFileInfo file, CancellationToken ct = default)
    {
        try
        {
            // Read-only for the same reason as in GetHash(UnixFileInfo).
            await using UnixStream stream = file.Open(FileMode.Open, FileAccess.Read);
            return await ComputeMd5Async(stream, ct);
        }
        catch
        {
            return null;
        }
    }

    /// <summary>
    /// Computes the MD5 of the file at <paramref name="filePath"/> by feeding a pooled buffer
    /// into an <see cref="IncrementalHash"/>.
    /// </summary>
    /// <returns>
    /// The hash, or null when the file cannot be opened or read, or when <paramref name="ct"/>
    /// is cancelled between reads.
    /// </returns>
    public static Guid? GetHash2(string filePath, CancellationToken ct = default)
    {
        byte[] buffer = ArrayPool<byte>.Shared.Rent(4096);
        try
        {
            // Opened inside the try so an unreadable file yields null like the other
            // GetHash* methods instead of throwing.
            using IncrementalHash incrementalHash = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
            using FileStream inputStream = File.OpenRead(filePath);
            int bytesRead;
            while ((bytesRead = inputStream.Read(buffer, 0, buffer.Length)) > 0)
            {
                if (ct.IsCancellationRequested) return null;
                incrementalHash.AppendData(buffer, 0, bytesRead);
            }

            return new Guid(incrementalHash.GetHashAndReset());
        }
        catch
        {
            return null;
        }
        finally
        {
            // File content is not secret here, so the buffer goes back to the pool uncleared.
            ArrayPool<byte>.Shared.Return(buffer, clearArray: false);
        }
    }

    /// <summary>Asynchronous counterpart of <see cref="GetHash2(string, CancellationToken)"/>.</summary>
    /// <returns>The hash, or null on any failure, including cancellation through <paramref name="ct"/>.</returns>
    public static async Task<Guid?> GetHash2Async(string filePath, CancellationToken ct = default)
    {
        byte[] buffer = ArrayPool<byte>.Shared.Rent(4096);
        try
        {
            using IncrementalHash incrementalHash = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
            await using FileStream inputStream = File.OpenRead(filePath);
            int bytesRead;
            while ((bytesRead = await inputStream.ReadAsync(buffer, 0, buffer.Length, ct)) > 0)
            {
                if (ct.IsCancellationRequested) return null;
                incrementalHash.AppendData(buffer, 0, bytesRead);
            }

            return new Guid(incrementalHash.GetHashAndReset());
        }
        catch
        {
            return null;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(buffer, clearArray: false);
        }
    }

    // Hashes the whole stream with a disposed-after-use MD5 instance.
    private static Guid ComputeMd5(Stream stream)
    {
        using MD5 md5 = MD5.Create();
        return new Guid(md5.ComputeHash(stream));
    }

    // Asynchronous variant of ComputeMd5; cancellation surfaces as an exception to the caller.
    private static async Task<Guid> ComputeMd5Async(Stream stream, CancellationToken ct)
    {
        using MD5 md5 = MD5.Create();
        return new Guid(await md5.ComputeHashAsync(stream, ct));
    }
}
}

View File

@@ -0,0 +1,247 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading;
using Mono.Unix;
using Mono.Unix.Native;
namespace Files
{
/// <summary>
/// Bit flags naming the kinds of directory entries a search may target.
/// Each single-kind member occupies its own bit of the underlying byte.
/// </summary>
[Flags]
public enum SearchTarget : byte
{
    /// <summary>Directories.</summary>
    Directories = 1 << 0,

    /// <summary>Regular files.</summary>
    Files = 1 << 1,

    /// <summary>Symbolic links.</summary>
    SymLinks = 1 << 2,

    /// <summary>Named pipes, or FIFOs.</summary>
    NamedPipes = 1 << 3,

    /// <summary>Local-domain socket.</summary>
    Sockets = 1 << 4,

    /// <summary>Character devices.</summary>
    CharacterDevices = 1 << 5,

    /// <summary>Block devices.</summary>
    BlockDevices = 1 << 6,

    /// <summary>Directories and regular files.</summary>
    DirectoriesAndFiles = Directories | Files,

    /// <summary>Directories, regular files and symbolic links.</summary>
    DirectoriesAndFilesAndSymLinks = DirectoriesAndFiles | SymLinks,
}
/// <summary>
/// File type codes as they appear in the <c>d_type</c> field of a directory entry.
/// </summary>
public static class DirentType
{
    /// <summary>
    /// Unknown type. Not every filesystem reports the entry type; some may always return this value.
    /// </summary>
    public const byte DT_UNKNOWN = 0;

    /// <summary>Named pipe (FIFO).</summary>
    public const byte DT_FIFO = 1;

    /// <summary>Character device.</summary>
    public const byte DT_CHR = 2;

    /// <summary>Directory.</summary>
    public const byte DT_DIR = 4;

    /// <summary>Block device.</summary>
    public const byte DT_BLK = 6;

    /// <summary>Regular file.</summary>
    public const byte DT_REG = 8;

    /// <summary>Symbolic link.</summary>
    public const byte DT_LNK = 10;

    /// <summary>Local-domain socket.</summary>
    public const byte DT_SOCK = 12;
}
/// <summary>
/// Directory enumeration built directly on <c>opendir</c>/<c>readdir_r</c>/<c>closedir</c>
/// from <see cref="Syscall"/>, plus helpers for classifying entries and mapping
/// <see cref="Errno"/> values to exceptions.
/// </summary>
public static class UnixFileSystemEnumerator
{
    private const string RelativeCurrentDir = ".";
    private const string RelativeParentDir = "..";

    /// <summary>
    /// Reads all entries of <paramref name="directoryPath"/> except "." and "..".
    /// </summary>
    /// <param name="directoryPath">Directory to read.</param>
    /// <param name="list">
    /// Entries read so far; never null. On failure it holds whatever was read before the error.
    /// </param>
    /// <param name="error">Zero on success, otherwise the last native error.</param>
    /// <returns>True when the whole directory was read, false when opening or reading failed.</returns>
    public static bool TryGetEntries(string directoryPath, out List<Dirent> list, out Errno error)
    {
        list = new List<Dirent>();
        IntPtr dirPointer = Syscall.opendir(directoryPath);
        if (dirPointer == IntPtr.Zero)
        {
            error = Stdlib.GetLastError();
            return false;
        }

        try
        {
            while (true)
            {
                Dirent entry = new();
                int returnValue = Syscall.readdir_r(dirPointer, entry, out IntPtr result);
                if (returnValue != 0)
                {
                    // Captured here, before closedir in the finally block can overwrite it.
                    error = Stdlib.GetLastError();
                    return false;
                }

                if (result == IntPtr.Zero)
                {
                    // A null result with a zero return value marks the end of the directory.
                    error = 0;
                    return true;
                }

                if (entry.d_name != RelativeCurrentDir && entry.d_name != RelativeParentDir)
                {
                    list.Add(entry);
                }
            }
        }
        finally
        {
            // Every successful opendir needs a closedir; without it each scanned
            // directory leaks a handle for the lifetime of the process.
            Syscall.closedir(dirPointer);
        }
    }

    /// <summary>
    /// Callback consulted by <see cref="EnumeratePaths"/> for every entry and every directory error.
    /// </summary>
    /// <param name="directory">Directory being read.</param>
    /// <param name="directoryEntry">Entry name, or "." when reporting an error for the directory itself.</param>
    /// <param name="entryType">One of the <see cref="DirentType"/> constants.</param>
    /// <param name="errno">Zero for regular entries, the error code for a directory failure.</param>
    /// <returns>
    /// False to skip the entry (a skipped directory is not descended into); for a directory
    /// error, false stops the whole enumeration.
    /// </returns>
    public delegate bool FilterEnumeratorDelegate(string directory, string directoryEntry, byte entryType,
        Errno errno);

    /// <summary>
    /// Lazily walks the tree under <paramref name="path"/>, visiting the most recently
    /// discovered directory first.
    /// </summary>
    /// <param name="path">Root directory of the walk.</param>
    /// <param name="filter">Optional filter; null accepts everything.</param>
    /// <param name="ct">Checked before every entry; cancellation throws <see cref="OperationCanceledException"/>.</param>
    /// <returns>
    /// One tuple per accepted entry with errno 0, plus one tuple (directory, DT_DIR, errno) for
    /// every directory that could not be read completely. Entries read before such an error are
    /// still yielded. Only entries whose type is DT_DIR are descended into.
    /// </returns>
    public static IEnumerable<(string path, byte type, Errno errno)> EnumeratePaths(
        string path, FilterEnumeratorDelegate filter = null, CancellationToken ct = default)
    {
        Stack<string> pendingDirectories = new();
        pendingDirectories.Push(path);
        while (pendingDirectories.Count > 0)
        {
            string dir = pendingDirectories.Pop();
            if (!TryGetEntries(dir, out List<Dirent> entries, out Errno errno))
            {
                if (!(filter?.Invoke(dir, RelativeCurrentDir, DirentType.DT_DIR, errno) ?? true)) yield break;
                yield return (dir, DirentType.DT_DIR, errno);
            }

            foreach (Dirent entry in entries)
            {
                ct.ThrowIfCancellationRequested();
                if (!(filter?.Invoke(dir, entry.d_name, entry.d_type, 0) ?? true)) continue;
                string combinedPath = Path.Combine(dir, entry.d_name);
                if (entry.d_type == DirentType.DT_DIR)
                {
                    pendingDirectories.Push(combinedPath);
                }

                yield return (combinedPath, entry.d_type, 0);
            }
        }
    }

    /// <summary>
    /// Tells whether the type of <paramref name="entry"/> is selected by <paramref name="desiredTarget"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsOfTarget(this Dirent entry, SearchTarget desiredTarget)
    {
        return IsOfTarget(entry.d_type, desiredTarget);
    }

    /// <summary>
    /// Tells whether <paramref name="entryType"/> (a <see cref="DirentType"/> constant) is selected
    /// by <paramref name="desiredTarget"/>. Types without a matching flag, including DT_UNKNOWN,
    /// are never selected.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsOfTarget(byte entryType, SearchTarget desiredTarget)
    {
        return entryType switch
        {
            DirentType.DT_DIR => (desiredTarget & SearchTarget.Directories) == SearchTarget.Directories,
            DirentType.DT_REG => (desiredTarget & SearchTarget.Files) == SearchTarget.Files,
            DirentType.DT_LNK => (desiredTarget & SearchTarget.SymLinks) == SearchTarget.SymLinks,
            DirentType.DT_FIFO => (desiredTarget & SearchTarget.NamedPipes) == SearchTarget.NamedPipes,
            DirentType.DT_SOCK => (desiredTarget & SearchTarget.Sockets) == SearchTarget.Sockets,
            DirentType.DT_CHR => (desiredTarget & SearchTarget.CharacterDevices) == SearchTarget.CharacterDevices,
            DirentType.DT_BLK => (desiredTarget & SearchTarget.BlockDevices) == SearchTarget.BlockDevices,
            _ => false
        };
    }

    /// <summary>
    /// Builds (does not throw) the exception that best matches <paramref name="errno"/>.
    /// The message is the native error description; a <see cref="UnixIOException"/> for the same
    /// code is attached as inner exception, and is itself the result for unmapped codes.
    /// </summary>
    public static Exception CreateExceptionForError(this Errno errno)
    {
        string errorDescription = UnixMarshal.GetErrorDescription(errno);
        UnixIOException unixIoException = new(errno);
        switch (errno)
        {
            case Errno.EPERM:
            case Errno.EOPNOTSUPP:
                return new InvalidOperationException(errorDescription, unixIoException);
            case Errno.ENOENT:
                return new FileNotFoundException(errorDescription, unixIoException);
            case Errno.EIO:
            case Errno.ENXIO:
            case Errno.ENOSPC:
            case Errno.ESPIPE:
            case Errno.EROFS:
            case Errno.ENOTEMPTY:
                return new IOException(errorDescription, unixIoException);
            case Errno.ENOEXEC:
                return new InvalidProgramException(errorDescription, unixIoException);
            case Errno.EBADF:
            case Errno.EINVAL:
                return new ArgumentException(errorDescription, unixIoException);
            case Errno.EACCES:
            case Errno.EISDIR:
                return new UnauthorizedAccessException(errorDescription, unixIoException);
            case Errno.EFAULT:
                return new NullReferenceException(errorDescription, unixIoException);
            case Errno.ENOTDIR:
                return new DirectoryNotFoundException(errorDescription, unixIoException);
            case Errno.ERANGE:
                // The single-string constructor takes a parameter name, not a message;
                // use the (message, innerException) overload like every other branch.
                return new ArgumentOutOfRangeException(errorDescription, unixIoException);
            case Errno.ENAMETOOLONG:
                return new PathTooLongException(errorDescription, unixIoException);
            case Errno.EOVERFLOW:
                return new OverflowException(errorDescription, unixIoException);
            default:
                return unixIoException;
        }
    }
}
}

Binary file not shown.