diff --git a/Files.sln b/Files.sln new file mode 100644 index 0000000..1091ce7 --- /dev/null +++ b/Files.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.31025.218 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Files", "Files\Files.csproj", "{28005FE3-CBA7-4D00-9272-180C392D35A9}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {28005FE3-CBA7-4D00-9272-180C392D35A9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {28005FE3-CBA7-4D00-9272-180C392D35A9}.Debug|Any CPU.Build.0 = Debug|Any CPU + {28005FE3-CBA7-4D00-9272-180C392D35A9}.Release|Any CPU.ActiveCfg = Release|Any CPU + {28005FE3-CBA7-4D00-9272-180C392D35A9}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {FB5DD751-57CA-4F80-923A-1CE1C71654D7} + EndGlobalSection +EndGlobal diff --git a/Files/.vscode/launch.json b/Files/.vscode/launch.json new file mode 100644 index 0000000..8198701 --- /dev/null +++ b/Files/.vscode/launch.json @@ -0,0 +1,27 @@ +{ + "version": "0.2.0", + "configurations": [ + { + // Use IntelliSense to find out which attributes exist for C# debugging + // Use hover for the description of the existing attributes + // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md + "name": ".NET Core Launch (console)", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "build", + // If you have changed target frameworks, make sure to update the program path. 
+ "program": "${workspaceFolder}/bin/Debug/net5.0/Files.dll", + "args": [], + "cwd": "${workspaceFolder}", + // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console + "console": "internalConsole", + "stopAtEntry": false + }, + { + "name": ".NET Core Attach", + "type": "coreclr", + "request": "attach", + "processId": "${command:pickProcess}" + } + ] +} \ No newline at end of file diff --git a/Files/.vscode/tasks.json b/Files/.vscode/tasks.json new file mode 100644 index 0000000..de89ee3 --- /dev/null +++ b/Files/.vscode/tasks.json @@ -0,0 +1,42 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "${workspaceFolder}/Files.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "publish", + "command": "dotnet", + "type": "process", + "args": [ + "publish", + "${workspaceFolder}/Files.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "watch", + "command": "dotnet", + "type": "process", + "args": [ + "watch", + "run", + "${workspaceFolder}/Files.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + } + ] +} \ No newline at end of file diff --git a/Files/Files.csproj b/Files/Files.csproj new file mode 100644 index 0000000..8a58449 --- /dev/null +++ b/Files/Files.csproj @@ -0,0 +1,16 @@ + + + + Exe + net5.0 + + + + + + + + + + + diff --git a/Files/Program.cs b/Files/Program.cs new file mode 100644 index 0000000..f8449df --- /dev/null +++ b/Files/Program.cs @@ -0,0 +1,156 @@ +using System; +using System.Collections.Generic; +using System.IO; +using Spectre.Console; +using Microsoft.Data.Sqlite; +using Mono.Unix; +using System.Threading; +using System.CommandLine; +using System.CommandLine.Invocation; 
+using System.CommandLine.Parsing;
+using System.Threading.Tasks;
+using System.Linq;
+using Dapper;
+using System.Security.Cryptography;
+
+namespace Files {
+    /// <summary>
+    /// Command-line tool that records the non-directory entries found below a
+    /// directory in a SQLite table and then prints groups of files whose MD5
+    /// digests are equal.
+    /// </summary>
+    class Program {
+        /// <summary>
+        /// Doubles '[' and ']' so that arbitrary text (typically a path) can be
+        /// embedded in a Spectre.Console markup string without being parsed as a tag.
+        /// </summary>
+        private static string EscapeForMarkup(string text) {
+            return text.Replace("[", "[[").Replace("]", "]]");
+        }
+
+        /// <summary>
+        /// Opens <c>db.db</c>, rebuilds the file index starting at
+        /// <paramref name="startDirectory"/> and, when the index was committed,
+        /// prints the duplicate groups. Progress is shown through a console status.
+        /// </summary>
+        /// <param name="isVerbose">When true, one "OK" line is printed per indexed file.</param>
+        /// <param name="startDirectory">Directory to scan; <c>null</c> means ".".</param>
+        /// <param name="ct">Token polled during the scan; cancellation stops the scan without a report.</param>
+        private static async Task IndexFiles(bool isVerbose, DirectoryInfo startDirectory, CancellationToken ct) {
+            await AnsiConsole.Status()
+                .StartAsync("Thinking...", async ctx => {
+                    using var connection = new SqliteConnection("Data Source=db.db");
+                    connection.Open();
+
+                    if (!await BuildIndex(connection, ctx, isVerbose, startDirectory, ct)) {
+                        return;
+                    }
+
+                    ReportDuplicates(connection, ctx);
+                });
+        }
+
+        /// <summary>
+        /// Walks the directory tree breadth-first and stores name, size and inode of
+        /// every accessible non-directory entry in the <c>files</c> table, all inside
+        /// one transaction.
+        /// </summary>
+        /// <returns>
+        /// <c>true</c> when the index was committed; <c>false</c> when the scan was
+        /// cancelled or failed (the failure is written to the console).
+        /// </returns>
+        private static async Task<bool> BuildIndex(SqliteConnection connection, StatusContext ctx, bool isVerbose, DirectoryInfo startDirectory, CancellationToken ct) {
+            await using var transaction = await connection.BeginTransactionAsync();
+
+            connection.Execute("CREATE TABLE IF NOT EXISTS files (name TEXT, size INTEGER, inode INTEGER);", transaction: transaction);
+            // Rows left over from an earlier run would make every re-scanned file
+            // show up as a duplicate of itself, so the index is rebuilt from scratch.
+            connection.Execute("DELETE FROM files;", transaction: transaction);
+
+            var directoriesQueue = new Queue<string>();
+            directoriesQueue.Enqueue(startDirectory?.ToString() ?? ".");
+
+            try {
+                while (directoriesQueue.TryDequeue(out string currentDir)) {
+                    // Returning without a commit leaves the transaction to be disposed,
+                    // which is expected to discard the partial index.
+                    if (ct.IsCancellationRequested) {
+                        return false;
+                    }
+
+                    ctx.Status(EscapeForMarkup(currentDir));
+
+                    UnixDirectoryInfo dirInfo = new(currentDir);
+                    if (!dirInfo.CanAccess(Mono.Unix.Native.AccessModes.R_OK)
+                        || !dirInfo.CanAccess(Mono.Unix.Native.AccessModes.X_OK)) {
+                        AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {EscapeForMarkup(dirInfo.ToString())}");
+                        // Skip only this directory; the rest of the queue is still scanned.
+                        continue;
+                    }
+
+                    foreach (UnixFileSystemInfo entry in dirInfo.GetFileSystemEntries()) {
+                        string relativePath = Path.Combine(currentDir, entry.Name);
+
+                        if (!entry.CanAccess(Mono.Unix.Native.AccessModes.R_OK)) {
+                            if (entry.IsDirectory) {
+                                AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :file_folder: {EscapeForMarkup(relativePath)}");
+                            } else if (entry.IsRegularFile) {
+                                AnsiConsole.MarkupLine($"[red]:cross_mark: NO_ACCESS:[/] :page_facing_up: {EscapeForMarkup(relativePath)}");
+                            }
+                            continue;
+                        }
+
+                        if (entry.IsDirectory) {
+                            directoriesQueue.Enqueue(relativePath);
+                            continue;
+                        }
+
+                        connection.Execute(
+                            "INSERT INTO files (name, size, inode) VALUES (@name, @Length, @Inode);",
+                            new { name = relativePath, entry.Length, entry.Inode },
+                            transaction);
+
+                        if (isVerbose) {
+                            AnsiConsole.MarkupLine($"[green]:check_mark: OK:[/] {EscapeForMarkup(relativePath)}");
+                        }
+
+                        if (ct.IsCancellationRequested) {
+                            return false;
+                        }
+                    }
+                }
+
+                await transaction.CommitAsync();
+                return true;
+            } catch (Exception exception) {
+                await transaction.RollbackAsync();
+                AnsiConsole.WriteException(exception);
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Prints one tree per group of indexed files that share a non-zero size and
+        /// an equal MD5 digest. Files whose digest cannot be computed are ignored.
+        /// </summary>
+        private static void ReportDuplicates(SqliteConnection connection, StatusContext ctx) {
+            ctx.Status("Finding duplicates...");
+            ctx.Spinner(Spinner.Known.Aesthetic);
+
+            // Only sizes shared by at least two rows can contain duplicates; hashing is
+            // limited to those candidates.
+            var potential = connection.Query<(int cnt, long size)>("SELECT COUNT(*) cnt, size FROM files WHERE size != 0 GROUP BY size HAVING cnt > 1 ORDER BY size * cnt DESC;");
+
+            foreach (var potentialFile in potential) {
+                var sameSize = connection.Query<DbRecord>("SELECT name, size, inode FROM files WHERE size = @size",
+                    new { potentialFile.size }).ToList();
+
+                var equalGrouped = sameSize
+                    .Where(r => r.Hash.HasValue)
+                    .GroupBy(r => r.Hash)
+                    .Where(g => g.Count() > 1)
+                    .ToList();
+
+                foreach (var grp in equalGrouped) {
+                    var root = new Tree(":double_exclamation_mark: " + grp.Key);
+                    foreach (var item in grp) {
+                        // Node text is parsed as markup, so the path must be escaped
+                        // like every other path this program prints.
+                        root.AddNode(EscapeForMarkup(item.Name));
+                    }
+                    AnsiConsole.Render(root);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Entry point: parses <c>--verbose</c>/<c>-v</c> and an optional existing
+        /// <c>directory</c> argument (default "./"), then runs <see cref="IndexFiles"/>.
+        /// </summary>
+        private static async Task Main(string[] args) {
+            var verboseOption = new Option<bool>(new[] { "--verbose", "-v" }, "Verbose");
+            var directoryArgument = new Argument<DirectoryInfo>(
+                argumentResult => new DirectoryInfo("./"), isDefault: true) {
+                Name = "directory",
+                Description = "Directory to scan.",
+                Arity = ArgumentArity.ZeroOrOne,
+            }.ExistingOnly();
+
+            var rootCommand = new RootCommand("$ File -v false ./") {
+                verboseOption,
+                directoryArgument,
+            };
+
+            // The directory is resolved from the raw token (or the argument default)
+            // before the handler runs, and captured by the handler below.
+            ParseResult result = rootCommand.Parse(args);
+            ArgumentResult dirResult = result.FindResultFor(directoryArgument);
+            var dir = new DirectoryInfo(
+                dirResult.Tokens.FirstOrDefault()?.Value
+                ?? dirResult.Argument.GetDefaultValue()?.ToString());
+
+            rootCommand.Handler = CommandHandler.Create<bool, CancellationToken>(
+                async (verbose, ct) => await IndexFiles(verbose, dir, ct));
+
+            await rootCommand.InvokeAsync(args);
+        }
+    }
+
+    /// <summary>
+    /// One row of the <c>files</c> table plus a lazily computed content digest.
+    /// </summary>
+    public class DbRecord {
+        // Computed at most once, on first access to Hash.
+        private readonly Lazy<Guid?> _guid;
+
+        public DbRecord() {
+            _guid = new Lazy<Guid?>(GetHash);
+        }
+
+        /// <summary>Path of the file as stored in the index.</summary>
+        public string Name { get; set; }
+        /// <summary>Size recorded for the file when it was indexed.</summary>
+        public long Size { get; set; }
+        /// <summary>Inode recorded for the file when it was indexed.</summary>
+        public long Inode { get; set; }
+        /// <summary>Cached result of <see cref="GetHash"/>; <c>null</c> when hashing failed.</summary>
+        public Guid? Hash => _guid.Value;
+
+        /// <summary>
+        /// Reads the file at <see cref="Name"/> and returns its 16-byte MD5 digest
+        /// packed into a <see cref="Guid"/>.
+        /// </summary>
+        /// <returns>The digest, or <c>null</c> when the file cannot be opened or read.</returns>
+        public Guid? GetHash() {
+            try {
+                using FileStream stream = File.OpenRead(Name);
+                using var md5 = MD5.Create();
+                return new Guid(md5.ComputeHash(stream));
+            } catch {
+                // Best effort: an unreadable or vanished file simply has no hash and
+                // is left out of the duplicate report.
+                return null;
+            }
+        }
+    }
+}
diff --git a/Files/Properties/launchSettings.json b/Files/Properties/launchSettings.json
new file mode 100644
index 0000000..f710fc4
--- /dev/null
+++ b/Files/Properties/launchSettings.json
@@ -0,0 +1,7 @@
+{
+  "profiles": {
+    "Files": {
+      "commandName": "Project"
+    }
+  }
+}
\ No newline at end of file
diff --git a/Files/db.db b/Files/db.db
new file mode 100644
index 0000000..825c336
Binary files /dev/null and b/Files/db.db differ
diff --git a/global.json b/global.json
new file mode 100644
index 0000000..ac1ec45
--- /dev/null
+++ b/global.json
@@ -0,0 +1,7 @@
+{
+  "sdk": {
+    "version": "5.0.0",
+    "allowPrerelease": true,
+    "rollForward": "latestMinor"
+  }
+}
\ No newline at end of file