feat: add library scanning mechanic

This commit is contained in:
2026-01-26 04:35:12 +01:00
parent 3a68531fb4
commit d46a2573c4
7 changed files with 355 additions and 14 deletions

View File

@@ -70,6 +70,7 @@ builder.Services.AddHttpLogging();
var app = builder.Build();
bool shutdown = false;
bool scanNeeded = false;
if (args.FirstOrDefault() is not null)
{
// Handle CLI if arguments have been passed.
@@ -89,6 +90,7 @@ using (IServiceScope scope = app.Services.CreateScope())
GeneralUseHelpers guhf = scope.ServiceProvider.GetRequiredService<GeneralUseHelpers>();
Seeder seeder = new(db, guhf);
shutdown = seeder.Seed();
if (!shutdown) scanNeeded = await seeder.ScanPrefetchAsync();
}
if (shutdown) return;

View File

@@ -45,7 +45,6 @@
<ItemGroup>
<Folder Include="Controllers\" />
<Folder Include="Migrations\" />
<Folder Include="Mapping\" />
<Folder Include="DTOs\" />
</ItemGroup>

View File

@@ -5,10 +5,10 @@ using System.Text;
namespace Shadow.Tools;
public class GeneralUseHelpers(ApplicationDbContext db, IConfiguration appsettings)
public class GeneralUseHelpers(ApplicationDbContext? db = null, IConfiguration? appsettings = null)
{
private readonly ApplicationDbContext _db = db;
private readonly IConfiguration _appsettings = appsettings;
private readonly ApplicationDbContext? _db = db;
private readonly IConfiguration? _appsettings = appsettings;
//async public Task<User?> GetUserFromEmail(string email)
@@ -27,4 +27,23 @@ public class GeneralUseHelpers(ApplicationDbContext db, IConfiguration appsettin
// }
//}
/// <summary>
/// Quick and dirty Dictionary&lt;string, string&gt; to JSON serializer.
/// </summary>
/// <remarks>
/// Pairs are written in the dictionary's enumeration order as <c>"key": "value"</c>
/// and joined with <c>", "</c>. Backslashes, double quotes and control characters
/// are escaped, so the result is valid JSON for any input. An empty dictionary
/// yields <c>{}</c>; a null value is written as an empty string.
/// </remarks>
/// <param name="dict">Dictionary with keypair of two strings</param>
/// <returns>Single-line JSON object</returns>
public static string DictAsJson(Dictionary<string, string> dict)
{
    // Escapes a single string for use inside a JSON string literal.
    // Every character is handled exactly once, so a backslash that was
    // produced by escaping a quote can never be escaped a second time.
    static string Escape(string? raw)
    {
        if (string.IsNullOrEmpty(raw)) return String.Empty;

        StringBuilder escaped = new(raw.Length + 8);
        foreach (char c in raw)
        {
            switch (c)
            {
                case '\\': escaped.Append(@"\\"); break;   // a\b -> a\\b
                case '"': escaped.Append("\\\""); break;   // a"b -> a\"b
                case '\n': escaped.Append(@"\n"); break;
                case '\r': escaped.Append(@"\r"); break;
                case '\t': escaped.Append(@"\t"); break;
                case '\b': escaped.Append(@"\b"); break;
                case '\f': escaped.Append(@"\f"); break;
                default:
                    // Remaining control characters are not allowed raw inside JSON strings.
                    if (c < ' ') escaped.Append($"\\u{(int)c:x4}");
                    else escaped.Append(c);
                    break;
            }
        }
        return escaped.ToString();
    }

    // string.Join handles the empty dictionary without any trailing-separator trimming.
    IEnumerable<string> pairs = dict.Select(
        pair => $"\"{Escape(pair.Key)}\": \"{Escape(pair.Value)}\"");
    return "{" + string.Join(", ", pairs) + "}";
}
}

91
Tools/LibraryWatcher.cs Normal file
View File

@@ -0,0 +1,91 @@
using Shadow.Data;
using Shadow.Entities;
using System;
using System.IO;
namespace Shadow.Tools;
public class LibraryWatcher(string watchPath, string[] excludedPaths, ApplicationDbContext dbContext)
{
private readonly string libraryPath = watchPath;
private readonly string[] excludedPaths = excludedPaths;
private readonly ApplicationDbContext db = dbContext;
private readonly GeneralUseHelpers guhf = new();
/// <summary>
/// Returns a sorted list of paths to all audio files in a directory, recursively.
/// </summary>
/// <remarks>
/// Only files ending in .flac, .m4a, .mp3, .ogg or .wav (case-insensitive) are returned.
/// Matching and sorting are ordinal, so the result does not depend on the current
/// culture; callers hash this list to detect library changes.
/// </remarks>
/// <param name="directory">Path to directory</param>
/// <returns>Sorted list of filepaths</returns>
/// <exception cref="DirectoryNotFoundException">Thrown when the directory does not exist</exception>
public async Task<List<string>> GetFilesRecursivelyAsync(string directory)
{
    string[] allowedExtensions = [".flac", ".m4a", ".mp3", ".ogg", ".wav"];
    try
    {
        // There is no asynchronous directory enumeration API, so the blocking
        // walk is moved to the thread pool instead of running on the caller's thread.
        return await Task.Run(() =>
        {
            List<string> files = Directory
                .EnumerateFiles(directory, "*", SearchOption.AllDirectories)
                .Where(file => allowedExtensions.Any(
                    extension => file.EndsWith(extension, StringComparison.OrdinalIgnoreCase)))
                .ToList();
            files.Sort(StringComparer.Ordinal);
            return files;
        });
    }
    catch (DirectoryNotFoundException)
    {
        Console.WriteLine($"[Error] Directory \"{directory}\" does not exist!\n" +
            " Please create it manually, or use `Shadow setupWizard`.");
        // Rethrow the original exception so its message and stack trace are preserved.
        throw;
    }
}
/// <summary>
/// Return all multimedia content inside of library
/// </summary>
/// <remarks>
/// Files located under any of the excluded paths are filtered out. Excluded paths
/// are compared as normalized absolute directory prefixes, so they do not need to
/// exist on disk or be spelled the same way as the library path. With no excluded
/// paths the whole library listing is returned.
/// </remarks>
/// <returns>List of multimedia filepaths</returns>
public async Task<List<string>> GetAllMultimediaAsync()
{
    List<string> libraryContent = await GetFilesRecursivelyAsync(libraryPath);

    // Note: currently, the only excluded path from scanning is the thumbnail cache.
    // This might change in the future, so every entry is honoured.
    // A trailing separator is appended so "/lib/cache" does not also match "/lib/cache2".
    string[] excludedRoots = excludedPaths
        .Where(path => !string.IsNullOrWhiteSpace(path))
        .Select(path => Path.TrimEndingDirectorySeparator(Path.GetFullPath(path))
            + Path.DirectorySeparatorChar)
        .ToArray();
    if (excludedRoots.Length == 0) return libraryContent;

    // Windows and macOS file systems are case-insensitive by default.
    StringComparison pathComparison = OperatingSystem.IsWindows() || OperatingSystem.IsMacOS()
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    // The returned strings are the ones produced by the listing (not the normalized
    // form), so the order and spelling of the remaining entries is unchanged.
    List<string> libraryMultimedia = libraryContent
        .Where(file =>
        {
            string absolute = Path.GetFullPath(file);
            return !excludedRoots.Any(root => absolute.StartsWith(root, pathComparison));
        })
        .ToList();
    return libraryMultimedia;
}
/// <summary>
/// Scan the library in its entirety
/// </summary>
/// <remarks>
/// Every multimedia file is passed through <c>MetadataExtractor.ExtractAsync</c> and
/// <c>MediaParser.CreateSong</c>. Afterwards the MD5 of the newline-joined file list
/// is stored in the "libraryState" global.
/// </remarks>
/// <returns>List of multimedia filepaths that were processed</returns>
public async Task<List<string>> PerformFullScanAsync()
{
    Console.WriteLine("Performing full library scan...");
    List<string> multimedia = await GetAllMultimediaAsync();
    foreach (string filepath in multimedia)
    {
        Console.WriteLine(filepath);
        Dictionary<string, string> fileInfo = await MetadataExtractor.ExtractAsync(filepath);
        // Pretend we are doing parsing here...
        Console.WriteLine(GeneralUseHelpers.DictAsJson(fileInfo));
        MediaParser.CreateSong(db, fileInfo);
    }
    Console.WriteLine($"Full scan complete! Processed {multimedia.Count} files.");

    // Update state inside of DB
    string currentLibraryState = MetadataExtractor.GetStringMD5(string.Join("\n", multimedia));
    Global? lastLibraryState = db.Globals.FirstOrDefault(g => g.Key == "libraryState");
    if (lastLibraryState is null)
    {
        // A row that does not exist yet must be inserted explicitly. Update() on a
        // new entity only inserts when the key is store-generated.
        // NOTE(review): Global's key configuration is not visible here — confirm.
        lastLibraryState = new() { Key = "libraryState" };
        lastLibraryState.Value = currentLibraryState;
        db.Globals.Add(lastLibraryState);
    }
    else
    {
        lastLibraryState.Value = currentLibraryState;
        db.Update(lastLibraryState);
    }
    await db.SaveChangesAsync();
    return multimedia;
}
}

126
Tools/MediaParser.cs Normal file
View File

@@ -0,0 +1,126 @@
using Microsoft.VisualBasic.FileIO;
using Shadow.Data;
using Shadow.Entities;
using System.Security.Cryptography;
namespace Shadow.Tools;
public static class MediaParser
{
/// <summary>
/// Produce a cryptographically random, lowercase hexadecimal string.
/// </summary>
/// <param name="length">Optional: number of hex characters to generate (32 when omitted)</param>
/// <returns>A lowercase hexstring with exactly <paramref name="length"/> characters</returns>
public static string HexStr(int length = 32)
    => RandomNumberGenerator.GetHexString(length, lowercase: true);
/// <summary>
/// Look up the first usable value among several candidate metadata keys.
/// </summary>
/// <param name="metadata">Dictionary to search in</param>
/// <param name="searchStrings">Candidate keys, tried in the given order</param>
/// <returns>
/// The value of the first key that is present with a non-empty value;
/// null when no candidate qualifies
/// </returns>
public static string? GetAny(Dictionary<string, string> metadata, List<string> searchStrings)
{
    foreach (string candidate in searchStrings)
    {
        // Key absent: move on to the next candidate.
        if (!metadata.TryGetValue(candidate, out string? found))
            continue;

        // Key present but blank: treated the same as absent.
        if (string.IsNullOrEmpty(found))
            continue;

        return found;
    }

    return null;
}
/// <summary>
/// Map exiftool metadata to Song entity.
/// </summary>
/// <remarks>
/// The file hash stored under "_shadow:fileHash" is used as the song URI and as the
/// duplicate check. Artist and album are looked up in the database and created when
/// missing. A failure while saving is logged to the console and not rethrown; in that
/// case the returned Song has not been persisted.
/// </remarks>
/// <param name="db">Database context</param>
/// <param name="exif">ExifTool metadata</param>
/// <returns>New Song entity, or null if song already exists in db</returns>
/// <exception cref="InvalidDataException">Thrown when the metadata carries no file hash</exception>
/// <exception cref="KeyNotFoundException">
/// Thrown when "System:Directory", "System:FileName" or "File:FileType" is missing
/// </exception>
public static Song? CreateSong(ApplicationDbContext db, Dictionary<string, string> exif)
{
    // First of all, check if song already exists in db
    string uri = GetAny(exif, ["_shadow:fileHash"])
        ?? throw new InvalidDataException("Fatal error: could not get file hash!");
    if (db.Songs.Any(s => s.Uri == uri)) return null;

    // If not, extract exif data
    string combinedPath = Path.Combine(exif["System:Directory"], exif["System:FileName"]);
    string title = GetAny(exif, [
            "ItemList:Title",     // iTunes m4a
            "ItemList:SortName",  // iTunes m4a
            "Vorbis:Title",       // Bandcamp ogg
            "ID3v2_3:Title",      // Generic mp3/wav ID3 v2.3.0
        ])
        ?? combinedPath; // untagged file: fall back to its path
    string filepath = Path.GetFullPath(combinedPath); // TODO: bulletproof this
    string filetype = exif["File:FileType"].ToLowerInvariant();

    // Album/artist related
    string artistName = GetAny(exif, [
            "ItemList:Artist",       // iTunes m4a
            "ItemList:AlbumArtist",  // iTunes m4a
            "Vorbis:Artist",         // Bandcamp ogg
            "Vorbis:Albumartist",    // Bandcamp ogg
            "ID3v2_3:Artist",        // Generic mp3/wav ID3 v2.3.0
        ]) ?? "[Unknown Artist]"; // weak line of defense against files with missing or crafted tags
    string albumName = GetAny(exif, [
            "ItemList:Album",  // iTunes m4a
            "Vorbis:Album",    // Bandcamp ogg
            "ID3v2_3:Album",   // Generic mp3/wav ID3 v2.3.0
        ]) ?? "[Unknown Album]"; // again, weak line of defense

    // Normalize once, culture-independently, so the stored key and the lookup key
    // are always produced the same way regardless of the server's locale.
    string normalizedArtistName = artistName.ToLowerInvariant();

    // Try to find relevant artists and albums
    Artist artist = db.Artists
        .FirstOrDefault(a => a.NormalizedName == normalizedArtistName)
        ?? new Artist
        {
            Name = artistName,
            NormalizedName = normalizedArtistName
        };
    Album album = db.Albums
        .FirstOrDefault(a => a.Name == albumName && a.Artist == artist)
        ?? new Album
        {
            Name = albumName,
            Uri = HexStr(),
            Artist = artist
        };
    Song song = new()
    {
        Title = title,
        Uri = uri,
        Filepath = filepath,
        Filetype = filetype,
        Album = album,
        Artist = artist
    };

    try
    {
        // Is Update() safe here?
        db.Artists.Update(artist);
        db.Albums.Update(album);
        db.Songs.Update(song);
        artist.Albums.Add(album);
        artist.Songs.Add(song);
        db.SaveChanges();
    }
    catch (Exception e)
    {
        // NOTE(review): the entities that failed to save stay tracked by the context,
        // which may make later SaveChanges calls fail as well — confirm and decide
        // whether they should be detached here.
        Console.WriteLine($"[Error: MediaParser] Failed to extract metadata from {filepath}:\n" +
            $"{e}");
    }
    return song;
}
}

View File

@@ -0,0 +1,67 @@
using SharpExifTool;
using System.Security.Cryptography;
namespace Shadow.Tools;
public static class MetadataExtractor
{
private readonly static ExifTool exifTool = new();
private readonly static GeneralUseHelpers guhf = new();
/// <summary>
/// Read all metadata of a file through ExifTool and attach the file's MD5 checksum.
/// </summary>
/// <param name="fullPath">Path of the file to inspect</param>
/// <returns>
/// The ExifTool metadata plus a "_shadow:fileHash" entry. The hash entry is an empty
/// string when the checksum could not be computed. If ExifTool yields nothing, the
/// dictionary contains only the hash entry.
/// </returns>
public static async Task<Dictionary<string, string>> ExtractAsync(string fullPath)
{
    // Get all relevant metadata
    var extracted = await exifTool.ExtractAllMetadataAsync(fullPath);

    // Guard before copying: constructing a Dictionary from null throws, so a null
    // check placed after the copy can never take effect.
    Dictionary<string, string> fileMetadata = extracted is null
        ? new Dictionary<string, string>()
        : new Dictionary<string, string>(extracted);

    // Add in a MD5 hint. The indexer is used so an existing key is overwritten
    // instead of raising ArgumentException.
    string md5sum = await GetFileMD5Async(fullPath);
    fileMetadata["_shadow:fileHash"] = md5sum;
    return fileMetadata;
}
/// <summary>
/// Hash the contents of a file with MD5.
/// </summary>
/// <param name="fullPath">Input file absolute path</param>
/// <returns>
/// Lowercase MD5 hexstring of the file contents; an empty string when the file
/// does not exist or anything goes wrong while reading it
/// </returns>
public static async Task<string> GetFileMD5Async(string fullPath)
{
    try
    {
        if (!File.Exists(fullPath))
            return String.Empty;

        using MD5 hasher = MD5.Create();
        using FileStream input = File.OpenRead(fullPath);
        byte[] digest = await hasher.ComputeHashAsync(input);
        return Convert.ToHexStringLower(digest);
    }
    catch
    {
        // Best effort: any failure is reported to the caller as "no hash".
        return String.Empty;
    }
}
/// <summary>
/// Compute MD5 checksum of a string
/// </summary>
/// <remarks>
/// The string is hashed as UTF-8. ASCII encoding would replace every non-ASCII
/// character with '?', making strings that differ only in such characters
/// (e.g. file names with accents or non-Latin scripts) hash identically.
/// For pure-ASCII input the result is the same under both encodings.
/// </remarks>
/// <param name="input">Input string for the MD5 hash</param>
/// <returns>Lowercase MD5 hexstring</returns>
public static string GetStringMD5(string input)
{
    byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(input);
    byte[] hashBytes = MD5.HashData(inputBytes);
    return Convert.ToHexStringLower(hashBytes);
}
}

View File

@@ -177,4 +177,41 @@ public class Seeder
return migrationSuccess;
}
/// <summary>
/// Check if the library needs a full rescan, and perform it when it does
/// </summary>
/// <remarks>
/// The current list of multimedia files is joined with newlines, hashed, and compared
/// against the value stored under "libraryState". On a mismatch a full scan is run
/// before returning.
/// </remarks>
/// <returns>True if a full rescan was needed (and has been performed)</returns>
/// <exception cref="MissingFieldException">
/// Thrown when the last library state is unknown, or when the path to the music
/// library or to the cache is unknown or empty
/// </exception>
public async Task<bool> ScanPrefetchAsync()
{
    Global? lastLibraryState = await db.Globals.FirstOrDefaultAsync(g => g.Key == "libraryState");
    Global? libraryPath = await db.Globals.FirstOrDefaultAsync(g => g.Key == "musicLibraryPath");
    Global? cachePath = await db.Globals.FirstOrDefaultAsync(g => g.Key == "musicThumbnailPath");

    // A row with a null/blank value is as unusable as a missing row, so both cases
    // are rejected here rather than failing later inside the directory scan.
    string? libraryDirectory = libraryPath?.Value;
    string? cacheDirectory = cachePath?.Value;
    if (lastLibraryState is null
        || string.IsNullOrWhiteSpace(libraryDirectory)
        || string.IsNullOrWhiteSpace(cacheDirectory))
    {
        throw new MissingFieldException("[Error] Missing libraryState, musicLibraryPath, musicThumbnailPath. Please rerun the setup wizard with `Shadow setupWizard`.");
    }

    LibraryWatcher lw = new(libraryDirectory, [cacheDirectory], db);

    // Get library contents
    List<string> currentLibraryStateList = await lw.GetAllMultimediaAsync();

    // Compute their hash
    string currentLibraryStateString = string.Join("\n", currentLibraryStateList);
    string currentLibraryState = MetadataExtractor.GetStringMD5(currentLibraryStateString);

    // Compare against last known library state
    bool scanNecessary = currentLibraryState != lastLibraryState.Value;

    // The contents changed? Initiate a full rescan, then call LibraryWatcher.
    if (scanNecessary)
    {
        await lw.PerformFullScanAsync();
    }

    // State seems identical? Launch just the LibraryWatcher.
    // TODO: lw.Watch()...
    return scanNecessary;
}
}