API mostly working, starting to work on webapp

This commit is contained in:
Cameron
2024-08-23 23:52:36 -05:00
commit a4403ce17b
26 changed files with 1725 additions and 0 deletions

211
Background/ComicAnalyzer.cs Normal file
View File

@@ -0,0 +1,211 @@
using Microsoft.AspNetCore.Routing.Constraints;
using Microsoft.AspNetCore.StaticFiles;
using SharpCompress.Archives;
using SharpCompress.Archives.Rar;
using SharpCompress.Archives.SevenZip;
using System.Diagnostics;
using System.IO.Compression;
using System.IO.Hashing;
using System.Linq;
namespace ComiServ.Background
{
/// <summary>
/// Summary facts gathered about a single comic archive.
/// </summary>
/// <param name="FileSizeBytes">Size of the archive file, in bytes.</param>
/// <param name="PageCount">Number of pages reported for the archive.</param>
/// <param name="Xxhash">64-bit xxHash of the archive file, stored as a signed integer.</param>
public record ComicAnalysis(long FileSizeBytes, int PageCount, long Xxhash);
/// <summary>
/// A single page extracted from a comic archive.
/// </summary>
/// <param name="Filename">Name of the archive entry the page was read from.</param>
/// <param name="Mime">MIME type of the page.</param>
/// <param name="Data">Raw bytes of the page.</param>
public record ComicPage(string Filename, string Mime, byte[] Data);
/// <summary>
/// Inspects comic archives: gathers summary information and extracts individual pages.
/// </summary>
public interface IComicAnalyzer
{
    /// <summary>Lower-case file extensions (including the leading dot) treated as ZIP archives.</summary>
    public static readonly IReadOnlyList<string> ZIP_EXTS = [".cbz", ".zip"];

    /// <summary>Lower-case file extensions (including the leading dot) treated as RAR archives.</summary>
    public static readonly IReadOnlyList<string> RAR_EXTS = [".cbr", ".rar"];

    /// <summary>Lower-case file extensions (including the leading dot) treated as 7-Zip archives.</summary>
    public static readonly IReadOnlyList<string> ZIP7_EXTS = [".cb7", ".7z"];

    // One shared provider: constructing FileExtensionContentTypeProvider rebuilds its whole
    // extension-to-MIME table, and GetImageMime is called once per archive entry.
    // It is only ever read from after construction.
    private static readonly FileExtensionContentTypeProvider MimeProvider = new();

    /// <summary>
    /// Analyzes the comic at <paramref name="filename"/>.
    /// </summary>
    /// <returns>The analysis, or <c>null</c> when the file type is not a recognised archive.</returns>
    /// <remarks>Throws when a recognised archive cannot be analyzed.</remarks>
    public ComicAnalysis? AnalyzeComic(string filename);

    /// <summary>
    /// Asynchronous counterpart of <see cref="AnalyzeComic"/>; same null/throw contract.
    /// </summary>
    public Task<ComicAnalysis?> AnalyzeComicAsync(string filename);

    /// <summary>
    /// Extracts the page numbered <paramref name="page"/> from the comic at <paramref name="filepath"/>.
    /// </summary>
    /// <returns>The page, or <c>null</c> when <paramref name="page"/> is out of range.</returns>
    /// <remarks>Throws for file errors.</remarks>
    public ComicPage? GetComicPage(string filepath, int page);

    /// <summary>
    /// Determines the image MIME type for <paramref name="filename"/> purely from its name;
    /// the file is never opened.
    /// </summary>
    /// <returns>
    /// The MIME type when the extension maps to an image type; <c>null</c> for ALL unrecognised
    /// or non-image names.
    /// </returns>
    public static string? GetImageMime(string filename)
    {
        if (MimeProvider.TryGetContentType(filename, out var mime)
            && mime.StartsWith("image/", StringComparison.Ordinal))
        {
            return mime;
        }
        return null;
    }
}
/// <summary>
/// <see cref="IComicAnalyzer"/> implementation that does all of its work on the calling thread.
/// </summary>
/// <remarks>
/// The async methods actually just block: <see cref="AnalyzeComicAsync"/> runs the analysis
/// synchronously and hands back the finished task.
/// </remarks>
public class SynchronousComicAnalyzer(ILogger<IComicAnalyzer>? logger)
    : IComicAnalyzer
{
    private readonly ILogger<IComicAnalyzer>? _logger = logger;

    /// <summary>
    /// Analyzes the archive at <paramref name="filepath"/>, dispatching on its file extension
    /// (compared case-insensitively).
    /// </summary>
    /// <returns>The analysis, or <c>null</c> when the extension is not a supported archive type.</returns>
    public ComicAnalysis? AnalyzeComic(string filepath)
    {
        _logger?.LogTrace("Analyzing comic: {Filepath}", filepath);
        var ext = GetNormalizedExtension(filepath);
        if (IComicAnalyzer.ZIP_EXTS.Contains(ext))
        {
            return ZipAnalyze(filepath);
        }
        if (IComicAnalyzer.RAR_EXTS.Contains(ext))
        {
            return RarAnalyze(filepath);
        }
        if (IComicAnalyzer.ZIP7_EXTS.Contains(ext))
        {
            return Zip7Analyze(filepath);
        }
        // Unsupported file type: reported as null rather than thrown.
        return null;
    }

    /// <summary>
    /// Runs <see cref="AnalyzeComic"/> synchronously and wraps the outcome in a task.
    /// </summary>
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
    public async Task<ComicAnalysis?> AnalyzeComicAsync(string filename)
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
    {
        return AnalyzeComic(filename);
    }

    /// <summary>Analyzes a ZIP archive; pages are the entries whose name maps to an image MIME type.</summary>
    protected ComicAnalysis ZipAnalyze(string filepath)
        => AnalyzeFile(filepath, static stream =>
        {
            using var archive = new ZipArchive(stream, ZipArchiveMode.Read, false);
            return archive.Entries.Count(static e => IsPage(e.Name));
        });

    /// <summary>Analyzes a RAR archive; pages are the entries whose key maps to an image MIME type.</summary>
    protected ComicAnalysis RarAnalyze(string filepath)
        => AnalyzeFile(filepath, static stream =>
        {
            using var rar = RarArchive.Open(stream, new SharpCompress.Readers.ReaderOptions()
            {
                LeaveStreamOpen = false
            });
            return rar.Entries.Count(static e => IsPage(e.Key));
        });

    /// <summary>Analyzes a 7-Zip archive; pages are the entries whose key maps to an image MIME type.</summary>
    protected ComicAnalysis Zip7Analyze(string filepath)
        => AnalyzeFile(filepath, static stream =>
        {
            using var zip7 = SevenZipArchive.Open(stream, new SharpCompress.Readers.ReaderOptions()
            {
                LeaveStreamOpen = false
            });
            return zip7.Entries.Count(static e => IsPage(e.Key));
        });

    /// <summary>Computes the 64-bit xxHash of <paramref name="data"/>, reinterpreted as a signed value.</summary>
    protected static Int64 ComputeHash(ReadOnlySpan<byte> data)
        => unchecked((Int64)XxHash64.HashToUInt64(data));

    /// <summary>
    /// Extracts the 1-based <paramref name="page"/> from the archive at <paramref name="filepath"/>,
    /// dispatching on its file extension (compared case-insensitively, like <see cref="AnalyzeComic"/>).
    /// </summary>
    /// <returns>
    /// The page, or <c>null</c> when the page number is out of range or the extension is not a
    /// supported archive type.
    /// </returns>
    public ComicPage? GetComicPage(string filepath, int page)
    {
        if (page < 1)
        {
            return null;
        }
        var ext = GetNormalizedExtension(filepath);
        if (IComicAnalyzer.ZIP_EXTS.Contains(ext))
        {
            return GetPageZip(filepath, page);
        }
        if (IComicAnalyzer.RAR_EXTS.Contains(ext))
        {
            return GetPageRar(filepath, page);
        }
        if (IComicAnalyzer.ZIP7_EXTS.Contains(ext))
        {
            return GetPage7Zip(filepath, page);
        }
        return null;
    }

    /// <summary>
    /// Reads the 1-based <paramref name="page"/> from a ZIP archive. Image entries are ordered by
    /// their full name inside the archive.
    /// </summary>
    /// <returns>
    /// The page, or <c>null</c> when the page is out of range or the file or its directory does
    /// not exist.
    /// </returns>
    protected ComicPage? GetPageZip(string filepath, int page)
    {
        if (page < 1)
        {
            return null;
        }
        try
        {
            // Read-only access: the archive is never modified, and requesting write access
            // fails on read-only files.
            using var fileStream = File.OpenRead(filepath);
            using var arc = new ZipArchive(fileStream, ZipArchiveMode.Read, false);
            var (entry, mime) = arc.Entries
                .Select(static e => (Entry: e, Mime: IComicAnalyzer.GetImageMime(e.Name)))
                .Where(static candidate => candidate.Mime is not null)
                .OrderBy(static candidate => candidate.Entry.FullName)
                .Skip(page - 1)
                .FirstOrDefault();
            if (entry is null || mime is null)
            {
                return null;
            }
            using var pageStream = entry.Open();
            using var buffer = new MemoryStream();
            pageStream.CopyTo(buffer);
            return new
            (
                Filename: entry.Name,
                Mime: mime,
                Data: buffer.ToArray()
            );
        }
        catch (FileNotFoundException)
        {
            return null;
        }
        catch (DirectoryNotFoundException)
        {
            return null;
        }
    }

    /// <summary>
    /// Reads the 1-based <paramref name="page"/> from a RAR archive. Image entries are ordered by key.
    /// </summary>
    /// <remarks>Unlike <see cref="GetPageZip"/>, a missing file is not caught here.</remarks>
    protected ComicPage? GetPageRar(string filepath, int page)
    {
        if (page < 1)
        {
            return null;
        }
        using var rar = RarArchive.Open(filepath);
        return ReadArchivePage(rar.Entries, page);
    }

    /// <summary>
    /// Reads the 1-based <paramref name="page"/> from a 7-Zip archive. Image entries are ordered by key.
    /// </summary>
    /// <remarks>Unlike <see cref="GetPageZip"/>, a missing file is not caught here.</remarks>
    protected ComicPage? GetPage7Zip(string filepath, int page)
    {
        if (page < 1)
        {
            return null;
        }
        using var zip7 = SevenZipArchive.Open(filepath);
        return ReadArchivePage(zip7.Entries, page);
    }

    // Extension of the path, lower-cased culture-invariantly so it can be matched against the *_EXTS lists.
    private static string GetNormalizedExtension(string filepath)
        => new FileInfo(filepath).Extension.ToLowerInvariant();

    // True when an archive entry name counts as a page, i.e. maps to an image MIME type.
    private static bool IsPage(string? entryName)
        => IComicAnalyzer.GetImageMime(entryName ?? "") is not null;

    // Hashes the remainder of the stream without loading it into memory in one piece.
    private static long HashStream(Stream stream)
    {
        var hasher = new XxHash64();
        hasher.Append(stream);
        return unchecked((long)hasher.GetCurrentHashAsUInt64());
    }

    // Opens the file once, hashes it, rewinds, and lets countPages open the archive on the same stream.
    private static ComicAnalysis AnalyzeFile(string filepath, Func<Stream, int> countPages)
    {
        using var stream = File.OpenRead(filepath);
        // Captured up front: countPages disposes the archive, which closes the stream.
        var size = stream.Length;
        var hash = HashStream(stream);
        stream.Seek(0, SeekOrigin.Begin);
        var pages = countPages(stream);
        return new
        (
            FileSizeBytes: size,
            PageCount: pages,
            Xxhash: hash
        );
    }

    // Shared RAR/7z page extraction: pick the page-th image entry (ordered by key) and copy it out.
    private static ComicPage? ReadArchivePage(IEnumerable<IArchiveEntry> entries, int page)
    {
        var (entry, mime) = entries
            .Select(static e => (Entry: e, Mime: IComicAnalyzer.GetImageMime(e.Key ?? "")))
            .Where(static candidate => candidate.Mime is not null)
            .OrderBy(static candidate => candidate.Entry.Key)
            .Skip(page - 1)
            .FirstOrDefault();
        if (entry is null || mime is null)
        {
            return null;
        }
        using var buffer = new MemoryStream();
        entry.WriteTo(buffer);
        return new
        (
            Filename: entry.Key ?? "",
            Mime: mime,
            Data: buffer.ToArray()
        );
    }
}
}

168
Background/ComicScanner.cs Normal file
View File

@@ -0,0 +1,168 @@
using System.Collections.Generic;
using System.Runtime.InteropServices;
using ComiServ.Controllers;
using ComiServ.Entities;
using Microsoft.OpenApi.Writers;
namespace ComiServ.Background
{
/// <summary>
/// One comic discovered by a library scan.
/// </summary>
/// <param name="Filepath">Path of the comic file.</param>
/// <param name="FileSizeBytes">Size of the comic file, in bytes.</param>
/// <param name="Xxhash">64-bit xxHash of the comic file, stored as a signed integer.</param>
/// <param name="PageCount">Number of pages reported for the comic.</param>
public record ComicScanItem(string Filepath, long FileSizeBytes, long Xxhash, int PageCount);
/// <summary>
/// Scans the comic library on disk, either on demand or on a repeating schedule.
/// </summary>
public interface IComicScanner : IDisposable
{
    //TODO should be configurable
    /// <summary>
    /// File extensions (without the leading dot) regarded as comic archives.
    /// </summary>
    /// <remarks>
    /// The 7-Zip entry is "7z", matching the ".7z" extension in
    /// <c>IComicAnalyzer.ZIP7_EXTS</c>.
    /// </remarks>
    public static readonly IReadOnlyList<string> COMIC_EXTENSIONS = [
        "cbz", "zip",
        "cbr", "rar",
        "cb7", "7z",
    ];

    /// <summary>Requests a single library scan.</summary>
    public void TriggerLibraryScan();

    /// <summary>Requests library scans that repeat every <paramref name="period"/>.</summary>
    public void ScheduleRepeatedLibraryScans(TimeSpan period);

    /// <summary>
    /// Performs a library scan on the calling thread.
    /// </summary>
    /// <param name="token">Optional token used to abandon the scan.</param>
    /// <returns>The comics that were found, keyed by file path.</returns>
    public IDictionary<string, ComicScanItem> PerfomLibraryScan(CancellationToken? token = null);
}
/// <summary>
/// Walks the configured library root, analyzes every file found there, and synchronises the
/// results into the comics database.
/// </summary>
public class ComicScanner(
    IServiceProvider provider
    ) : IComicScanner
{
    private readonly ITaskManager _manager = provider.GetRequiredService<ITaskManager>();
    private readonly Configuration _config = provider.GetRequiredService<IConfigService>().Config;
    private readonly IComicAnalyzer _analyzer = provider.GetRequiredService<IComicAnalyzer>();
    // Kept so that each database operation can resolve its context from a fresh scope.
    private readonly IServiceProvider _provider = provider;
    // Cancels the currently scheduled repeating scan, if any.
    private CancellationTokenSource? _repeatedScanTokenSource = null;

    /// <summary>
    /// Enumerates every file under the library root (recursively) and analyzes it.
    /// </summary>
    /// <param name="token">Checked before each file; cancellation throws.</param>
    /// <returns>
    /// The analyzed comics keyed by their path relative to the library root. Files for which
    /// the analyzer returns <c>null</c> (unsupported type) are left out.
    /// </returns>
    /// <remarks>Exceptions thrown by the analyzer are not caught and end the scan.</remarks>
    public IDictionary<string, ComicScanItem> PerfomLibraryScan(CancellationToken? token = null)
    {
        Dictionary<string, ComicScanItem> found = new();
        var root = new DirectoryInfo(_config.LibraryRoot);
        foreach (var fi in root.EnumerateFiles("*", SearchOption.AllDirectories))
        {
            token?.ThrowIfCancellationRequested();
            var analysis = _analyzer.AnalyzeComic(fi.FullName);
            if (analysis is null)
            {
                //ignore files of the wrong extension
                continue;
            }
            var path = Path.GetRelativePath(_config.LibraryRoot, fi.FullName);
            found.Add(path, new ComicScanItem
            (
                Filepath: path,
                FileSizeBytes: analysis.FileSizeBytes,
                Xxhash: analysis.Xxhash,
                PageCount: analysis.PageCount
            ));
        }
        return found;
    }

    /// <summary>Starts a one-off "Library Scan" task on the task manager.</summary>
    public void TriggerLibraryScan()
    {
        TaskItem ti = new(
            TaskTypes.Scan,
            "Library Scan",
            ScanAndUpdate,
            null);
        _manager.StartTask(ti);
    }

    /// <summary>
    /// Schedules a "Scheduled Library Scan" task that repeats every <paramref name="interval"/>,
    /// cancelling any repeating scan scheduled earlier through this method.
    /// </summary>
    public void ScheduleRepeatedLibraryScans(TimeSpan interval)
    {
        _repeatedScanTokenSource?.Cancel();
        _repeatedScanTokenSource?.Dispose();
        _repeatedScanTokenSource = new();
        TaskItem ti = new(
            TaskTypes.Scan,
            "Scheduled Library Scan",
            ScanAndUpdate,
            _repeatedScanTokenSource.Token);
        _manager.ScheduleTask(ti, interval);
    }

    /// <summary>
    /// Applies scan results to the database: refreshes comics that were found again, flags
    /// comics that were not found as no longer existing, inserts newly discovered comics, and
    /// starts a cover-extraction task for the new ones.
    /// </summary>
    /// <param name="items">Scan results keyed by comic file path.</param>
    public void UpdateDatabaseWithScanResults(IDictionary<string, ComicScanItem> items)
    {
        using var scope = _provider.CreateScope();
        var services = scope.ServiceProvider;
        using var context = services.GetRequiredService<ComicsContext>();
        //not an ideal algorithm
        //need to go through every comic in the database to update `Exists`
        //also need to go through every discovered comic to add new ones
        //and should make sure not to double up on the overlaps
        //there should be a faster method than using ExceptBy but I don't think it's urgent
        //TODO profile on large database
        HashSet<string> alreadyExistingFiles = new();
        foreach (var comic in context.Comics)
        {
            if (items.TryGetValue(comic.Filepath, out var info))
            {
                comic.FileXxhash64 = info.Xxhash;
                comic.Exists = true;
                comic.PageCount = info.PageCount;
                comic.SizeBytes = info.FileSizeBytes;
                alreadyExistingFiles.Add(comic.Filepath);
            }
            else
            {
                comic.Exists = false;
            }
        }
        var newComics = items
            .ExceptBy(alreadyExistingFiles, p => p.Key)
            .Select(p => new Comic()
            {
                Handle = context.CreateHandle(),
                Exists = true,
                Filepath = p.Value.Filepath,
                Title = Path.GetFileName(p.Value.Filepath),
                Description = "",
                SizeBytes = p.Value.FileSizeBytes,
                FileXxhash64 = p.Value.Xxhash,
                PageCount = p.Value.PageCount
            })
            .ToList();
        // Persist first, so cover extraction is only started for comics that were actually saved.
        context.Comics.AddRange(newComics);
        context.SaveChanges();
        // Covers are keyed by file hash: one task per distinct hash keeps identical files from
        // racing each other to insert the same cover.
        foreach (var comic in newComics.DistinctBy(c => c.FileXxhash64))
        {
            var coverSource = Path.Join(_config.LibraryRoot, comic.Filepath);
            var hash = comic.FileXxhash64;
            _manager.StartTask(new(
                TaskTypes.GetCover,
                $"Get Cover: {comic.Title}",
                _ => InsertCover(coverSource, hash)
                ));
        }
    }

    /// <summary>
    /// Stores the first page of the comic at <paramref name="filepath"/> as the cover for
    /// <paramref name="hash"/>, unless a cover with that hash is already stored or the page
    /// cannot be read.
    /// </summary>
    protected void InsertCover(string filepath, long hash)
    {
        using var scope = _provider.CreateScope();
        var services = scope.ServiceProvider;
        using var context = services.GetRequiredService<ComicsContext>();
        //assuming no hash overlap
        //if you already have a cover, assume it's correct
        // Any() only asks whether a row exists instead of loading the stored cover image.
        if (context.Covers.Any(c => c.FileXxhash64 == hash))
        {
            return;
        }
        var page = _analyzer.GetComicPage(filepath, 1);
        if (page is null)
        {
            return;
        }
        context.Covers.Add(new()
        {
            FileXxhash64 = hash,
            Filename = page.Filename,
            CoverFile = page.Data
        });
        context.SaveChanges();
    }

    /// <summary>
    /// Cancels the repeating scan scheduled through <see cref="ScheduleRepeatedLibraryScans"/>
    /// (if any) and releases its token source.
    /// </summary>
    public void Dispose()
    {
        var source = _repeatedScanTokenSource;
        _repeatedScanTokenSource = null;
        if (source is null)
        {
            return;
        }
        try
        {
            // Signal cancellation so the scheduled scan stops instead of outliving this scanner.
            source.Cancel();
        }
        catch (AggregateException)
        {
            // A cancellation callback failed; Dispose must not throw, and the source is
            // released below regardless.
        }
        source.Dispose();
    }

    // Body shared by the one-off and the repeating scan tasks.
    private void ScanAndUpdate(CancellationToken? token)
    {
        var items = PerfomLibraryScan(token);
        token?.ThrowIfCancellationRequested();
        UpdateDatabaseWithScanResults(items);
    }
}
}

98
Background/TaskManager.cs Normal file
View File

@@ -0,0 +1,98 @@
using System.Collections.Concurrent;
namespace ComiServ.Background
{
/// <summary>
/// Kinds of background task.
/// </summary>
public enum TaskTypes
{
    /// <summary>A library scan.</summary>
    Scan = 0,

    /// <summary>Cover retrieval for a comic.</summary>
    GetCover = 1,
}
/// <summary>
/// Describes a unit of background work: its kind, a display name, the work itself, and an
/// optional cancellation token owned by the creator.
/// </summary>
/// <remarks>
/// The action needs to use the token parameter it is invoked with rather than its own token,
/// because that parameter gets merged with the master token.
/// </remarks>
public class TaskItem
{
    /// <summary>Kind of task.</summary>
    public readonly TaskTypes Type;

    /// <summary>Display name of the task.</summary>
    public readonly string Name;

    /// <summary>The work to run; receives the token it should observe.</summary>
    public readonly Action<CancellationToken?> Action;

    /// <summary>The creator's token, or <see cref="CancellationToken.None"/> when none was given.</summary>
    public readonly CancellationToken Token;

    public TaskItem(TaskTypes type, string name, Action<CancellationToken?> action, CancellationToken? token = null)
    {
        Type = type;
        Name = name;
        Action = action;
        // default(CancellationToken) is CancellationToken.None.
        Token = token.GetValueOrDefault();
    }
}
/// <summary>
/// Runs and schedules background <see cref="TaskItem"/>s.
/// </summary>
public interface ITaskManager : IDisposable
{
    /// <summary>Starts <paramref name="taskItem"/>.</summary>
    void StartTask(TaskItem taskItem);

    /// <summary>Schedules <paramref name="taskItem"/> using <paramref name="interval"/>.</summary>
    void ScheduleTask(TaskItem taskItem, TimeSpan interval);

    /// <summary>Gets the names of at most <paramref name="limit"/> tasks.</summary>
    string[] GetTasks(int limit);

    /// <summary>Cancels all tasks.</summary>
    void CancelAll();
}
/// <summary>
/// Runs <see cref="TaskItem"/>s on the thread pool and on repeating timers, and keeps track of
/// the ones that are still active.
/// </summary>
/// <remarks>
/// Every task observes a token linked to both its own <see cref="TaskItem.Token"/> and a
/// manager-wide master token. The master token is never replaced, so once
/// <see cref="CancelAll"/> has been called, tasks started afterwards are handed an
/// already-cancelled token.
/// </remarks>
public class TaskManager(ILogger<ITaskManager>? logger)
    : ITaskManager
{
    private readonly ConcurrentDictionary<Task, TaskItem> _activeTasks = [];
    private readonly CancellationTokenSource _masterToken = new();
    private readonly ILogger<ITaskManager>? _logger = logger;
    private readonly ConcurrentDictionary<System.Timers.Timer, TaskItem> _scheduled = [];
    private readonly object _taskCleanupLock = new();

    /// <summary>
    /// Starts <paramref name="taskItem"/> on the thread pool and tracks it until it completes.
    /// </summary>
    /// <exception cref="InvalidOperationException">The task could not be registered as active.</exception>
    public void StartTask(TaskItem taskItem)
    {
        _logger?.LogTrace("Start Task: {Name}", taskItem.Name);
        var tokenSource = CancellationTokenSource.CreateLinkedTokenSource(_masterToken.Token, taskItem.Token);
        Task newTask;
        try
        {
            newTask = Task.Run(() => taskItem.Action(tokenSource.Token), tokenSource.Token);
        }
        catch
        {
            tokenSource.Dispose();
            throw;
        }
        if (!_activeTasks.TryAdd(newTask, taskItem))
        {
            throw new InvalidOperationException("failed to add task");
        }
        // The follow-up is deliberately NOT tied to the master token: a cancelled follow-up
        // would leave the finished task in the active list and the linked source undisposed.
        newTask.ContinueWith(
            finished =>
            {
                tokenSource.Dispose();
                ManageFinishedTasks(finished);
            },
            CancellationToken.None,
            TaskContinuationOptions.None,
            TaskScheduler.Default);
    }

    /// <summary>
    /// Runs <paramref name="taskItem"/> every <paramref name="interval"/> until the master
    /// token or the item's own token is cancelled.
    /// </summary>
    /// <remarks>
    /// The action is invoked directly from the timer's Elapsed event; these runs are not added
    /// to the active-task list.
    /// </remarks>
    public void ScheduleTask(TaskItem taskItem, TimeSpan interval)
    {
        var timer = new System.Timers.Timer(interval);
        CancellationTokenSource linked;
        try
        {
            linked = CancellationTokenSource.CreateLinkedTokenSource(_masterToken.Token, taskItem.Token);
        }
        catch
        {
            timer.Dispose();
            throw;
        }
        timer.Elapsed += (_, _) => taskItem.Action(linked.Token);
        _scheduled.TryAdd(timer, taskItem);
        timer.Start();
        // Registered after Start(): when the token is already cancelled the callback runs
        // right here, so the timer is reliably stopped instead of being started afterwards.
        linked.Token.Register(() =>
        {
            timer.Stop();
            timer.Dispose();
            _scheduled.TryRemove(timer, out _);
        });
    }

    /// <summary>Returns the names of up to <paramref name="limit"/> currently tracked tasks.</summary>
    public string[] GetTasks(int limit)
    {
        return _activeTasks.Select(p => p.Value.Name).Take(limit).ToArray();
    }

    /// <summary>Cancels the master token, which every started and scheduled task is linked to.</summary>
    public void CancelAll()
    {
        _masterToken.Cancel();
    }

    /// <summary>Removes every completed task from the active-task list.</summary>
    public void ManageFinishedTasks()
    {
        ManageFinishedTasks(null);
    }

    /// <summary>
    /// Removes every completed task from the active-task list, logging the ones that failed.
    /// </summary>
    /// <param name="cause">The task whose completion triggered the sweep; not otherwise used.</param>
    protected void ManageFinishedTasks(Task? cause = null)
    {
        //there shouldn't really be concerns with running multiple simultaneously but might as well
        lock (_taskCleanupLock)
        {
            //snapshot first because we're modifying the dictionary
            foreach (var (task, item) in _activeTasks.ToArray())
            {
                if (!task.IsCompleted)
                {
                    continue;
                }
                if (!_activeTasks.TryRemove(task, out _))
                {
                    continue;
                }
                if (task.IsFaulted)
                {
                    // Reading Exception marks the fault as observed; nothing else reports it.
                    _logger?.LogError(task.Exception, "Task failed: {Name}", item.Name);
                }
                _logger?.LogTrace("Removed Task: {Name}", item.Name);
            }
        }
    }

    /// <summary>Stops and disposes all scheduled timers, then disposes the master token source.</summary>
    public void Dispose()
    {
        foreach (var timer in _scheduled.Keys)
        {
            if (_scheduled.TryRemove(timer, out _))
            {
                timer.Dispose();
            }
        }
        _masterToken.Dispose();
    }
}
}