1
0
mirror of https://github.com/yattee/yattee.git synced 2025-01-06 01:20:31 +05:30

faster chapter extraction

Chapter extraction is now faster because it runs in parallel for each pattern. Also, a new pattern has been added: "(start) title".
This commit is contained in:
Toni Förster 2024-05-19 17:43:35 +02:00
parent 90777d91f6
commit c9fb41c8e8
No known key found for this signature in database
GPG Key ID: 292F3E5086C83FC7

View File

@ -152,33 +152,48 @@ extension VideosAPI {
/* /*
The following chapter patterns are covered: The following chapter patterns are covered:
start - end - title / start - end: Title / start - end title 1) "start - end - title" / "start - end: Title" / "start - end title"
start - title / start: title / start title / [start] - title / [start]: title / [start] title 2) "start - title" / "start: title" / "start title" / "[start] - title" / "[start]: title" / "[start] title"
index. title - start / index. title start 3) "index. title - start" / "index. title start"
title: (start) 4) "title: (start)"
5) "(start) title"
The order is important! These represent:
- "start" and "end" are timestamps, defining the start and end of the individual chapter
- "title" is the name of the chapter
- "index" is the chapter's position in a list
The order of these patterns is important as it determines the priority. The patterns listed first have a higher priority.
In the case of multiple matches, the pattern with the highest priority will be chosen - lower number means higher priority.
*/ */
let patterns = [ let patterns = [
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)", "(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)",
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)", "(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)",
"(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)", "(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)",
"(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)" "(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)",
"(?<=^|\\n)\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)\\s*(?<title>.+?)(?=\\n|$)"
] ]
for pattern in patterns { let extractChaptersGroup = DispatchGroup()
guard let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) else { continue } var capturedChapters: [Int: [Chapter]] = [:]
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description)) let lock = NSLock()
if !chapterLines.isEmpty { for (index, pattern) in patterns.enumerated() {
return chapterLines.compactMap { line in extractChaptersGroup.enter()
DispatchQueue.global().async {
if let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) {
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description))
let extractedChapters = chapterLines.compactMap { line -> Chapter? in
let titleRange = line.range(withName: "title") let titleRange = line.range(withName: "title")
let startRange = line.range(withName: "start") let startRange = line.range(withName: "start")
guard let titleSubstringRange = Range(titleRange, in: description), guard let titleSubstringRange = Range(titleRange, in: description),
let startSubstringRange = Range(startRange, in: description) let startSubstringRange = Range(startRange, in: description)
else { else {
return nil return nil
} }
let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces) let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces)
let startCapture = String(description[startSubstringRange]) let startCapture = String(description[startSubstringRange])
let startComponents = startCapture.components(separatedBy: ":") let startComponents = startCapture.components(separatedBy: ":")
@ -202,8 +217,29 @@ extension VideosAPI {
startSeconds += (minutes ?? 0) * 60 startSeconds += (minutes ?? 0) * 60
startSeconds += (hours ?? 0) * 60 * 60 startSeconds += (hours ?? 0) * 60 * 60
return .init(title: titleCapture, start: startSeconds) return Chapter(title: titleCapture, start: startSeconds)
} }
if !extractedChapters.isEmpty {
lock.lock()
capturedChapters[index] = extractedChapters
lock.unlock()
}
}
extractChaptersGroup.leave()
}
}
extractChaptersGroup.wait()
// Now we sort the keys of the capturedChapters dictionary.
// These keys correspond to the priority of each pattern.
let sortedKeys = Array(capturedChapters.keys).sorted(by: <)
// Return first non-empty result in the order of patterns
for key in sortedKeys {
if let chapters = capturedChapters[key], !chapters.isEmpty {
return chapters
} }
} }
return [] return []