mirror of
https://github.com/yattee/yattee.git
synced 2024-12-13 22:00:31 +05:30
faster chapter extraction
The extraction of chapters is now faster because it runs in parallel for each pattern. Also, a new pattern has been added: "(start) title"
This commit is contained in:
parent
90777d91f6
commit
c9fb41c8e8
@ -152,58 +152,94 @@ extension VideosAPI {
|
|||||||
/*
|
/*
|
||||||
The following chapter patterns are covered:
|
The following chapter patterns are covered:
|
||||||
|
|
||||||
start - end - title / start - end: Title / start - end title
|
1) "start - end - title" / "start - end: Title" / "start - end title"
|
||||||
start - title / start: title / start title / [start] - title / [start]: title / [start] title
|
2) "start - title" / "start: title" / "start title" / "[start] - title" / "[start]: title" / "[start] title"
|
||||||
index. title - start / index. title start
|
3) "index. title - start" / "index. title start"
|
||||||
title: (start)
|
4) "title: (start)"
|
||||||
|
5) "(start) title"
|
||||||
|
|
||||||
The order is important!
|
These represent:
|
||||||
|
|
||||||
|
- "start" and "end" are timestamps, defining the start and end of the individual chapter
|
||||||
|
- "title" is the name of the chapter
|
||||||
|
- "index" is the chapter's position in a list
|
||||||
|
|
||||||
|
The order of these patterns is important as it determines the priority. The patterns listed first have a higher priority.
|
||||||
|
In the case of multiple matches, the pattern with the highest priority will be chosen - lower number means higher priority.
|
||||||
*/
|
*/
|
||||||
let patterns = [
|
let patterns = [
|
||||||
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)",
|
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)",
|
||||||
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)",
|
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)",
|
||||||
"(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)",
|
"(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)",
|
||||||
"(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)"
|
"(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)",
|
||||||
|
"(?<=^|\\n)\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)\\s*(?<title>.+?)(?=\\n|$)"
|
||||||
]
|
]
|
||||||
|
|
||||||
for pattern in patterns {
|
let extractChaptersGroup = DispatchGroup()
|
||||||
guard let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) else { continue }
|
var capturedChapters: [Int: [Chapter]] = [:]
|
||||||
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description))
|
let lock = NSLock()
|
||||||
|
|
||||||
if !chapterLines.isEmpty {
|
for (index, pattern) in patterns.enumerated() {
|
||||||
return chapterLines.compactMap { line in
|
extractChaptersGroup.enter()
|
||||||
let titleRange = line.range(withName: "title")
|
DispatchQueue.global().async {
|
||||||
let startRange = line.range(withName: "start")
|
if let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) {
|
||||||
guard let titleSubstringRange = Range(titleRange, in: description),
|
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description))
|
||||||
let startSubstringRange = Range(startRange, in: description)
|
let extractedChapters = chapterLines.compactMap { line -> Chapter? in
|
||||||
else {
|
let titleRange = line.range(withName: "title")
|
||||||
return nil
|
let startRange = line.range(withName: "start")
|
||||||
}
|
|
||||||
let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces)
|
|
||||||
let startCapture = String(description[startSubstringRange])
|
|
||||||
let startComponents = startCapture.components(separatedBy: ":")
|
|
||||||
guard startComponents.count <= 3 else { return nil }
|
|
||||||
|
|
||||||
var hours: Double?
|
guard let titleSubstringRange = Range(titleRange, in: description),
|
||||||
var minutes: Double?
|
let startSubstringRange = Range(startRange, in: description)
|
||||||
var seconds: Double?
|
else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if startComponents.count == 3 {
|
let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces)
|
||||||
hours = Double(startComponents[0])
|
let startCapture = String(description[startSubstringRange])
|
||||||
minutes = Double(startComponents[1])
|
let startComponents = startCapture.components(separatedBy: ":")
|
||||||
seconds = Double(startComponents[2])
|
guard startComponents.count <= 3 else { return nil }
|
||||||
} else if startComponents.count == 2 {
|
|
||||||
minutes = Double(startComponents[0])
|
var hours: Double?
|
||||||
seconds = Double(startComponents[1])
|
var minutes: Double?
|
||||||
|
var seconds: Double?
|
||||||
|
|
||||||
|
if startComponents.count == 3 {
|
||||||
|
hours = Double(startComponents[0])
|
||||||
|
minutes = Double(startComponents[1])
|
||||||
|
seconds = Double(startComponents[2])
|
||||||
|
} else if startComponents.count == 2 {
|
||||||
|
minutes = Double(startComponents[0])
|
||||||
|
seconds = Double(startComponents[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
guard var startSeconds = seconds else { return nil }
|
||||||
|
|
||||||
|
startSeconds += (minutes ?? 0) * 60
|
||||||
|
startSeconds += (hours ?? 0) * 60 * 60
|
||||||
|
|
||||||
|
return Chapter(title: titleCapture, start: startSeconds)
|
||||||
}
|
}
|
||||||
|
|
||||||
guard var startSeconds = seconds else { return nil }
|
if !extractedChapters.isEmpty {
|
||||||
|
lock.lock()
|
||||||
startSeconds += (minutes ?? 0) * 60
|
capturedChapters[index] = extractedChapters
|
||||||
startSeconds += (hours ?? 0) * 60 * 60
|
lock.unlock()
|
||||||
|
}
|
||||||
return .init(title: titleCapture, start: startSeconds)
|
|
||||||
}
|
}
|
||||||
|
extractChaptersGroup.leave()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extractChaptersGroup.wait()
|
||||||
|
|
||||||
|
// Now we sort the keys of the capturedChapters dictionary.
|
||||||
|
// These keys correspond to the priority of each pattern.
|
||||||
|
let sortedKeys = Array(capturedChapters.keys).sorted(by: <)
|
||||||
|
|
||||||
|
// Return first non-empty result in the order of patterns
|
||||||
|
for key in sortedKeys {
|
||||||
|
if let chapters = capturedChapters[key], !chapters.isEmpty {
|
||||||
|
return chapters
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return []
|
return []
|
||||||
|
Loading…
Reference in New Issue
Block a user