1
0
mirror of https://github.com/yattee/yattee.git synced 2025-04-28 07:50:33 +05:30

Merge pull request #597 from stonerl/regex-for-chapters

more robust regex for chapters from description
This commit is contained in:
Arkadiusz Fal 2024-01-09 16:55:16 +01:00 committed by GitHub
commit 7ff52294a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -144,24 +144,37 @@ extension VideosAPI {
} }
func extractChapters(from description: String) -> [Chapter] { func extractChapters(from description: String) -> [Chapter] {
guard let chaptersRegularExpression = try? NSRegularExpression( /*
pattern: "(?<start>(?:[0-9]+:){1,}(?:[0-9]+))(?:\\s)+(?:- ?)?(?<title>.*)", The following chapter patterns are covered:
options: .caseInsensitive
) else { return [] }
let chapterLines = chaptersRegularExpression.matches( start - end - title / start - end: Title / start - end title
in: description, start - title / start: title / start title / [start] - title / [start]: title / [start] title
range: NSRange(description.startIndex..., in: description) index. title - start / index. title start
) title: (start)
The order is important: patterns are tried in sequence and the first one that yields any match is used.
*/
let patterns = [
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)",
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)",
"(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)",
"(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)"
]
for pattern in patterns {
guard let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) else { continue }
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description))
if !chapterLines.isEmpty {
return chapterLines.compactMap { line in return chapterLines.compactMap { line in
let titleRange = line.range(withName: "title") let titleRange = line.range(withName: "title")
let startRange = line.range(withName: "start") let startRange = line.range(withName: "start")
guard let titleSubstringRange = Range(titleRange, in: description), guard let titleSubstringRange = Range(titleRange, in: description),
let startSubstringRange = Range(startRange, in: description) else { return nil } let startSubstringRange = Range(startRange, in: description)
else {
let titleCapture = String(description[titleSubstringRange]) return nil
}
let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces)
let startCapture = String(description[startSubstringRange]) let startCapture = String(description[startSubstringRange])
let startComponents = startCapture.components(separatedBy: ":") let startComponents = startCapture.components(separatedBy: ":")
guard startComponents.count <= 3 else { return nil } guard startComponents.count <= 3 else { return nil }
@ -181,15 +194,13 @@ extension VideosAPI {
guard var startSeconds = seconds else { return nil } guard var startSeconds = seconds else { return nil }
if let minutes { startSeconds += (minutes ?? 0) * 60
startSeconds += 60 * minutes startSeconds += (hours ?? 0) * 60 * 60
}
if let hours {
startSeconds += 60 * 60 * hours
}
return .init(title: titleCapture, start: startSeconds) return .init(title: titleCapture, start: startSeconds)
} }
} }
} }
return []
}
}