Skip to content

Commit 43178e4

Browse files
committed
Added support for captions extraction.
1 parent b489070 commit 43178e4

File tree

13 files changed

+389
-27
lines changed

13 files changed

+389
-27
lines changed

Sources/YouTubeKit/BaseProtocols/Continuation/ResponseContinuation.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ public protocol ResponseContinuation: YouTubeResponse {
1515
var continuationToken: String? { get set }
1616

1717
/// Results of the continuation search.
18-
var results: [ResultsType] { get set }
18+
var results: [ResultsType] { get set }
1919
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//
2+
// YouTubeVideo+getCaptions.swift
3+
//
4+
//
5+
// Created by Antoine Bollengier on 27.06.2024.
6+
// Copyright © 2024 Antoine Bollengier (github.com/b5i). All rights reserved.
7+
//
8+
9+
public extension YouTubeVideo {
    /// Request the captions located at `captionType.url` and deliver the outcome through a callback.
    ///
    /// - Parameters:
    ///   - youtubeModel: the ``YouTubeModel`` used to send the request.
    ///   - captionType: the caption track to fetch; its ``YTCaption/url`` is sent as the `.customURL` of the request.
    ///   - result: closure receiving either the ``VideoCaptionsResponse`` or the error reported by the request.
    static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption, result: @escaping @Sendable (Result<VideoCaptionsResponse, Error>) -> Void) {
        VideoCaptionsResponse.sendNonThrowingRequest(
            youtubeModel: youtubeModel,
            data: [.customURL: captionType.url.absoluteString],
            result: { outcome in
                // Forward the outcome of the request to the caller without altering it.
                do {
                    result(.success(try outcome.get()))
                } catch {
                    result(.failure(error))
                }
            }
        )
    }

    /// Request the captions located at `captionType.url`, throwing if the request fails.
    ///
    /// - Parameters:
    ///   - youtubeModel: the ``YouTubeModel`` used to send the request.
    ///   - captionType: the caption track to fetch.
    /// - Returns: the ``VideoCaptionsResponse`` delivered by the callback-based variant.
    /// - Throws: the error delivered by the callback-based variant.
    @available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
    static func getCaptionsThrowing(youtubeModel: YouTubeModel, captionType: YTCaption) async throws -> VideoCaptionsResponse {
        return try await withCheckedThrowingContinuation { (pending: CheckedContinuation<VideoCaptionsResponse, Error>) in
            self.getCaptions(youtubeModel: youtubeModel, captionType: captionType, result: { outcome in
                pending.resume(with: outcome)
            })
        }
    }

    /// Request the captions located at `captionType.url` and return the outcome as a `Result`.
    ///
    /// - Parameters:
    ///   - youtubeModel: the ``YouTubeModel`` used to send the request.
    ///   - captionType: the caption track to fetch.
    /// - Returns: the `Result` delivered by the callback-based variant.
    @available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
    static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption) async -> Result<VideoCaptionsResponse, Error> {
        return await withCheckedContinuation { (pending: CheckedContinuation<Result<VideoCaptionsResponse, Error>, Never>) in
            self.getCaptions(youtubeModel: youtubeModel, captionType: captionType, result: { outcome in
                pending.resume(returning: outcome)
            })
        }
    }
}

Sources/YouTubeKit/BaseProtocols/Video/YouTubeVideo.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,4 +189,15 @@ public protocol YouTubeVideo {
189189
///
190190
/// Requires a ``YouTubeModel`` where ``YouTubeModel/cookies`` is defined.
191191
func removeLikeFromVideo(youtubeModel: YouTubeModel) async -> Error?
192+
193+
/// Get the captions for the current video.
194+
static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption, result: @escaping @Sendable (Result<VideoCaptionsResponse, Error>) -> Void)
195+
196+
/// Get the captions for the current video.
197+
@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
198+
static func getCaptionsThrowing(youtubeModel: YouTubeModel, captionType: YTCaption) async throws -> VideoCaptionsResponse
199+
200+
/// Get the captions for the current video.
201+
@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
202+
static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption) async -> Result<VideoCaptionsResponse, Error>
192203
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//
2+
// YTCaption.swift
3+
//
4+
//
5+
// Created by Antoine Bollengier on 27.06.2024.
6+
// Copyright © 2024 Antoine Bollengier (github.com/b5i). All rights reserved.
7+
//
8+
9+
import Foundation
10+
11+
/// Description of a caption track that can be passed to `YouTubeVideo.getCaptions`.
public struct YTCaption: Sendable {
    /// Code of the caption's language (exact format not visible here — TODO confirm, presumably something like `en`).
    public var languageCode: String

    /// Name of the caption's language (presumably human-readable — TODO confirm).
    public var languageName: String

    /// URL of the caption track; `YouTubeVideo.getCaptions` sends it as the `.customURL` of its request.
    public var url: URL

    /// Whether the caption is a translation (NOTE(review): inferred from the name only — confirm against the producer of this value).
    public var isTranslated: Bool

    /// Creates a caption description from its four components.
    public init(
        languageCode: String,
        languageName: String,
        url: URL,
        isTranslated: Bool
    ) {
        self.languageCode = languageCode
        self.languageName = languageName
        self.url = url
        self.isTranslated = isTranslated
    }
}

Sources/YouTubeKit/ErrorHandling/ParameterValidator+commonValidators.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,16 @@ public extension ParameterValidator {
7676
return .success(privacy)
7777
}
7878
})
79+
80+
/// Validator that requires the parameter to exist and to be parseable by `URL(string:)`.
///
/// On success the original string is returned unchanged.
static let urlValidator = ParameterValidator(needExistence: true, validator: { candidate in
    let validatorName = "URL validator"

    // Should never be reached because of `needExistence`.
    guard let candidate = candidate else {
        return .failure(.init(reason: "Nil value.", validatorFailedNameDescriptor: validatorName))
    }

    // Reject strings that `URL(string:)` can't parse.
    guard URL(string: candidate) != nil else {
        return .failure(.init(reason: "Given url is not a valid URL.", validatorFailedNameDescriptor: validatorName))
    }

    return .success(candidate)
})
7991
}

Sources/YouTubeKit/HeaderTypes+RawRepresentable.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ extension HeaderTypes: RawRepresentable {
7777
return "subscribeToChannelHeaders"
7878
case .unsubscribeFromChannelHeaders:
7979
return "unsubscribeFromChannelHeaders"
80+
case .videoCaptionsHeaders:
81+
return "videoCaptionsHeaders"
8082
case .customHeaders(let stringIdentifier):
8183
return stringIdentifier
8284
}

Sources/YouTubeKit/HeaderTypes.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ public enum HeaderTypes: Codable, Sendable {
147147
/// - Parameter browseId: The channel's id should be taken from ``YTChannel/channelId`` or ``YTLittleChannelInfos/channelId``.
148148
case unsubscribeFromChannelHeaders
149149

150+
case videoCaptionsHeaders
151+
150152
/// For custom headers
151153
case customHeaders(String)
152154
}

Sources/YouTubeKit/HeadersList.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,13 @@ public struct HeadersList: Codable {
156156
case params
157157
case visitorData
158158

159-
///Those are used during the modification of a playlist
159+
/// Those are used during the modification of a playlist
160160
case movingVideoId
161161
case videoBeforeId
162162
case playlistEditToken
163+
164+
/// Used to completely replace the URL of the request, including the parameters that it could potentially contain.
165+
case customURL
163166
}
164167
}
165168

@@ -170,9 +173,9 @@ public struct HeadersList: Codable {
170173
/// - Returns: An `URLRequest`built with the provided parameters and headers.
171174
public static func setHeadersAgentFor(
172175
content: HeadersList,
173-
data: [AddQueryInfo.ContentTypes : String]
176+
data: YouTubeResponse.RequestData
174177
) -> URLRequest {
175-
var url = content.url
178+
var url = URL(string: data[.customURL] ?? "") ?? content.url
176179
if content.parameters != nil {
177180
var parametersToAppend: [URLQueryItem] = []
178181
for parameter in content.parameters! {

Sources/YouTubeKit/URL+AppendQueryItems.swift

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import Foundation
99

1010
public extension URL {
11-
///adapted from https://stackoverflow.com/questions/34060754/how-can-i-build-a-url-with-query-parameters-containing-multiple-values-for-the-s
11+
/// adapted from https://stackoverflow.com/questions/34060754/how-can-i-build-a-url-with-query-parameters-containing-multiple-values-for-the-s
1212
/// If `queryItems` contains multiple headers with the same name, only the first will be kept.
1313
mutating func append(queryItems queryItemsToAdd: [URLQueryItem]) {
1414
guard var urlComponents = URLComponents(string: self.absoluteString) else { return }
@@ -30,4 +30,12 @@ public extension URL {
3030
/// Returns the url from new url components.
3131
self = urlComponents.url!
3232
}
33+
34+
/// Non-mutating counterpart of `append(queryItems:)`.
///
/// - Parameter queryItemsToAdd: the query items to add to a copy of this URL.
/// - Returns: a copy of this URL on which `append(queryItems:)` has been called with `queryItemsToAdd`.
func appending(queryItems queryItemsToAdd: [URLQueryItem]) -> URL {
    var copy = self
    copy.append(queryItems: queryItemsToAdd)
    return copy
}
3341
}

Sources/YouTubeKit/YouTubeModel.swift

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,9 @@ public class YouTubeModel {
108108

109109
do {
110110
try ResponseType.validateRequest(data: &data)
111-
112-
111+
113112
/// Create request
114-
let request = HeadersList.setHeadersAgentFor(
113+
let request: URLRequest = HeadersList.setHeadersAgentFor(
115114
content: headers,
116115
data: data
117116
)
@@ -258,6 +257,8 @@ public class YouTubeModel {
258257
return subscribeToChannelHeaders()
259258
case .unsubscribeFromChannelHeaders:
260259
return unsubscribeFromChannelHeaders()
260+
case .videoCaptionsHeaders:
261+
return videoCaptionsHeaders()
261262
case .customHeaders(let stringIdentifier):
262263
if let headersGenerator = customHeadersFunctions[stringIdentifier] {
263264
return headersGenerator()
@@ -1299,6 +1300,29 @@ public class YouTubeModel {
12991300
)
13001301
}
13011302
}
1303+
1304+
/// Headers used by the requests of type `.videoCaptionsHeaders`.
///
/// - Returns: the entry of `customHeaders` registered for `.videoCaptionsHeaders` if there is one, otherwise a default GET ``HeadersList``.
func videoCaptionsHeaders() -> HeadersList {
    // A user-provided definition takes precedence over the default one.
    if let overridingHeaders = self.customHeaders[.videoCaptionsHeaders] {
        return overridingHeaders
    }

    // Placeholder only: it will be overridden by the customURL option.
    let placeholderURL = URL(string: "https://www.youtube.com/")!

    return HeadersList(
        url: placeholderURL,
        method: .GET,
        headers: [
            .init(name: "Accept", content: "*/*"),
            .init(name: "Accept-Encoding", content: "gzip, deflate, br"),
            .init(name: "Host", content: "www.youtube.com"),
            .init(name: "User-Agent", content: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15"),
            .init(name: "Accept-Language", content: "\(self.selectedLocale);q=0.9"),
            .init(name: "Origin", content: "https://www.youtube.com/"),
            .init(name: "Referer", content: "https://www.youtube.com/"),
            .init(name: "Content-Type", content: "application/xml"),
            .init(name: "X-Origin", content: "https://www.youtube.com")
        ],
        parameters: []
    )
}
13021326
}
13031327

13041328
#if swift(>=5.10)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
//
2+
// VideoCaptionsResponse.swift
3+
//
4+
//
5+
// Created by Antoine Bollengier on 27.06.2024.
6+
// Copyright © 2024 Antoine Bollengier (github.com/b5i). All rights reserved.
7+
//
8+
9+
import Foundation
10+
11+
/// Struct representing a response containing the captions of a video.
public struct VideoCaptionsResponse: YouTubeResponse {
    public static let headersType: HeaderTypes = .videoCaptionsHeaders

    /// The request requires a `.customURL` parameter accepted by the `urlValidator`.
    public static let parametersValidationList: ValidationList = [.customURL: .urlValidator]

    /// The caption parts extracted from the response, in the order they were matched in the document.
    public var captionParts: [CaptionPart]

    public init(captionParts: [CaptionPart]) {
        self.captionParts = captionParts
    }

    /// Extract the caption parts from the raw body of a captions response.
    ///
    /// Every `<text start="…" dur="…">…</text>` element becomes a ``CaptionPart``. The duration of a part
    /// is capped so that it never extends past the start of the part that follows it.
    ///
    /// - Parameter data: the raw body of the response, decoded as UTF-8.
    /// - Returns: a ``VideoCaptionsResponse`` containing the extracted parts (empty if nothing matched).
    public static func decodeData(data: Data) throws -> VideoCaptionsResponse {
        var toReturn = VideoCaptionsResponse(captionParts: [])

        let rawText = String(decoding: data, as: UTF8.self)

        #if os(iOS) || os(tvOS) || os(watchOS) || os(macOS) || os(visionOS)
        // Entities are unescaped two times (see the warning on `CaptionPart.text`); presumably the text is double-escaped.
        let unescapedOnce = CFXMLCreateStringByUnescapingEntities(nil, rawText as CFString, nil)
        let dataText = CFXMLCreateStringByUnescapingEntities(nil, unescapedOnce, nil) as String
        #else
        let dataText = rawText
        #endif

        let regexResults = dataText.ytkRegexMatches(for: #"(?:<text start=\"([0-9\.]*)\" dur=\"([0-9\.]*)">([\w\W]*?)<\/text>)"#)

        // Start time of the part that follows the one being processed (none for the last part).
        var currentEndTime: Double = Double.infinity

        // Walk backwards so that the start of the following part is known when capping a duration.
        for result in regexResults.reversed() {
            // Expecting the full match and the three capture groups.
            guard result.count == 4 else { continue }

            let startTime = Double(result[1]) ?? 0
            let duration = min(Double(result[2]) ?? 0, currentEndTime - startTime)

            toReturn.captionParts.append(
                CaptionPart(
                    text: result[3],
                    startTime: startTime,
                    duration: duration
                )
            )

            currentEndTime = startTime
        }

        // Restore the original order of the document.
        toReturn.captionParts.reverse()

        return toReturn
    }

    /// Not supported: a ``VideoCaptionsResponse`` can't be decoded from JSON, use ``decodeData(data:)`` instead.
    /// - Parameter json: ignored.
    /// - Throws: always throws a `ResponseExtractionError`.
    public static func decodeJSON(json: JSON) throws -> VideoCaptionsResponse {
        throw ResponseExtractionError(reponseType: Self.self, stepDescription: "Can't decode a VideoCaptionsResponse from some raw JSON.")
    }

    /// Render the caption parts as a numbered list of cues in the given format.
    ///
    /// Each cue is made of its 1-based index, a `start --> end` line and the text of the part; cues are
    /// separated by an empty line. The `.vtt` format adds a `WEBVTT` header and uses `.` before the
    /// milliseconds, the `.srt` format uses `,`.
    ///
    /// - Parameter format: the format of the output.
    /// - Returns: the formatted captions, without trailing newline.
    public func getFormattedString(withFormat format: CaptionFormats) -> String {
        let millisecondsSeparator = format == .vtt ? "." : ","

        /// Format a time in seconds as `HH:MM:SS<separator>mmm`.
        func getTimeString(_ time: Double) -> String {
            // Round once to whole milliseconds and use integer arithmetic from there: truncating the
            // fractional part separately gave wrong results because of binary floating-point error
            // (e.g. 12.34 s was rendered with 339 ms). Non-finite, out-of-range and negative values
            // fall back to zero instead of trapping or producing negative fields.
            let totalMilliseconds = max(0, Int(exactly: (time * 1000).rounded()) ?? 0)

            let hours = String(format: "%02d", totalMilliseconds / 3_600_000)
            let minutes = String(format: "%02d", (totalMilliseconds / 60_000) % 60)
            let seconds = String(format: "%02d", (totalMilliseconds / 1_000) % 60)
            let milliseconds = String(format: "%03d", totalMilliseconds % 1_000)

            return "\(hours):\(minutes):\(seconds)\(millisecondsSeparator)\(milliseconds)"
        }

        let header = format == .vtt ? "WEBVTT\n\n" : ""

        let cues = self.captionParts.enumerated().map { offset, captionPart -> String in
            let start = getTimeString(captionPart.startTime)
            let end = getTimeString(captionPart.startTime + captionPart.duration)
            return "\(offset + 1)\n\(start) --> \(end)\n\(captionPart.text)"
        }

        return header + cues.joined(separator: "\n\n")
    }

    /// Output formats supported by ``getFormattedString(withFormat:)``.
    public enum CaptionFormats {
        case vtt
        case srt
    }

    public struct CaptionPart: Sendable, Codable {
        /// Text of the caption.
        ///
        /// - Warning: The text might contain HTML entities (if `CFXMLCreateStringByUnescapingEntities` is not present), to remove them, call a function like `CFXMLCreateStringByUnescapingEntities()` two times on the text.
        public var text: String

        /// Start time of the caption, in seconds.
        public var startTime: Double

        /// Duration of the caption, in seconds.
        public var duration: Double

        public init(text: String, startTime: Double, duration: Double) {
            self.text = text
            self.startTime = startTime
            self.duration = duration
        }
    }
}

0 commit comments

Comments
 (0)