Skip to content

Commit

Permalink
Added support for captions extraction.
Browse files Browse the repository at this point in the history
  • Loading branch information
b5i committed Jun 27, 2024
1 parent b489070 commit 43178e4
Show file tree
Hide file tree
Showing 13 changed files with 389 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ public protocol ResponseContinuation: YouTubeResponse {
var continuationToken: String? { get set }

/// Results of the continuation search.
var results: [ResultsType] { get set }
var results: [ResultsType] { get set }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//
// YouTubeVideo+getCaptions.swift
//
//
// Created by Antoine Bollengier on 27.06.2024.
// Copyright © 2024 Antoine Bollengier (github.com/b5i). All rights reserved.
//

public extension YouTubeVideo {
    /// Download the captions described by `captionType` and deliver the outcome through a callback.
    ///
    /// - Parameters:
    ///   - youtubeModel: the model used to send the request.
    ///   - captionType: the caption track to fetch; the absolute string of its `url` is given to the request as the `.customURL` value.
    ///   - result: closure receiving either the decoded ``VideoCaptionsResponse`` or the error reported by the request.
    static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption, result: @escaping @Sendable (Result<VideoCaptionsResponse, Error>) -> Void) {
        let captionsURL = captionType.url.absoluteString

        VideoCaptionsResponse.sendNonThrowingRequest(
            youtubeModel: youtubeModel,
            data: [.customURL: captionsURL],
            result: { outcome in
                // The outcome is handed over to the caller untouched.
                result(outcome)
            }
        )
    }

    /// Download the captions described by `captionType`, throwing if the request fails.
    ///
    /// Async wrapper around the callback-based `getCaptions(youtubeModel:captionType:result:)`.
    ///
    /// - Returns: the decoded ``VideoCaptionsResponse``.
    /// - Throws: the error delivered to the callback of the wrapped method.
    @available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
    static func getCaptionsThrowing(youtubeModel: YouTubeModel, captionType: YTCaption) async throws -> VideoCaptionsResponse {
        try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<VideoCaptionsResponse, Error>) in
            Self.getCaptions(youtubeModel: youtubeModel, captionType: captionType, result: { outcome in
                continuation.resume(with: outcome)
            })
        }
    }

    /// Download the captions described by `captionType` and return the outcome as a `Result`.
    ///
    /// Async wrapper around the callback-based `getCaptions(youtubeModel:captionType:result:)`; it never throws.
    ///
    /// - Returns: the `Result` delivered to the callback of the wrapped method.
    @available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
    static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption) async -> Result<VideoCaptionsResponse, Error> {
        await withCheckedContinuation { (continuation: CheckedContinuation<Result<VideoCaptionsResponse, Error>, Never>) in
            Self.getCaptions(youtubeModel: youtubeModel, captionType: captionType, result: { outcome in
                continuation.resume(returning: outcome)
            })
        }
    }
}
11 changes: 11 additions & 0 deletions Sources/YouTubeKit/BaseProtocols/Video/YouTubeVideo.swift
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,15 @@ public protocol YouTubeVideo {
///
/// Requires a ``YouTubeModel`` where ``YouTubeModel/cookies`` is defined.
func removeLikeFromVideo(youtubeModel: YouTubeModel) async -> Error?

/// Get the captions for the current video.
static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption, result: @escaping @Sendable (Result<VideoCaptionsResponse, Error>) -> Void)

/// Get the captions for the current video.
@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
static func getCaptionsThrowing(youtubeModel: YouTubeModel, captionType: YTCaption) async throws -> VideoCaptionsResponse

/// Get the captions for the current video.
@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
static func getCaptions(youtubeModel: YouTubeModel, captionType: YTCaption) async -> Result<VideoCaptionsResponse, Error>
}
26 changes: 26 additions & 0 deletions Sources/YouTubeKit/BaseStructs/YTCaption.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//
// YTCaption.swift
//
//
// Created by Antoine Bollengier on 27.06.2024.
// Copyright © 2024 Antoine Bollengier (github.com/b5i). All rights reserved.
//

import Foundation

/// Description of a caption track that can be downloaded with `YouTubeVideo.getCaptions`.
public struct YTCaption: Sendable {
    /// Code of the language of the caption track.
    // NOTE(review): presumably a code such as "en" or "fr" — confirm against the extraction code.
    public var languageCode: String

    /// Name of the language of the caption track.
    public var languageName: String

    /// URL from which the caption track is downloaded.
    public var url: URL

    /// Whether the caption track is a translation.
    // NOTE(review): exact meaning (e.g. auto-translated by YouTube) is not visible here — confirm.
    public var isTranslated: Bool

    /// Create a caption description from its four components.
    public init(languageCode: String, languageName: String, url: URL, isTranslated: Bool) {
        self.url = url
        self.isTranslated = isTranslated
        self.languageName = languageName
        self.languageCode = languageCode
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,16 @@ public extension ParameterValidator {
return .success(privacy)
}
})

/// Validator that requires the parameter to be present and to be parsable by `URL(string:)`.
///
/// On success the original string is returned unchanged.
static let urlValidator = ParameterValidator(needExistence: true, validator: { candidate in
    let validatorName = "URL validator"

    // Per the original author's note, this branch should never run because `needExistence` is true.
    guard let candidate = candidate else {
        return .failure(.init(reason: "Nil value.", validatorFailedNameDescriptor: validatorName))
    }

    // Only checks that Foundation can parse the string; no scheme or host is required.
    guard URL(string: candidate) != nil else {
        return .failure(.init(reason: "Given url is not a valid URL.", validatorFailedNameDescriptor: validatorName))
    }

    return .success(candidate)
})
}
2 changes: 2 additions & 0 deletions Sources/YouTubeKit/HeaderTypes+RawRepresentable.swift
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ extension HeaderTypes: RawRepresentable {
return "subscribeToChannelHeaders"
case .unsubscribeFromChannelHeaders:
return "unsubscribeFromChannelHeaders"
case .videoCaptionsHeaders:
return "videoCaptionsHeaders"
case .customHeaders(let stringIdentifier):
return stringIdentifier
}
Expand Down
2 changes: 2 additions & 0 deletions Sources/YouTubeKit/HeaderTypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ public enum HeaderTypes: Codable, Sendable {
/// - Parameter browseId: The channel's id should be taken from ``YTChannel/channelId`` or ``YTLittleChannelInfos/channelId``.
case unsubscribeFromChannelHeaders

case videoCaptionsHeaders

/// For custom headers
case customHeaders(String)
}
9 changes: 6 additions & 3 deletions Sources/YouTubeKit/HeadersList.swift
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,13 @@ public struct HeadersList: Codable {
case params
case visitorData

///Those are used during the modification of a playlist
/// Those are used during the modification of a playlist
case movingVideoId
case videoBeforeId
case playlistEditToken

/// Used to completely replace the URL of the request, including any query parameters it could potentially contain.
case customURL
}
}

Expand All @@ -170,9 +173,9 @@ public struct HeadersList: Codable {
/// - Returns: An `URLRequest`built with the provided parameters and headers.
public static func setHeadersAgentFor(
content: HeadersList,
data: [AddQueryInfo.ContentTypes : String]
data: YouTubeResponse.RequestData
) -> URLRequest {
var url = content.url
var url = URL(string: data[.customURL] ?? "") ?? content.url
if content.parameters != nil {
var parametersToAppend: [URLQueryItem] = []
for parameter in content.parameters! {
Expand Down
10 changes: 9 additions & 1 deletion Sources/YouTubeKit/URL+AppendQueryItems.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import Foundation

public extension URL {
///adapted from https://stackoverflow.com/questions/34060754/how-can-i-build-a-url-with-query-parameters-containing-multiple-values-for-the-s
/// adapted from https://stackoverflow.com/questions/34060754/how-can-i-build-a-url-with-query-parameters-containing-multiple-values-for-the-s
/// If `queryItems` contains multiple items with the same name, only the first will be kept.
mutating func append(queryItems queryItemsToAdd: [URLQueryItem]) {
guard var urlComponents = URLComponents(string: self.absoluteString) else { return }
Expand All @@ -30,4 +30,12 @@ public extension URL {
/// Returns the url from new url components.
self = urlComponents.url!
}

/// Non-mutating counterpart of `append(queryItems:)`.
///
/// - Parameter queryItemsToAdd: the query items to add to the URL.
/// - Returns: a copy of this URL on which `append(queryItems:)` has been applied; the receiver is left untouched.
func appending(queryItems queryItemsToAdd: [URLQueryItem]) -> URL {
    var copy = self
    copy.append(queryItems: queryItemsToAdd)
    return copy
}
}
30 changes: 27 additions & 3 deletions Sources/YouTubeKit/YouTubeModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,9 @@ public class YouTubeModel {

do {
try ResponseType.validateRequest(data: &data)



/// Create request
let request = HeadersList.setHeadersAgentFor(
let request: URLRequest = HeadersList.setHeadersAgentFor(
content: headers,
data: data
)
Expand Down Expand Up @@ -258,6 +257,8 @@ public class YouTubeModel {
return subscribeToChannelHeaders()
case .unsubscribeFromChannelHeaders:
return unsubscribeFromChannelHeaders()
case .videoCaptionsHeaders:
return videoCaptionsHeaders()
case .customHeaders(let stringIdentifier):
if let headersGenerator = customHeadersFunctions[stringIdentifier] {
return headersGenerator()
Expand Down Expand Up @@ -1299,6 +1300,29 @@ public class YouTubeModel {
)
}
}

/// Headers used for the request that downloads the captions of a video.
///
/// - Returns: the entry of `customHeaders` for `.videoCaptionsHeaders` when one is defined, otherwise the default GET headers below.
func videoCaptionsHeaders() -> HeadersList {
    // A user-provided definition takes precedence over the defaults.
    if let overridingHeaders = customHeaders[.videoCaptionsHeaders] {
        return overridingHeaders
    }

    let placeholderURL = URL(string: "https://www.youtube.com/")! // will be overridden by the customURL option

    return HeadersList(
        url: placeholderURL,
        method: .GET,
        headers: [
            .init(name: "Accept", content: "*/*"),
            .init(name: "Accept-Encoding", content: "gzip, deflate, br"),
            .init(name: "Host", content: "www.youtube.com"),
            .init(name: "User-Agent", content: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15"),
            .init(name: "Accept-Language", content: "\(selectedLocale);q=0.9"),
            .init(name: "Origin", content: "https://www.youtube.com/"),
            .init(name: "Referer", content: "https://www.youtube.com/"),
            .init(name: "Content-Type", content: "application/xml"),
            .init(name: "X-Origin", content: "https://www.youtube.com")
        ],
        parameters: []
    )
}
}

#if swift(>=5.10)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//
// VideoCaptionsResponse.swift
//
//
// Created by Antoine Bollengier on 27.06.2024.
// Copyright © 2024 Antoine Bollengier (github.com/b5i). All rights reserved.
//

import Foundation

/// Struct representing a response containing the captions of a video.
/// Struct representing a response containing the captions of a video.
public struct VideoCaptionsResponse: YouTubeResponse {
    public static let headersType: HeaderTypes = .videoCaptionsHeaders

    /// The request needs a `.customURL` value that passes the URL validator.
    public static let parametersValidationList: ValidationList = [.customURL: .urlValidator]

    /// Parts of the captions, in the order in which they were found in the response.
    public var captionParts: [CaptionPart]

    public init(captionParts: [CaptionPart]) {
        self.captionParts = captionParts
    }

    /// Decode the raw body of a captions request into a ``VideoCaptionsResponse``.
    ///
    /// Every `<text start="…" dur="…">…</text>` element of the body becomes one ``CaptionPart``.
    /// The duration of a part is shortened when needed so that it ends no later than the start of the next part,
    /// and is never negative.
    ///
    /// - Parameter data: the body of the response, decoded as UTF-8.
    /// - Returns: the response containing the extracted caption parts (empty if no element matched).
    public static func decodeData(data: Data) throws -> VideoCaptionsResponse {
        var toReturn = VideoCaptionsResponse(captionParts: [])

        // Where available, the entities are unescaped twice (see the warning on `CaptionPart.text`).
        #if os(iOS) || os(tvOS) || os(watchOS) || os(macOS) || os(visionOS)
        let dataText = CFXMLCreateStringByUnescapingEntities(nil, CFXMLCreateStringByUnescapingEntities(nil, String(decoding: data, as: UTF8.self) as CFString, nil), nil) as String
        #else
        let dataText = String(decoding: data, as: UTF8.self)
        #endif

        let regexResults = dataText.ytkRegexMatches(for: #"(?:<text start=\"([0-9\.]*)\" dur=\"([0-9\.]*)">([\w\W]*?)<\/text>)"#)

        // Start time of the part that follows the one being processed (none for the last part).
        var currentEndTime: Double = .infinity

        // Walk backwards so that the start of the following part is already known.
        for result in regexResults.reversed() {
            // Whole match + the three capture groups.
            guard result.count == 4 else { continue }

            let startTime = Double(result[1]) ?? 0
            let declaredDuration = Double(result[2]) ?? 0

            // Avoid overlapping the following part, and never let the duration become negative
            // when a part starts after the one that follows it.
            let duration = max(0, min(declaredDuration, currentEndTime - startTime))

            toReturn.captionParts.append(
                CaptionPart(
                    text: result[3],
                    startTime: startTime,
                    duration: duration
                )
            )

            currentEndTime = startTime
        }

        // Restore the original order after the backwards walk.
        toReturn.captionParts.reverse()

        return toReturn
    }

    /// Not supported: a ``VideoCaptionsResponse`` can't be decoded from JSON, use ``decodeData(data:)`` instead.
    /// - Parameter json: ignored.
    /// - Throws: always throws a `ResponseExtractionError`.
    public static func decodeJSON(json: JSON) throws -> VideoCaptionsResponse {
        throw ResponseExtractionError(reponseType: Self.self, stepDescription: "Can't decode a VideoCaptionsResponse from some raw JSON.")
    }

    /// Render the caption parts as the content of a subtitles file.
    ///
    /// Each part is written as its 1-based index, a `start --> end` line and its text; parts are separated by an empty line.
    /// The `.vtt` format adds the `WEBVTT` header and uses `.` before the milliseconds, the `.srt` format uses `,`.
    ///
    /// - Parameter format: the format of the output.
    /// - Returns: the formatted captions, without trailing newline.
    public func getFormattedString(withFormat format: CaptionFormats) -> String {
        let fractionSeparator = format == .vtt ? "." : ","

        /// Format a time in seconds as `HH:MM:SS` + separator + `mmm`.
        func getTimeString(_ time: Double) -> String {
            // Non-finite values can't be converted to `Int` and negative times have no representation: treat both as 0.
            let safeTime = time.isFinite ? max(time, 0) : 0

            // Derive every field from a rounded number of milliseconds so that floating-point error
            // (e.g. 2.3 s being slightly below 2.3) can't truncate a millisecond away.
            let totalMilliseconds = Int((safeTime * 1000).rounded())
            let totalSeconds = totalMilliseconds / 1000

            let hours: String = String(format: "%02d", totalSeconds / 3600)
            let minutes: String = String(format: "%02d", (totalSeconds / 60) % 60)
            let seconds: String = String(format: "%02d", totalSeconds % 60)
            let milliseconds: String = String(format: "%03d", totalMilliseconds % 1000)

            return "\(hours):\(minutes):\(seconds)\(fractionSeparator)\(milliseconds)"
        }

        let header = format == .vtt ? "WEBVTT\n\n" : ""

        let cues = captionParts.enumerated().map { offset, captionPart -> String in
            let start = getTimeString(captionPart.startTime)
            let end = getTimeString(captionPart.startTime + captionPart.duration)
            return "\(offset + 1)\n\(start) --> \(end)\n\(captionPart.text)"
        }

        return header + cues.joined(separator: "\n\n")
    }

    /// Output formats supported by ``getFormattedString(withFormat:)``.
    public enum CaptionFormats {
        /// WebVTT: `WEBVTT` header, `.` before the milliseconds.
        case vtt
        /// SubRip: no header, `,` before the milliseconds.
        case srt
    }

    /// A single timed piece of text of the captions.
    public struct CaptionPart: Sendable, Codable {
        /// Text of the caption.
        ///
        /// - Warning: The text might contain HTML entities (if `CFXMLCreateStringByUnescapingEntities` is not present), to remove them, call a function like `CFXMLCreateStringByUnescapingEntities()` two times on the text.
        public var text: String

        /// Start time of the caption, in seconds.
        public var startTime: Double

        /// Duration of the caption, in seconds.
        public var duration: Double

        public init(text: String, startTime: Double, duration: Double) {
            self.text = text
            self.startTime = startTime
            self.duration = duration
        }
    }
}
Loading

0 comments on commit 43178e4

Please sign in to comment.