Skip to content

Commit 7de207f

Browse files
1jingguanzhang
1
authored and committed
no message
1 parent 91ad0ff commit 7de207f

38 files changed

+16151
-0
lines changed

1.png

18.9 KB
Loading

YouTubeDownloader.php

+388
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,388 @@
1+
<?php
2+
error_reporting(0);
3+
// utils.php
4+
/**
 * Derive the signature-deciphering "instruction set" from a player script.
 *
 * The player script is searched for the function that is passed to
 * set("signature", ...). Every `xx.yy(a,N)` call inside that function is then
 * classified as 'splice', 'swap' or 'reverse' by looking at the source of the
 * helper `yy`.
 *
 * @param string $player_html source of the player script
 * @return array|false list of array(command, argument) pairs in call order
 *                     (argument is the numeric string captured from the
 *                     script), or false when the decipher routine cannot be
 *                     located or one of its helpers cannot be classified
 */
function sig_js_decode($player_html){

    // which javascript function is responsible for signature decryption?
    // var l=f.sig||Xn(f.s)
    // a.set("signature",Xn(c));return a
    if(!preg_match('/signature",([a-zA-Z0-9$]+)\(/', $player_html, $name_match)){
        return false;
    }

    // the name may contain a dollar sign, so it has to be escaped before it
    // is embedded in the next pattern
    $func_name = preg_quote($name_match[1], '/');

    // extract the code block of that function, e.g.
    // xm=function(a){a=a.split("");wm.zO(a,47);wm.vY(a,1);wm.z9(a,68);return a.join("")};
    if(!preg_match('/'.$func_name.'=function\([a-z]+\){(.*?)}/', $player_html, $body_match)){
        return false;
    }

    $js_code = $body_match[1];

    // extract all relevant statements within that block, e.g. wm.vY(a,1);
    // preg_match_all yields 0 (no statements) or false (error): both mean failure
    if(!preg_match_all('/([a-z0-9]{2})\.([a-z0-9]{2})\([^,]+,(\d+)\)/i', $js_code, $calls)){
        return false;
    }

    $func_list = $calls[2];
    $arg_list = $calls[3];

    // extract javascript code for each one of those helper functions
    preg_match_all('/('.implode('|', array_unique($func_list)).'):function(.*?)\}/m', $player_html, $helpers, PREG_SET_ORDER);

    // translate each helper according to what its source contains
    $functions = array();

    foreach($helpers as $m){

        if(strpos($m[2], 'splice') !== false){
            $functions[$m[1]] = 'splice';
        } else if(strpos($m[2], 'a.length') !== false){
            $functions[$m[1]] = 'swap';
        } else if(strpos($m[2], 'reverse') !== false){
            $functions[$m[1]] = 'reverse';
        }
    }

    // FINAL STEP! convert it all to an instruction set
    $instructions = array();

    foreach($func_list as $index => $name){

        // a helper we could not classify would yield an instruction with no
        // command and therefore a wrong signature: report failure instead
        if(!isset($functions[$name])){
            return false;
        }

        $instructions[] = array($functions[$name], $arg_list[$index]);
    }

    return $instructions;
}
62+
63+
64+
65+
66+
// YouTube is capitalized twice because that's how youtube itself does it:
67+
// https://developers.google.com/youtube/v3/code_samples/php
68+
class YouTubeDownloader {

    // directory where decoded signature instructions are cached
    private $storage_dir;

    // set in the constructor; not read anywhere else in this class
    private $cookie_dir;

    // itag => format label, used for the 'format' field of each link and
    // matched against by selectFirst()
    private $itag_info = array(

        18 => "360P",
        22 => "720P",
        37 => "1080P",
        38 => "3072P",

        // questionable MP4s
        59 => "MP4480P",
        78 => "MP4480P",

        43 => "WebM360P",

        17 => "3GP144P"
    );

    /**
     * Both working directories default to the system temp directory.
     */
    public function __construct(){
        $this->storage_dir = sys_get_temp_dir();
        $this->cookie_dir = sys_get_temp_dir();
    }

    /**
     * Change the directory used for the instruction cache.
     *
     * @param string $dir directory path (used as "$dir/<md5 of player url>")
     */
    public function setStorageDir($dir){
        $this->storage_dir = $dir;
    }

    /**
     * GET a URL with a browser-like user agent, following redirects.
     *
     * @param string $url
     * @return string|false response body, or false when curl_exec() fails
     */
    public function curl($url){

        $ch = curl_init($url);

        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0');
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);

        // NOTE(review): TLS peer verification is switched off here, so the
        // fetched pages/scripts are not authenticated. Left unchanged because
        // enabling it can break hosts without a CA bundle - confirm and enable.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        $result = curl_exec($ch);
        curl_close($ch);

        return $result;
    }

    /**
     * Issue a HEAD request (redirects are not followed) and return the parsed
     * response headers.
     *
     * http_parse_headers() is used when it is available; it is not part of
     * core PHP, so a minimal built-in parser is used otherwise.
     *
     * @param string $url
     * @return array|false parsed headers, or false when the request fails
     */
    public static function head($url){

        $ch = curl_init($url);

        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
        curl_setopt($ch, CURLOPT_NOBODY, 1);
        $result = curl_exec($ch);
        curl_close($ch);

        if($result === false){
            return false;
        }

        if(function_exists('http_parse_headers')){
            return http_parse_headers($result);
        }

        return self::parseHeaders($result);
    }

    /**
     * Fallback header parser: "Name: value" lines become name => value.
     * Lines without a colon (status line, blank lines) are ignored and a
     * repeated header name collects its values in an array.
     *
     * @param string $raw raw header block
     * @return array
     */
    private static function parseHeaders($raw){

        $parsed = array();

        foreach(preg_split('/\r\n|\n|\r/', (string)$raw) as $line){

            $pair = explode(':', $line, 2);

            if(count($pair) != 2){
                continue;
            }

            $name = trim($pair[0]);
            $value = trim($pair[1]);

            if($name === ''){
                continue;
            }

            if(!isset($parsed[$name])){
                $parsed[$name] = $value;
            } else {
                $parsed[$name] = array_merge((array)$parsed[$name], array($value));
            }
        }

        return $parsed;
    }

    /**
     * Find the player script referenced by a watch page and return the
     * signature instructions decoded from it, using an on-disk cache keyed by
     * the md5 of the player URL.
     *
     * The cache is stored as JSON rather than serialize() output: the default
     * storage directory is the shared temp directory, and unserialize() must
     * not be run on files other users may have written. Cache entries that do
     * not decode to a non-empty array are ignored and rebuilt.
     *
     * @param string $html html code of watch?v=aaa
     * @return array|false instruction list, or false when unavailable
     */
    private function getInstructions($html){

        // <script src="//s.ytimg.com/yts/jsbin/player-fr_FR-vflHVjlC5/base.js" name="player/base"></script>
        // check what player version that video is using
        if(!preg_match('@<script\s*src="([^"]+player[^"]+js)@', $html, $matches)){
            return false;
        }

        $player_url = $matches[1];

        if(strpos($player_url, '//') === 0){
            // protocol-relative
            $player_url = 'https://'.substr($player_url, 2);
        } else if(strpos($player_url, '/') === 0){
            // relative path
            $player_url = 'https://www.youtube.com'.$player_url;
        }

        // try to find an instruction list cached by a previous request
        $file_path = $this->storage_dir.'/'.md5($player_url);

        if(is_file($file_path)){

            $cached = json_decode((string)file_get_contents($file_path), true);

            if(is_array($cached) && count($cached) > 0){
                return $cached;
            }
            // empty, corrupt or old-format cache file: fall through and rebuild
        }

        $js_code = $this->curl($player_url);

        if(!is_string($js_code) || $js_code === ''){
            return false;
        }

        $instructions = sig_js_decode($js_code);

        if(is_array($instructions) && count($instructions) > 0){
            file_put_contents($file_path, json_encode($instructions), LOCK_EX);
            return $instructions;
        }

        return false;
    }

    /**
     * Proxy the first link selected by "mp4" to the client (beta).
     *
     * The upstream status line and a whitelist of headers are forwarded only
     * for 200/206 responses; the body is echoed as it arrives. An incoming
     * Range request header is passed upstream. Terminates the script with
     * "no url found!" when no link is available.
     *
     * @param string $id video id, url, or watch-page html
     * @return bool always true once the transfer has been attempted
     */
    public function stream($id){

        $links = $this->getDownloadLinks($id, "mp4");

        // getDownloadLinks() may return false as well as an empty list
        if(!is_array($links) || count($links) == 0){
            die("no url found!");
        }

        // grab first available MP4 link
        $url = $links[0]['url'];

        $request_headers = array(
            'User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'
        );

        if(isset($_SERVER['HTTP_RANGE'])){
            $request_headers[] = 'Range: '.$_SERVER['HTTP_RANGE'];
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_HTTPHEADER, $request_headers);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);

        // output is handled by the callbacks below
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
        curl_setopt($ch, CURLOPT_HEADER, 0);

        // becomes true once an acceptable status line has been forwarded
        $headers_sent = false;

        curl_setopt($ch, CURLOPT_HEADERFUNCTION, function($ch, $data) use (&$headers_sent){

            // status line; the minor version is optional so that "HTTP/2 200"
            // is recognised as well as "HTTP/1.1 200 OK"
            if(preg_match('@^HTTP/\d(?:\.\d)?\s+(\d+)@i', $data, $matches)){

                $status_code = (int)$matches[1];

                // status=ok or partial content
                if($status_code == 200 || $status_code == 206){
                    $headers_sent = true;
                    header(rtrim($data));
                }

            } else {

                // only headers we wish to forward back to the client
                $forward = array('content-type', 'content-length', 'accept-ranges', 'content-range');

                $parts = explode(':', $data, 2);

                if($headers_sent && count($parts) == 2 && in_array(trim(strtolower($parts[0])), $forward, true)){
                    header(rtrim($data));
                }
            }

            // curl aborts the transfer unless the full length is reported
            return strlen($data);
        });

        // if response is empty - this never gets called
        curl_setopt($ch, CURLOPT_WRITEFUNCTION, function($curl, $data) use (&$headers_sent){

            if($headers_sent){
                echo $data;
                flush();
            }

            return strlen($data);
        });

        @curl_exec($ch);
        curl_close($ch);

        return true;
    }

    /**
     * Extract a youtube video id from any piece of text.
     *
     * Well-known url shapes (v=, youtu.be/, /embed/, /v/, /shorts/) are tried
     * first so that other 11+ character words in the url (for example the
     * host "youtube-nocookie") are not mistaken for the id; otherwise the
     * first run of 11 id characters is returned.
     *
     * @param string $str
     * @return string|false the 11 character id, or false when none is found
     */
    public function extractId($str){

        if(preg_match('@(?:[?&]v=|youtu\.be/|/embed/|/v/|/shorts/)([a-z0-9_-]{11})@i', $str, $matches)){
            return $matches[1];
        }

        if(preg_match('/[a-z0-9_-]{11}/i', $str, $matches)){
            return $matches[0];
        }

        return false;
    }

    /**
     * Filter links by a comma separated list of format fragments
     * (e.g. "mp4, 360"); "any" matches every link.
     *
     * Results are grouped in selector order; a link matching several
     * fragments is listed once per fragment. Empty fragments are ignored.
     *
     * @param array $links links as built by getDownloadLinks()
     * @param string $selector
     * @return array re-indexed list of matching links
     */
    private function selectFirst($links, $selector){

        $result = array();
        $formats = preg_split('/\s*,\s*/', trim($selector));

        // has to be in this order
        foreach($formats as $f){

            // an empty needle would match every link on PHP 8
            if($f === ''){
                continue;
            }

            foreach($links as $l){

                if($f == 'any' || stripos($l['format'], $f) !== false){
                    $result[] = $l;
                }
            }
        }

        return $result;
    }

    /**
     * Collect the stream links advertised in a watch page.
     *
     * @param string $id video id, any text containing one, or the html of a
     *                   /watch page (detected by '<div id="player')
     * @param string|false $selector optional format selector, see selectFirst()
     * @return array|false without selector: itag => array('url', 'format');
     *                     with selector: list of such arrays;
     *                     false when no id can be extracted or the video is
     *                     age-gated
     */
    public function getDownloadLinks($id, $selector = false){

        $result = array();
        $instructions = array();
        $instructions_loaded = false;

        // you can input HTML of /watch? page directly instead of id
        if(strpos($id, '<div id="player') !== false){
            $html = $id;
        } else {

            $video_id = $this->extractId($id);

            if($video_id === false){
                return false;
            }

            // a failed request is treated like an empty page
            $html = (string)$this->curl("https://www.youtube.com/watch?v={$video_id}");
        }

        // age-gate: nothing you can do folks...
        if(strpos($html, 'player-age-gate-content') !== false){
            return false;
        }

        // http://stackoverflow.com/questions/35608686/how-can-i-get-the-actual-video-url-of-a-youtube-live-stream
        if(preg_match('@url_encoded_fmt_stream_map["\']:\s*["\']([^"\'\s]*)@', $html, $matches)){

            foreach(explode(",", $matches[1]) as $p){

                $query = str_replace('\u0026', '&', $p);
                parse_str($query, $arr);

                // entries lacking a url or itag cannot produce a usable link
                if(!isset($arr['url'], $arr['itag']) || !is_string($arr['url']) || !is_string($arr['itag'])){
                    continue;
                }

                $url = $arr['url'];

                if(isset($arr['sig'])){
                    $url = $url.'&signature='.$arr['sig'];

                } else if(isset($arr['signature'])){
                    $url = $url.'&signature='.$arr['signature'];

                } else if(isset($arr['s'])){

                    // signature must be decrypted first; load the
                    // instructions once per call, whether or not that works
                    if(!$instructions_loaded){
                        $decoded = $this->getInstructions($html);
                        $instructions = is_array($decoded) ? $decoded : array();
                        $instructions_loaded = true;
                    }

                    $url = $url.'&signature='.$this->sig_decipher($arr['s'], $instructions);
                }

                $itag = $arr['itag'];
                $format = isset($this->itag_info[$itag]) ? $this->itag_info[$itag] : 'Unknown';

                $result[$itag] = array(
                    'url' => $url,
                    'format' => $format
                );
            }
        }

        // do we want all links or just select few?
        if($selector){
            return $this->selectFirst($result, $selector);
        }

        return $result;
    }

    /**
     * Apply an instruction list to an enciphered signature.
     *
     * Commands: 'swap' exchanges character 0 with the character at
     * (value mod length), 'splice' drops the first `value` characters and
     * 'reverse' reverses the string. Unknown or malformed instructions are
     * skipped.
     *
     * @param string $signature
     * @param array $instructions list of array(command, value)
     * @return string deciphered signature, trimmed
     */
    private function sig_decipher($signature, $instructions){

        $signature = is_string($signature) ? $signature : '';

        foreach($instructions as $opt){

            if(!is_array($opt) || !isset($opt[0], $opt[1])){
                continue;
            }

            $command = $opt[0];
            $value = (int)$opt[1];

            if($command === 'swap'){

                $length = strlen($signature);

                // nothing to swap, and the modulo below needs length > 0
                if($length == 0){
                    continue;
                }

                // both sides of the swap use the wrapped position; writing to
                // the raw $value would pad the string when $value >= $length
                $pos = $value % $length;

                $temp = $signature[0];
                $signature[0] = $signature[$pos];
                $signature[$pos] = $temp;

            } else if($command === 'splice'){
                // substr() returns false on old PHP when $value exceeds the length
                $signature = (string)substr($signature, $value);
            } else if($command === 'reverse'){
                $signature = strrev($signature);
            }
        }

        return trim($signature);
    }
}
386+
387+
388+
?>

0 commit comments

Comments
 (0)