1
+ <?php
2
// Turns off PHP error reporting for the remainder of the request: notices,
// warnings and errors raised by the code below are not reported.
+ error_reporting (0 );
3
+ // utils.php
4
/**
 * Derive the signature-deciphering recipe from the YouTube player script.
 *
 * The player contains a function of the form
 *
 *     xm=function(a){a=a.split("");wm.zO(a,47);wm.vY(a,1);wm.z9(a,68);return a.join("")};
 *
 * whose body is a sequence of calls to small helper functions. Each helper is
 * classified by looking at its source as one of "splice", "swap" or "reverse",
 * and the calls are returned, in order, together with their numeric argument.
 *
 * @param string $player_html Source code of the player script.
 *
 * @return array|false Ordered list of array(operation, argument) pairs, where
 *                     operation is 'splice', 'swap' or 'reverse' and argument
 *                     is the numeric string found in the call; false when the
 *                     recipe cannot be extracted completely.
 */
function sig_js_decode($player_html){

    // a failed download hands us false/null instead of the script
    if (!is_string($player_html) || $player_html === '') {
        return false;
    }

    // which javascript function is responsible for signature decryption?
    //   var l=f.sig||Xn(f.s)
    //   a.set("signature",Xn(c));return a
    if (!preg_match('/signature",([a-zA-Z0-9$]+)\(/', $player_html, $matches)) {
        return false;
    }

    // the name may contain a dollar sign, so it has to be escaped before it is
    // embedded in the next pattern
    $func_name = preg_quote($matches[1], '/');

    // extract the body of that function
    if (!preg_match('/' . $func_name . '=function\([a-z]+\)\{(.*?)\}/', $player_html, $matches)) {
        return false;
    }

    $js_code = $matches[1];

    // every relevant statement inside the body looks like: wm.vY(a,1)
    if (!preg_match_all('/([a-z0-9]{2})\.([a-z0-9]{2})\([^,]+,(\d+)\)/i', $js_code, $calls)) {
        return false;
    }

    $func_list = $calls[2];
    $arg_list = $calls[3];

    // locate the source of each helper that is called (each name only once)
    $alternation = implode('|', array_unique($func_list));
    preg_match_all('/(' . $alternation . '):function(.*?)\}/m', $player_html, $definitions, PREG_SET_ORDER);

    // translate each helper according to what its source contains;
    // when a name is defined more than once the last definition wins
    $functions = array();

    foreach ($definitions as $definition) {

        $name = $definition[1];
        $source = $definition[2];

        if (strpos($source, 'splice') !== false) {
            $functions[$name] = 'splice';
        } else if (strpos($source, 'a.length') !== false) {
            $functions[$name] = 'swap';
        } else if (strpos($source, 'reverse') !== false) {
            $functions[$name] = 'reverse';
        }
    }

    // FINAL STEP! convert it all to an instruction set
    $instructions = array();

    foreach ($func_list as $index => $name) {

        // a step we could not classify makes the whole recipe useless:
        // report failure instead of returning (and letting callers cache)
        // a recipe with a hole in it
        if (!isset($functions[$name])) {
            return false;
        }

        $instructions[] = array($functions[$name], $arg_list[$index]);
    }

    return $instructions;
}
62
+
63
+
64
+
65
+
66
+ // YouTube is capitalized twice because that's how youtube itself does it:
67
+ // https://developers.google.com/youtube/v3/code_samples/php
68
/**
 * Resolves the direct media URLs of a YouTube video from its watch page and
 * can relay ("stream") the first MP4 match to the current HTTP client.
 *
 * Signature-protected streams are handled by downloading the player script
 * referenced by the watch page, extracting the deciphering recipe with
 * sig_js_decode() and caching that recipe on disk per player URL.
 */
class YouTubeDownloader {

    /** User agent presented on outgoing page requests and on the streaming request. */
    const USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0';

    /** @var string Directory holding the cached deciphering recipes. */
    private $storage_dir;

    /** @var string Set in the constructor; not read anywhere in this class. */
    private $cookie_dir;

    /**
     * Human readable label for each known itag.
     *
     * NOTE(review): stream() selects links whose label contains "mp4", yet the
     * labels for 18/22/37/38 do not contain that text - confirm whether they
     * were meant to read "MP4 360P" etc.
     *
     * @var array
     */
    private $itag_info = array(

        18 => "360P",
        22 => "720P",
        37 => "1080P",
        38 => "3072P",

        // questionable MP4s
        59 => "MP4480P",
        78 => "MP4480P",

        43 => "WebM360P",

        17 => "3GP144P"
    );

    public function __construct(){
        $this->storage_dir = sys_get_temp_dir();
        $this->cookie_dir = sys_get_temp_dir();
    }

    /**
     * Change the directory used for the deciphering-recipe cache.
     *
     * @param string $dir Directory path without trailing slash.
     */
    public function setStorageDir($dir){
        $this->storage_dir = $dir;
    }

    /**
     * Fetch a URL with a browser-like user agent, following redirects.
     *
     * @param string $url
     *
     * @return string|false Response body, or false when the transfer failed.
     */
    public function curl($url){

        $ch = curl_init($url);

        curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);

        // NOTE(review): certificate verification is switched off, so HTTPS
        // responses are not authenticated. Left unchanged because enabling it
        // needs a CA bundle on the host - confirm and enable where possible.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        $result = curl_exec($ch);
        curl_close($ch);

        return $result;
    }

    /**
     * Issue a HEAD request (redirects are not followed) and return the parsed
     * response headers.
     *
     * NOTE(review): http_parse_headers() is not a PHP core function (it is
     * shipped by the PECL http extension); without it this call is fatal.
     *
     * @param string $url
     *
     * @return mixed Whatever http_parse_headers() returns for the raw header block.
     */
    public static function head($url){

        $ch = curl_init($url);

        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
        curl_setopt($ch, CURLOPT_NOBODY, 1);
        $result = curl_exec($ch);
        curl_close($ch);

        return http_parse_headers($result);
    }

    /**
     * Obtain the signature-deciphering recipe for the player used by a watch page.
     *
     * The recipe is cached in the storage directory under md5(player URL); a
     * missing, empty or corrupt cache entry triggers a fresh download.
     *
     * @param string $html HTML of the watch?v=... page.
     *
     * @return array|false Instruction list as produced by sig_js_decode(), or
     *                     false when no player script is referenced or it
     *                     cannot be decoded.
     */
    private function getInstructions($html){

        // <script src="//s.ytimg.com/yts/jsbin/player-fr_FR-vflHVjlC5/base.js" name="player/base"></script>
        if (!preg_match('@<script\s*src="([^"]+player[^"]+js)@', $html, $matches)) {
            return false;
        }

        $player_url = $matches[1];

        if (strpos($player_url, '//') === 0) {
            // protocol-relative URL
            $player_url = 'http://' . substr($player_url, 2);
        } else if (strpos($player_url, '/') === 0) {
            // host-relative path
            $player_url = 'http://www.youtube.com' . $player_url;
        }

        // try to find an instruction list already cached by a previous request
        $file_path = $this->storage_dir . '/' . md5($player_url);

        if (file_exists($file_path)) {

            // NOTE(review): unserialize() on a file in a (by default shared)
            // temp directory - point setStorageDir() at a private directory.
            $cached = unserialize((string) file_get_contents($file_path));

            // an empty or corrupt file yields false: ignore it and rebuild
            if (is_array($cached) && count($cached) > 0) {
                return $cached;
            }
        }

        $js_code = $this->curl($player_url);
        $instructions = sig_js_decode($js_code);

        if ($instructions) {
            file_put_contents($file_path, serialize($instructions));
            return $instructions;
        }

        return false;
    }

    /**
     * Relay the first available MP4 stream of a video to the current client.
     * This is in beta mode!!
     *
     * The client's Range header is passed on; the upstream status line and a
     * small whitelist of headers are forwarded only when the upstream answers
     * 200 or 206, and the body is echoed only in that case. Terminates the
     * script with "no url found!" when no link is available.
     *
     * @param string $id Video id, any text containing one, or watch-page HTML.
     *
     * @return bool Always true once the transfer attempt has finished.
     */
    public function stream($id){

        $links = $this->getDownloadLinks($id, "mp4");

        // getDownloadLinks() answers false (not an empty array) for unusable
        // ids and age-gated videos, so count() alone is not a safe check
        if (!is_array($links) || count($links) == 0) {
            die("no url found!");
        }

        // grab first available MP4 link
        $url = $links[0]['url'];

        $request_headers = array(
            'User-Agent: ' . self::USER_AGENT
        );

        if (isset($_SERVER['HTTP_RANGE'])) {
            $request_headers[] = 'Range: ' . $_SERVER['HTTP_RANGE'];
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_HTTPHEADER, $request_headers);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);

        // headers and body are handled by the two callbacks below
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
        curl_setopt($ch, CURLOPT_HEADER, 0);

        // becomes true once an acceptable status line has been forwarded
        $headers_sent = false;

        curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $data) use (&$headers_sent) {

            // status line: "HTTP/1.1 206 Partial Content" as well as "HTTP/2 200"
            if (preg_match('@^HTTP/\d(?:\.\d)?\s+(\d+)@', $data, $matches)) {

                $status_code = (int) $matches[1];

                // ok or partial content
                if ($status_code === 200 || $status_code === 206) {
                    $headers_sent = true;
                    header(rtrim($data));
                }

            } else {

                // only headers we wish to forward back to the client
                $forward = array('content-type', 'content-length', 'accept-ranges', 'content-range');

                $parts = explode(':', $data, 2);

                if ($headers_sent && count($parts) == 2 && in_array(strtolower(trim($parts[0])), $forward, true)) {
                    header(rtrim($data));
                }
            }

            // curl aborts the transfer unless the full length is acknowledged
            return strlen($data);
        });

        // if the response body is empty this never gets called
        curl_setopt($ch, CURLOPT_WRITEFUNCTION, function ($curl, $data) use (&$headers_sent) {

            if ($headers_sent) {
                echo $data;
                flush();
            }

            return strlen($data);
        });

        curl_exec($ch);
        curl_close($ch);

        return true;
    }

    /**
     * Extract a YouTube video id from any piece of text.
     *
     * @param string $str
     *
     * @return string|false First run of 11 characters out of [A-Za-z0-9_-], or false.
     */
    public function extractId($str){

        if (preg_match('/[a-z0-9_-]{11}/i', $str, $matches)) {
            return $matches[0];
        }

        return false;
    }

    /**
     * Filter links by a comma separated list of format keywords, e.g. "mp4, 360".
     *
     * Results are grouped in keyword order; the keyword "any" matches every
     * link. A link matching several keywords appears once per keyword.
     *
     * @param array  $links    Entries with a 'format' key.
     * @param string $selector Comma separated keywords (case-insensitive substring match).
     *
     * @return array Re-indexed list of matching links.
     */
    private function selectFirst($links, $selector){

        $result = array();
        $formats = preg_split('/\s*,\s*/', $selector);

        // has to be in this order: keyword first, then link
        foreach ($formats as $f) {

            foreach ($links as $l) {

                if ($f == 'any' || stripos($l['format'], $f) !== false) {
                    $result[] = $l;
                }
            }
        }

        return $result;
    }

    /**
     * Collect the download links advertised by a video's watch page.
     *
     * @param string       $id       Video id, text containing one, or the HTML
     *                               of the watch page itself.
     * @param string|false $selector Optional keyword filter, see selectFirst().
     *
     * @return array|false Without selector: map itag => array('url', 'format').
     *                     With selector: list of such entries. False when no
     *                     video id can be found or the video is age-gated.
     */
    public function getDownloadLinks($id, $selector = false){

        $result = array();

        // deciphering recipe, looked up at most once and only when needed
        $instructions = array();
        $instructions_loaded = false;

        // you can pass the HTML of the /watch? page directly instead of an id
        if (strpos($id, '<div id="player') !== false) {
            $html = $id;
        } else {
            $video_id = $this->extractId($id);

            if (!$video_id) {
                return false;
            }

            $html = $this->curl("https://www.youtube.com/watch?v={$video_id}");
        }

        // a failed download leaves nothing to parse
        if (!is_string($html)) {
            return array();
        }

        // age-gate
        if (strpos($html, 'player-age-gate-content') !== false) {
            // nothing you can do folks...
            return false;
        }

        // http://stackoverflow.com/questions/35608686/how-can-i-get-the-actual-video-url-of-a-youtube-live-stream
        if (preg_match('@url_encoded_fmt_stream_map["\']:\s*["\']([^"\'\s]*)@', $html, $matches)) {

            foreach (explode(",", $matches[1]) as $p) {

                // the map is embedded in JSON, so "&" arrives as the literal text \u0026
                $query = str_replace('\u0026', '&', $p);
                parse_str($query, $arr);

                // an entry without url or itag cannot produce a usable link
                if (!isset($arr['url'], $arr['itag'])) {
                    continue;
                }

                $url = $arr['url'];

                if (isset($arr['sig'])) {
                    $url = $url . '&signature=' . $arr['sig'];

                } else if (isset($arr['signature'])) {
                    $url = $url . '&signature=' . $arr['signature'];

                } else if (isset($arr['s'])) {

                    // this is probably a VEVO/ads video... the signature must be
                    // deciphered first, which needs the player's recipe
                    if (!$instructions_loaded) {
                        $found = $this->getInstructions($html);
                        $instructions = is_array($found) ? $found : array();
                        $instructions_loaded = true;
                    }

                    $dec = $this->sig_decipher($arr['s'], $instructions);
                    $url = $url . '&signature=' . $dec;
                }

                $itag = $arr['itag'];
                $format = isset($this->itag_info[$itag]) ? $this->itag_info[$itag] : 'Unknown';

                $result[$itag] = array(
                    'url' => $url,
                    'format' => $format
                );
            }
        }

        // do we want all links or just a select few?
        if ($selector) {
            return $this->selectFirst($result, $selector);
        }

        return $result;
    }

    /**
     * Apply a deciphering recipe to an enciphered signature.
     *
     * Operations mirror the player's helpers:
     *   swap n    - exchange character 0 with character (n mod length)
     *   splice n  - drop the first n characters
     *   reverse   - reverse the string
     *
     * @param string $signature    Enciphered signature (the "s" parameter).
     * @param array  $instructions List of array(operation, argument) pairs.
     *
     * @return string Deciphered signature with surrounding whitespace trimmed.
     */
    private function sig_decipher($signature, $instructions){

        $signature = (string) $signature;

        foreach ((array) $instructions as $opt) {

            // ignore malformed entries instead of reading offsets of a non-array
            if (!is_array($opt) || !isset($opt[0])) {
                continue;
            }

            $command = $opt[0];
            $value = isset($opt[1]) ? (int) $opt[1] : 0;

            if ($command == 'swap') {

                $length = strlen($signature);

                // nothing to swap, and the modulo below would divide by zero
                if ($length === 0) {
                    continue;
                }

                // both sides of the exchange use the wrapped position; writing
                // to the raw offset would pad the string with spaces
                $pos = $value % $length;

                $temp = $signature[0];
                $signature[0] = $signature[$pos];
                $signature[$pos] = $temp;

            } else if ($command == 'splice') {
                // older PHP versions return false when the offset is past the end
                $signature = (string) substr($signature, $value);
            } else if ($command == 'reverse') {
                $signature = strrev($signature);
            }
        }

        return trim($signature);
    }
}
386
+
387
+
388
+ ?>
0 commit comments