-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl.php
81 lines (70 loc) · 1.85 KB
/
crawl.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<?php
// Passing 0 removes PHP's execution time limit; the crawl below runs an
// external downloader once per video and recurses into linked videos.
set_time_limit(0);
// Video ID => annotations file found for it. Filled by download_annotations(),
// which also consults it to avoid fetching the same video twice.
$archived = [];
/**
 * Downloads the annotations of one video through youtube-dl and then follows
 * every link inside the annotations that carries a "v" query parameter,
 * downloading those videos' annotations recursively.
 *
 * Results are recorded in the global $archived array (video ID => annotations
 * file). An ID that is already recorded is skipped, which also keeps link
 * cycles between videos from recursing forever.
 *
 * @param string $id Video ID to fetch. IDs containing anything other than
 *                   letters, digits, "_" or "-" are ignored.
 * @return void
 */
function download_annotations($id)
{
    global $archived;

    // The ID is placed on a shell command line and may come from the request
    // or from a downloaded annotations file, so only accept a conservative
    // character set. /D keeps "$" from matching before a trailing newline.
    if (!is_scalar($id) || preg_match('/^[A-Za-z0-9_-]+$/D', (string) $id) !== 1) {
        return;
    }
    $id = (string) $id;

    if (!empty($archived[$id])) {
        return;
    }

    // Defense in depth: quote the URL argument even though the ID was validated.
    $target = escapeshellarg("https://www.youtube.com/watch?v=" . $id);
    $output = [];
    exec("start /b youtube-dl/youtube-dl.exe --config-location youtube-dl " . $target, $output);

    $downloaded = find_file($id, $output);
    if (empty($downloaded)) {
        return;
    }
    // Record before parsing so that videos linking back to this one are skipped.
    $archived[$id] = $downloaded;

    $contents = file_get_contents((string) $downloaded);
    if ($contents === false) {
        return;
    }

    // Links inside the XML are entity-encoded ("&amp;"); decode before matching.
    $annotation = htmlspecialchars_decode($contents, ENT_NOQUOTES);
    if (!preg_match_all('#\bhttps?://[^,\s()<>]+(?:\([\w\d]+\)|([^,[:punct:]\s]|/))#', $annotation, $match)) {
        return;
    }

    foreach ($match[0] as $url) {
        // parse_url() yields null when there is no query and false on a
        // malformed URL; it also drops any "#fragment" for us.
        $query = parse_url($url, PHP_URL_QUERY);
        if (!is_string($query) || $query === "") {
            continue;
        }

        $params = [];
        parse_str($query, $params);
        if (isset($params["v"]) && is_string($params["v"]) && $params["v"] !== "") {
            download_annotations($params["v"]);
        }
    }
}
/**
 * Locates the annotations XML file written for a video.
 *
 * First looks for the path announced in the downloader's output; if that path
 * cannot be used, searches the "Videos" directory (relative to the current
 * working directory) for an .xml file whose path below "Videos" contains the
 * video ID.
 *
 * @param string   $id     Video ID to look for.
 * @param string[] $output Lines printed by the downloader, as collected by exec().
 * @return string|null Path of the annotations file, or null when none is found.
 */
function find_file($id, $output)
{
    $prefix = "[info] Writing video annotations to: ";
    $prefixLength = strlen($prefix);

    foreach ($output as $line) {
        if (strncmp($line, $prefix, $prefixLength) !== 0) {
            continue;
        }
        $announced = (string) substr($line, $prefixLength);
        //Doesn't play well with UTF-8 file names, so this will just skip them
        if ($announced !== "" && file_exists($announced)) {
            return $announced;
        }
    }

    //Fall back on iterating through Videos directory for the XML that was just downloaded
    $root = "Videos";
    $id = (string) $id;
    // An empty needle would match everything (or warn on older PHP), and a
    // missing directory would make the iterator constructor throw.
    if ($id === "" || !is_dir($root)) {
        return null;
    }

    $iterator = new RecursiveDirectoryIterator($root, FilesystemIterator::SKIP_DOTS);
    foreach (new RecursiveIteratorIterator($iterator) as $file) {
        if ($file->getExtension() !== "xml") {
            continue;
        }
        $path = $file->getPathname();
        // Compare against the part below the root so the directory name
        // "Videos" itself can never satisfy the ID match.
        $relative = (string) substr($path, strlen($root) + 1);
        if (strpos($relative, $id) !== false) {
            return $path;
        }
    }

    return null;
}
// Request handling: when a video ID is submitted, crawl it and build the
// report shown below the form. $output is always defined so the template can
// echo it unconditionally.
$output = "";
if(!empty($_GET["v"]) && is_string($_GET["v"]))
{
    // $_GET values are already URL-decoded by PHP; decoding a second time
    // would alter IDs that contain "%" or "+".
    $id = $_GET["v"];
    download_annotations($id);

    // File names originate from remote video metadata, so escape them before
    // placing them in HTML. ENT_SUBSTITUTE keeps names with invalid UTF-8
    // from collapsing to an empty string.
    $listed = [];
    foreach($archived as $path)
    {
        $listed[] = htmlspecialchars((string) $path, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
    }
    $output = "<br>Downloaded " . count($archived) . " files:<br>" . implode("<br>", $listed);
}
?>
<!DOCTYPE html>
<html>
<body>
<form action = "<?php echo basename(__FILE__); ?>" method = "GET">
Video ID: <input type = "text" name = "v" />
<input type = "submit" value = "Grab annotations" />
</form>
<?php echo $output; ?>
</body>
</html>