forked from filebot/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathduplicates.groovy
executable file
·135 lines (109 loc) · 3.34 KB
/
duplicates.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env filebot -script

// duplicates.groovy — detect (and optionally delete) duplicate files.
//
// Options (via FileBot CLI, exposed through the _args binding):
//   --action DELETE   actually move duplicates to trash (anything else is a dry run)
//   --mode BINARY     detect content-identical duplicates; otherwise logical
//                     duplicates are detected via xattr metadata
//   --order ...       how to rank files within a duplicate group (first one is kept)

// true only when --action DELETE was passed (case-insensitive)
delete = 'DELETE'.equalsIgnoreCase(_args.action)
// true only when --mode BINARY was passed (case-insensitive)
binary = 'BINARY'.equalsIgnoreCase(_args.mode)
// Binary Duplicates: Keep Input Argument Order
// Logical Duplicates: Order by Video Quality
// NOTE: `order` ends up as either a String tag consumed by order() below, or —
// for --order '{ a, b -> ... }' — a comparator Closure compiled from the
// argument text (presumably what __shell.callable does; confirm against the
// FileBot scripting API). The last line supplies the mode-specific default.
order = 'INPUT' .equalsIgnoreCase(_args.order) ? 'INPUT'
	: 'QUALITY'.equalsIgnoreCase(_args.order) ? 'QUALITY'
	: 'SIZE' .equalsIgnoreCase(_args.order) ? 'SIZE'
	: 'DATE' .equalsIgnoreCase(_args.order) ? 'DATE'
	: 'TIME' .equalsIgnoreCase(_args.order) ? 'TIME'
	: _args.order ==~ /^[{].*[}]$/ ? __shell.callable(_args.order) // e.g. --order '{ a, b -> 0 }'
	: binary ? 'INPUT' : 'QUALITY'

// sanity checks
if (args.size() == 0) {
	die "Invalid usage: no input"
}
/**
 * Partition the given files into groups of presumed duplicates.
 *
 * Binary mode: files are grouped by content identity in three narrowing
 * stages — any group that becomes a singleton at some stage is kept as-is
 * (a file with a unique size/hash cannot have a binary duplicate).
 *
 * Logical mode: only video files are considered; they are grouped by their
 * xattr metadata object (plus video format / HDR type unless --strict).
 * Files without xattr metadata fall into the null group, which the caller
 * skips.
 *
 * Returns a Map of group key -> list of files; only entries with more than
 * one file represent actual duplicates.
 */
def group(files) {
	// Binary Duplicates: Group by File Size, then Fast MovieHash, then CRC32 via Xattr
	if (binary) {
		def groups = [:]
		// 0. Group by File Key (i.e. physical link duplicates are always binary duplicates)
		// NOTE(review): any{}{}{} appears to yield the first alternative that
		// evaluates without error: file key, else canonical file, else the file
		// itself — confirm against the FileBot scripting API
		def links = files.groupBy{ f -> any{ f.key }{ f.canonicalFile }{ f } }.entrySet()
		// 1. Group by File Size (cheap; only size-colliding files are hashed)
		links.groupBy{ it.value[0].length() }.each{ size, size_fs ->
			if (size_fs.size() == 1) {
				// unique size ⇒ no possible binary duplicate; keep singleton group
				groups.put(size_fs[0].key, size_fs[0].value)
				return
			}
			// 2. Group by MovieHash (fast partial-content hash)
			size_fs.groupBy{ it.value[0].hash('moviehash') }.each{ hash, hash_fs ->
				if (hash_fs.size() == 1) {
					groups.put(hash_fs[0].key, hash_fs[0].value)
					return
				}
				// 3. Group by CRC32 via Xattr (full-content check; flatten the
				// surviving link groups into one combined duplicate group)
				hash_fs.groupBy{ it.value[0].CRC32 }.each{ crc, crc_fs ->
					groups.put([size, hash, crc], crc_fs.collectMany{ it.value })
				}
			}
		}
		return groups
	}
	// Logical Duplicates: Group by Xattr Metadata Object
	return files.findAll{ it.isVideo() }.groupBy{ f ->
		def m = f.metadata
		if (m == null) {
			// no xattr metadata ⇒ null group key; skipped by the caller
			log.finest "[XATTR NOT FOUND] $f"
			return null
		}
		// Strict Mode: group by metadata
		// Non-Lenient Mode: group by metadata and video format and HDR type
		return _args.strict ? m : [m, getMediaInfo(f, '{vf} {hdr}')]
	}
}
/**
 * Sort one group of duplicate files according to the configured order, so
 * that the first element is the file that gets kept.
 *
 * NOTE: the unqualified `order` below resolves to the script binding variable
 * set at the top of the script (a String tag or a comparator Closure), not to
 * this method.
 */
def order(fs) {
	if (order == 'INPUT') {
		// preserve the original command-line argument order
		return fs
	}
	if (order == 'QUALITY') {
		// best video quality first
		return fs.toSorted(VideoQuality.DESCENDING_ORDER)
	}
	if (order == 'SIZE') {
		// largest file first
		return fs.toSorted{ f -> -(f.length()) }
	}
	if (order == 'DATE') {
		// most recent media creation date first, falling back to file creation date
		return fs.toSorted{ f -> -(f.mediaCharacteristics?.creationTime?.toEpochMilli() ?: f.creationDate) }
	}
	if (order == 'TIME') {
		// most recently modified first
		return fs.toSorted{ f -> -(f.lastModified()) }
	}
	// custom comparator closure supplied via --order '{ a, b -> ... }'
	return fs.toSorted(order)
}
// select video files (and preserve input argument order)
def files = args.files

// collect every file that is NOT the first-ranked member of its group
def duplicates = []
group(files).each{ g, fs ->
	// skip the null group (files without xattr metadata) and singleton groups
	if (g && fs.size() > 1) {
		log.info "[*] $g"
		// first file in the configured order is kept ([+]); the rest are
		// flagged as duplicates ([-]); unique() drops repeated input paths
		order(fs).unique().eachWithIndex{ f, i ->
			if (i == 0) {
				log.finest "[+] 1. $f"
			} else {
				log.warning "[-] ${i+1}. $f"
				duplicates += f
			}
		}
	}
}

// no duplicates; return with NOOP
if (duplicates.size() == 0) {
	die "0 duplicates", ExitCode.NOOP
}

// continue with post-processing
log.fine "${duplicates.size()} duplicates"

// select duplicate files and then pipe them to -rename as input
// (binary duplicates use the generic 'file' database, logical ones 'xattr')
if (_args.rename) {
	rename(file: duplicates, db: binary ? 'file' : 'xattr')
	return
}

// select files from history and then pipe them to -mediainfo --format or -find -exec as input
if (_args.format || _args.exec || _args.apply) {
	getMediaInfo(file: duplicates)
	return
}

// delete duplicate files — only when --action DELETE was passed;
// trash() moves to the system trash rather than unlinking outright
if (delete) {
	duplicates.each{ f ->
		log.info "[DELETE] $f"
		f.trash()
	}
}