-
Notifications
You must be signed in to change notification settings - Fork 0
/
splitCommunity.rb
72 lines (68 loc) · 2.17 KB
/
splitCommunity.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Turns the raw text of a subtitle file into a list of spoken lines.
#
# The input is cut into cues at blank lines. The first two lines of every cue
# are discarded (in an SRT file these are presumably the index and the timing
# line); the remaining lines are the cue's text. Text is accumulated across
# cues until a fragment containing "|" is seen, which closes the utterance.
# A cue with exactly two text lines whose second line starts with "-" is
# treated as two different people speaking.
class TextSplitter
  # A fragment containing this marker closes the utterance being accumulated.
  UTTERANCE_END = "|".freeze

  # Number of leading lines in each cue that carry no spoken text.
  CUE_HEADER_LINES = 2

  # Processes +file+ and prints the resulting array of spoken lines.
  #
  # @param file [String] path of the subtitle file to process
  def initialize(file)
    print startSplit(file)
  end

  # Reads the subtitle file and extracts its spoken lines.
  #
  # @param file [String] path of the subtitle file
  # @return [Array<String>] spoken lines in file order
  def startSplit(file)
    # Runs of blank lines are collapsed by extractLines, so the raw content
    # can be handed over untouched.
    extractLines(File.read(file))
  end

  # Converts raw subtitle data into spoken lines.
  #
  # Accepts CRLF, LF or CR line endings and any number of blank lines between
  # cues. Cues without text lines are skipped. Text that is still pending when
  # the data ends is emitted as a final entry instead of being discarded.
  #
  # @param rawData [String] full content of a subtitle file
  # @return [Array<String>] spoken lines, fragments joined by single spaces
  def extractLines(rawData)
    spoken_lines = []
    pending = [] # fragments of the utterance that has not been closed yet

    cue_texts(rawData).each do |text_lines|
      if two_speakers?(text_lines)
        first, second = text_lines
        # The first speaker's line always closes whatever was pending.
        spoken_lines << (pending + [first]).join(" ")
        pending = []
        # The second speaker either finishes right away or opens a new
        # utterance that later cues continue.
        if second.include?(UTTERANCE_END)
          spoken_lines << second
        else
          pending << second
        end
      else
        fragment = text_lines.join(" ")
        pending << fragment
        if fragment.include?(UTTERANCE_END)
          spoken_lines << pending.join(" ")
          pending = []
        end
      end
    end

    spoken_lines << pending.join(" ") unless pending.empty?
    spoken_lines
  end

  # Combines the text lines of one cue unless two people are speaking.
  #
  # @param dupleLines [Array<String>, nil] text lines of a single cue
  # @return [Array<String>] the two lines untouched when the second one starts
  #   with "-" (two speakers)
  # @return [String] otherwise, all lines joined with a space ("" for nil or
  #   an empty array)
  def processDupleLines(dupleLines)
    lines = Array(dupleLines)
    return lines if two_speakers?(lines)

    lines.join(" ")
  end

  private

  # Splits raw data into cues and returns the non-empty text-line lists.
  def cue_texts(raw_data)
    normalized = raw_data.gsub(/\r\n?/, "\n")
    normalized.split(/\n{2,}/)
              .map { |cue| cue.split("\n").drop(CUE_HEADER_LINES) }
              .reject(&:empty?)
  end

  # True when the cue has exactly two text lines and the second starts with "-".
  def two_speakers?(text_lines)
    text_lines.length == 2 && text_lines[1].start_with?("-")
  end
end
# Run the splitter over every .srt file found in the "subtitles" directory.
Dir.chdir("subtitles")
Dir.glob("*.srt").each { |srt_file| TextSplitter.new(srt_file) }