-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstring_based.py
More file actions
241 lines (224 loc) · 11.3 KB
/
string_based.py
File metadata and controls
241 lines (224 loc) · 11.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import random
import pretty_midi
import numpy as np
import glob
from utility import *
def is_note_equal(this, that):
    """
    Return True when two notes have the same pitch and the same duration.

    this: pretty_midi Note, or None
    that: pretty_midi Note, or None

    Velocity and absolute start time are intentionally ignored: two notes
    count as "equal" for pattern matching when they sound the same.
    A None on either side (a placeholder for an already-extracted note)
    never matches anything, including another None.
    """
    # `is None` is the correct identity check; `== None` would invoke
    # arbitrary __eq__ overloads on the note object.
    if this is None or that is None:
        return False
    return this.pitch == that.pitch and this.get_duration() == that.get_duration()
def find_biggest_recurring_pattern(seq):
    """
    seq: array-like of pretty_midi Note; entries may be None (placeholders
    for notes already extracted by a previous pass).

    Returns the biggest pattern (sublist of notes) that appears at least
    twice without overlapping itself, together with the start index of its
    first appearance in seq. When nothing repeats, returns ([], 0).

    Classic longest-repeated-substring dynamic program: A[i][j] is the
    length of the longest common suffix of seq[:i] and seq[:j]; the
    (j - i) > A[i-1][j-1] guard caps matches so the two occurrences
    cannot overlap.
    """
    n = len(seq)
    A = np.zeros((n + 1, n + 1), dtype=int)
    res = list()
    res_length = 0
    index = 0
    for i in range(1, n + 1):
        for j in range(i + 1, n + 1):
            # None placeholders never match (is_note_equal rejects them);
            # `is not None` is the idiomatic None check.
            if (seq[i - 1] is not None and seq[j - 1] is not None
                    and is_note_equal(seq[i - 1], seq[j - 1])
                    and (j - i) > A[i - 1][j - 1]):
                A[i][j] = A[i - 1][j - 1] + 1
                if A[i][j] > res_length:
                    res_length = A[i][j]
                    # Remember the end position of the best match so far.
                    index = max(i, index)
            else:
                A[i][j] = 0
    if res_length > 0:
        # Copy out the winning pattern: seq positions
        # [index - res_length, index) in 0-based terms.
        for k in range(index - res_length + 1, index + 1):
            res.append(seq[k - 1])
    return res, index - res_length
def find_occurrences_and_indexes(seq):
    """
    seq: array-like of pretty_midi Note; entries may be None.

    Finds the biggest recurring pattern in seq and masks out every
    occurrence of it with None placeholders (placeholders keep the
    positions of the remaining notes stable for later passes).

    Returns a triple:
      - the sequence with all occurrences replaced by None,
      - the pattern itself (None when nothing repeats),
      - the start index of each occurrence of the pattern in seq.
    """
    res, index_first_occurrence = find_biggest_recurring_pattern(seq)
    if len(res) == 0:
        # Nothing repeats: hand back seq untouched.
        return seq, None, list()
    # Notes before the first occurrence are kept as-is.
    temp_seq = seq[0:index_first_occurrence]
    i = index_first_occurrence
    index_occurrences = list()
    while i < len(seq):
        # A full occurrence starts at i iff every pattern note matches
        # (and fits) from position i onward.
        is_start = is_note_equal(seq[i], res[0]) and all(
            i + j < len(seq) and is_note_equal(seq[i + j], res[j])
            for j in range(len(res))
        )
        if not is_start:
            temp_seq.append(seq[i])
            i += 1
        else:
            index_occurrences.append(i)
            # One None per pattern note keeps downstream indexes aligned.
            temp_seq.extend([None] * len(res))
            i += len(res)
    return temp_seq, res, index_occurrences
def find_all_occurrences_and_indexes(seq):
    """
    seq: array-like of pretty_midi Note.

    Greedily extracts the biggest recurring pattern, masks it out, and
    repeats until no repetition remains. Every leftover note then becomes
    its own single-note pattern.

    Returns (list_patterns, list_indexes) — parallel lists where
    list_indexes[i] holds the start index of every occurrence of
    list_patterns[i] in seq.
    """
    list_patterns = list()
    list_indexes = list()
    seq_x = seq
    while True:
        seq_x, res, indexes = find_occurrences_and_indexes(seq_x)
        if res is None:
            # No recurring pattern left to extract.
            break
        list_patterns.append(res)
        list_indexes.append(indexes)
    # Special case for non-recurring patterns: notes that appear only once
    # each become a one-element pattern at their own index.
    for i, note in enumerate(seq_x):
        if note is not None:
            list_patterns.append([note])
            list_indexes.append([i])
    return list_patterns, list_indexes
def first_order_markov_with_patterns(seq,with_smoothing=False,probability_known_patterns=0.9):
    """
    seq: array-like of pretty_midi Note.

    Decomposes the note sequence into recurring patterns and builds a
    first-order Markov model over them.

    Returns four values: the Markov model of pattern transitions, the list
    of patterns, the list of occurrence indexes per pattern, and the input
    sequence re-expressed as a list of pattern indexes.
    """
    list_patterns, list_indexes = find_all_occurrences_and_indexes(seq)
    # Map each start position in seq to the index of the pattern that
    # begins there.
    index_to_pattern_index = {}
    for pattern_idx, starts in enumerate(list_indexes):
        for start in starts:
            index_to_pattern_index[start] = pattern_idx
    # Walk seq pattern by pattern, jumping ahead by each pattern's length,
    # to obtain the sequence of pattern indexes.
    pattern_indexes_seq = list()
    if index_to_pattern_index:
        head = 0
        while head < len(seq):
            pattern_idx = index_to_pattern_index[head]
            pattern_indexes_seq.append(pattern_idx)
            head += len(list_patterns[pattern_idx])
    model = markov_model_first_order(pattern_indexes_seq, with_smoothing, probability_known_patterns)
    return model, list_patterns, list_indexes, pattern_indexes_seq
def generate_prediction_with_string_based(filename, patterns_to_generate = 4,with_smoothing=False,probability_known_patterns=0.9):
    """
    Generate a continuation of a MIDI file with the string-based pattern
    model and write it to "<input stem>_result.mid" next to the input.

    filename: string of the filename to read, has to be a midi (.mid) file.
    patterns_to_generate: the number of patterns to generate in the continuation.
    with_smoothing: default False. Whether the continuation should have additive smoothing or not.
    probability_known_patterns: only useful when with_smoothing is set to True. It is the probability
        assigned to known patterns; 1 - probability_known_patterns will be the probability assigned
        to unknown patterns. Needs to be between 0 and 1.

    Side effects: writes a MIDI file and displays the result via show_notes.
    """
    NB_ITERATIONS = patterns_to_generate
    # Only the first instrument track is used — assumes monophonic,
    # single-track input; TODO confirm for multi-track files.
    seq_temp = pretty_midi.PrettyMIDI(filename).instruments[0].notes
    # 0) Transform seq_notes so it has correct durations
    # Statistic model with first order markov model
    _,onsets,_,_ = parse_midi(seq_temp)
    # Inter-onset intervals; used below to pick a plausible duration for
    # the final note (which has no successor to end on).
    diff_onsets = onsets[1:] - onsets[:len(onsets)-1]
    seq = list()
    # write current notes, each note ends when the next note starts
    for i in range(len(seq_temp)-1):
        note = seq_temp[i]
        seq.append(pretty_midi.Note(velocity=note.velocity,pitch=note.pitch,start=note.start,end=seq_temp[i+1].start))
    # special case for last note, as there isn't a next note
    last_note = seq_temp[len(seq_temp)-1]
    seq.append(pretty_midi.Note(velocity=last_note.velocity,pitch=last_note.pitch,start=last_note.start,end=last_note.start + find_closest(diff_onsets,last_note.get_duration())))
    # 1) Transform sequence of notes into sequence of patterns
    markov,patterns,_,transformed_seq = first_order_markov_with_patterns(seq,with_smoothing,probability_known_patterns)
    # 2) Generate next patterns
    for i in range(NB_ITERATIONS):
        last_pattern = transformed_seq[len(transformed_seq)-1]
        # Sample the next pattern from the Markov transition distribution
        # of the most recent pattern.
        next_pattern = random.choices(list(markov[last_pattern].keys()),weights=markov[last_pattern].values())[0]
        transformed_seq.append(next_pattern)
    # 3) Transform back into notes
    notes = list()
    # special case for first pattern
    first_pattern = patterns[transformed_seq[0]]
    first_note = first_pattern[0]
    notes.append(first_note)
    for i in range(1,len(first_pattern)):
        current_note = first_pattern[i]
        previous_note = notes[len(notes)-1]
        # Chain notes back to back: each starts where the previous ends
        # and keeps its own duration.
        new_note = pretty_midi.Note(velocity=current_note.velocity,pitch=current_note.pitch,start=previous_note.end,end=previous_note.end+current_note.get_duration())
        notes.append(new_note)
    for i in range(1,len(transformed_seq)):
        current_pattern = patterns[transformed_seq[i]]
        for j in range(len(current_pattern)):
            current_note = current_pattern[j]
            previous_note = notes[len(notes)-1]
            new_note = pretty_midi.Note(velocity=current_note.velocity,pitch=current_note.pitch, start = previous_note.end,end=previous_note.end + current_note.get_duration())
            notes.append(new_note)
    # 4) Write results
    result = pretty_midi.PrettyMIDI()
    result_program = pretty_midi.instrument_name_to_program("Acoustic Grand Piano")
    result_instrument = pretty_midi.Instrument(program=result_program)
    # Full sequence (prime + continuation) is written out; the commented
    # slice would emit the continuation only.
    result_instrument.notes = notes#[len(seq_temp):]
    result.instruments.append(result_instrument)
    # Output path: input filename with ".mid" replaced by "_result.mid".
    result.write(filename[:len(filename)-4] + "_result.mid")
    # 5) Show results using abjad
    show_notes(result_instrument.notes)
def generate_prediction_with_string_based_for_dataset(dataset_filepath, patterns_to_generate = 20,with_smoothing=True,probability_known_states=0.9):
    """
    Run the string-based continuation model over every prime CSV in a dataset.

    dataset_filepath: string of the dataset path to read from. Expected to end with a path
        separator and to contain "prime_csv/", "markov_with_prediction_midi/" and
        "markov_with_prediction_csv/" subdirectories.
    patterns_to_generate: the number of patterns to generate in the continuation.
    with_smoothing: default True. Whether the continuation should have additive smoothing or not.
    probability_known_states: only useful when with_smoothing is set to True. It is the probability
        assigned to known patterns; 1 - probability_known_states will be the probability assigned
        to unknown patterns. Needs to be between 0 and 1.

    Side effects: writes one MIDI file (prime + continuation) and one CSV
    (continuation only) per input file, and prints progress to stdout.
    """
    DATASET_FILEPATH = dataset_filepath
    NB_ITERATIONS = patterns_to_generate
    NB_FILES = len(glob.glob(DATASET_FILEPATH + "prime_csv/*.csv"))
    counter = 0
    # One progress step per 1% of the dataset (0 for tiny datasets).
    steps = int(NB_FILES*0.01)
    for filename in glob.glob(DATASET_FILEPATH + "prime_csv/*.csv"):
        seq_temp = csv_to_notes(filename)
        # 0) Transform seq_notes so it has correct durations
        # Statistic model with first order markov model
        _,onsets,_,_ = parse_midi(seq_temp)
        # Inter-onset intervals; used to pick a plausible duration for the
        # final note (which has no successor to end on).
        diff_onsets = onsets[1:] - onsets[:len(onsets)-1]
        seq = list()
        # write current notes, each note ends when the next note starts
        for i in range(len(seq_temp)-1):
            note = seq_temp[i]
            seq.append(pretty_midi.Note(velocity=note.velocity,pitch=note.pitch,start=note.start,end=seq_temp[i+1].start))
        # special case for last note, as there isn't a next note
        last_note = seq_temp[len(seq_temp)-1]
        seq.append(pretty_midi.Note(velocity=last_note.velocity,pitch=last_note.pitch,start=last_note.start,end=last_note.start + find_closest(diff_onsets,last_note.get_duration())))
        # 1) Transform sequence of notes into sequence of patterns
        markov,patterns,_,transformed_seq = first_order_markov_with_patterns(seq,with_smoothing,probability_known_states)
        # 2) Generate next patterns
        for i in range(NB_ITERATIONS):
            last_pattern = transformed_seq[len(transformed_seq)-1]
            # Sample the next pattern from the Markov transition
            # distribution of the most recent pattern.
            next_pattern = random.choices(list(markov[last_pattern].keys()),weights=markov[last_pattern].values())[0]
            transformed_seq.append(next_pattern)
        # 3) Transform back into notes
        notes = list()
        # special case for first pattern
        first_pattern = patterns[transformed_seq[0]]
        first_note = first_pattern[0]
        notes.append(first_note)
        for i in range(1,len(first_pattern)):
            current_note = first_pattern[i]
            previous_note = notes[len(notes)-1]
            # Chain notes back to back: each starts where the previous
            # ends and keeps its own duration.
            new_note = pretty_midi.Note(velocity=current_note.velocity,pitch=current_note.pitch,start=previous_note.end,end=previous_note.end+current_note.get_duration())
            notes.append(new_note)
        for i in range(1,len(transformed_seq)):
            current_pattern = patterns[transformed_seq[i]]
            for j in range(len(current_pattern)):
                current_note = current_pattern[j]
                previous_note = notes[len(notes)-1]
                new_note = pretty_midi.Note(velocity=current_note.velocity,pitch=current_note.pitch, start = previous_note.end,end=previous_note.end + current_note.get_duration())
                notes.append(new_note)
        # 4) Write results
        result = pretty_midi.PrettyMIDI()
        result_program = pretty_midi.instrument_name_to_program("Acoustic Grand Piano")
        result_instrument = pretty_midi.Instrument(program=result_program)
        # Full sequence (prime + continuation) goes into the MIDI file;
        # the commented slice would keep the continuation only.
        result_instrument.notes = notes#[len(seq_temp):]
        result.instruments.append(result_instrument)
        # NOTE(review): splitting on "/" assumes POSIX-style paths —
        # os.path.basename would be portable; confirm target platforms.
        filename = filename.split("/")
        filename = filename[len(filename)-1]
        result.write(DATASET_FILEPATH + "markov_with_prediction_midi/" + filename[:len(filename)-3] + "mid")
        # 5) write result into csv file
        # Only the generated continuation (notes after the prime) is
        # exported to CSV.
        midi_to_csv(notes[len(seq_temp):],DATASET_FILEPATH + "markov_with_prediction_csv/" + filename)
        counter+=1
        if steps!=0 and counter%steps==0:
            print("\rProgress: " + str(counter/steps) + "%",end='')
#generate_prediction_with_string_based_for_dataset('../Datasets/PPDD-Sep2018_sym_mono_small/')