-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsylpars
344 lines (259 loc) · 7.49 KB
/
sylpars
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#!/usr/local/bin/perl
####################sylpars##################
#This is a fragment of a constraint-based syllable
#parser written by Mike Hammond in Perl. This code
#is described in "Syllable parsing in English and
#French". If you know nothing of Perl, this code can
#be run in most UNIX systems by making this file
#executable ('chmod u+x sylpars') and then invoking
#it directly. The program can take any alphabetic string
#as input (though it doesn't understand anything beyond
#the difference between consonants and vowels). Stress
#is indicated with an apostrophe preceding the
#relevant vowel. The program will run with either
#French or English ranking on any input. For example,
#to run 'hat' with English ranking, type:
#sylpars "e" "h'at"
#Output can be saved by redirection to a file,
#e.g.
#sylpars "e" "h'at" > myfile
#The program can be invoked without specifying the
#language or input on the command line, but then output
#cannot be saved as easily.
#Mike Hammond
#Linguistics
#University of Arizona
#Tucson, AZ 85721
#################################beginning of program
# Debug switch: uncomment the assignment below for very verbose output.
# The subs only test whether $mhdebug is defined, not its value.
#$mhdebug = 1;
################################ main driver
# The four stages run in a fixed order and talk to each other through
# package globals:
#   language   - picks the constraint ranking ($language)
#   getword    - reads the word and splits it into segments (@mhinput)
#   addcv      - derives the C/V skeleton (@mhcv)
#   submitword - feeds the segments to the constraints one at a time
language();
getword();
addcv();
submitword();
# Closing summary. No newline is added between the word and "language:";
# when the word comes from the command line getword has already appended
# one to $theword.
print "#" x 20,
      "\n\nword: $theword",
      "language: $language\n",
      "All done!\n\n";
################################ end of main
# Selects the constraint ranking and stores its full name ("English" or
# "French") in the global $language.
#
# The one-letter code is taken from the first command-line argument. When
# that argument is missing or empty the user is prompted on STDIN; an empty
# answer selects English. Dies with a short message for any other code.
sub language {
    my $choice = $ARGV[0];
    if ($choice =~ /^$/) {
        print "\nEnglish (e: default) or French (f)?:";
        $choice = <STDIN>;
    }
    # A bare newline from the prompt also matches here, because "$" is
    # allowed to match just before a trailing newline.
    $choice = "e" if $choice =~ /^$/;

    my %ranking_for = (e => 'English', f => 'French');
    # Keep the raw answer in the global even if we bail out below.
    $language = $choice;
    $language =~ /^([eEfF])$/
        or die "You can only enter \"e\" or \"f\".\n";
    $language = $ranking_for{ lc $1 };
}
# Reads the word to be parsed and fills the global @mhinput with one
# element per segment.
#
# Input:  the second command-line argument if it is non-empty, otherwise
#         one line read from STDIN.
# Output (globals):
#   $theword   - the word followed by a newline
#   $size      - number of characters in the word, stress marks included
#   @mhinput   - the segments; a vowel that directly follows an apostrophe
#                carries that apostrophe as a prefix (e.g. "'a"), and the
#                free-standing apostrophes are removed
#   @tempinput - scratch copy of the final @mhinput
# Dies with "Illegal characters!" if any character is not an ASCII letter
# or an apostrophe.
sub getword {
    if (defined($ARGV[1]) && $ARGV[1] ne "") {
        $theword = $ARGV[1] . "\n";
    }
    else {
        print "\nEnter word: ";
        $theword = <STDIN>;
        # End of input, or a last line without a newline: normalise so that
        # the "length - 1" below never cuts off a real character.
        $theword = "" unless defined($theword);
        $theword .= "\n" unless $theword =~ /\n\z/;
    }

    # Start from a clean slate so a repeated call does not append to the
    # segments of an earlier word.
    @mhinput   = ();
    @tempinput = ();

    #this splits the characters up and puts them in an array
    $size = length($theword) - 1;
    if (defined($mhdebug)) {
        print "number of characters: $size\n";
    }
    @mhinput = split(//, substr($theword, 0, $size));

    #this checks that letters are letters
    foreach my $char (@mhinput) {
        $char =~ /[a-zA-Z\']/ || die "Illegal characters!\n";
    }

    #this copies the stress mark into the same
    #array element as the following vowel.
    # The loop starts at 1: the first segment has no predecessor, and
    # $mhinput[0 - 1] would be the LAST element, so a trailing apostrophe
    # would wrongly stress a word-initial vowel.
    # NOTE(review): an already stressed vowel ("'a") also matches the test,
    # so the mark spreads through a run of adjacent vowels - confirm that
    # this is intended.
    for my $pos (1 .. $#mhinput) {
        if ($mhinput[$pos - 1] =~ /\'/) {
            $mhinput[$pos] =~ s/([aeiouyAEIOUY])/\'$1/;
        }
    }

    #this removes stressmarks alone (elements that end in an apostrophe)
    @tempinput = grep { $_ !~ /\'$/ } @mhinput;
    @mhinput = @tempinput;
}
# Builds the CV skeleton: for every element of @mhinput one entry is
# appended to the global @mhcv - 'V' if the element contains a vowel letter
# (a e i o u y, either case; a stressed "'a" counts too), 'C' otherwise.
# With $mhdebug defined the whole skeleton is printed.
sub addcv {
    push(@mhcv, map { /[aeiouyAEIOUY]/ ? 'V' : 'C' } @mhinput);
    if (defined($mhdebug)) {
        print "CV skeleton:", join(" ", @mhcv), "\n\n";
    }
}
# Simulates left-to-right recognition of the word, one segment at a time.
#
# Each segment goes through two rounds:
#   1. only its C/V class is known: the placeholder from @mhcv is appended
#      to @submission, mhgen opens the candidate set for it in @structure,
#      and doconstraints prunes the candidates;
#   2. the placeholder is swapped for the real segment from @mhinput and
#      doconstraints runs once more.
# Before every doconstraints call the current state is printed under a
# "parse #N" heading; the counter $pass is reset here and advanced inside
# doconstraints.
sub submitword {
    $pass = 1;

    # Prints the heading and the two state lines (structure first) and
    # refreshes the globals $writething / $writestruc on the way.
    my $report = sub {
        print "submitting...\n" if defined($mhdebug);
        print "parse #$pass\n";
        $writething = join(" ", @submission);
        $writestruc = join(" ", @structure);
        print "$writestruc\n$writething\n\n";
    };

    foreach my $idx (0 .. $#mhinput) {
        # round 1: the segment is known only as C or V, and is unanalyzed
        push(@submission, $mhcv[$idx]);
        push(@structure, '-');
        $report->();
        mhgen();            # generates candidate syllabifications
        doconstraints();    # eliminates candidates

        # round 2: the actual consonant or vowel replaces the placeholder
        $submission[$#submission] = $mhinput[$idx];
        $report->();
        doconstraints();    # eliminates more candidates
    }
}
# Works out the optimal syllabification for the current state by applying
# the constraints in ranked order:
#   PARSE >> NOONSET >> ONSET   in English
#   PARSE >> ONSET >> NOONSET   in French (any $language other than "English")
# The general restrictions in dobasictypes always run first. Under the
# English ranking ONSET is skipped when donoonset has raised $skipflag; the
# flag is cleared again before returning.
# The state is printed before ("input candidate(s)") and after
# ("output candidate(s)") the constraints are applied, and $pass is advanced.
sub doconstraints {
    # Refreshes the printable forms of both arrays and prints them,
    # structure line first.
    my $show_state = sub {
        $writething = join(" ", @submission);
        $writestruc = join(" ", @structure);
        print "$writestruc\n$writething\n\n";
    };

    print "pass #$pass\n";
    $pass++;
    print "input candidate(s):\n";
    $show_state->();

    print "output candidate(s):\n";
    dobasictypes();
    doparse();              # PARSE is top-ranked in both languages
    if ($language eq "English") {
        donoonset();
        doonset() unless $skipflag == 1;
    }
    else {
        doonset();
        donoonset();
    }
    $skipflag = 0;

    print "outputting...\n" if defined($mhdebug);
    $show_state->();
}
# Opens the full candidate set for the newest segment by overwriting the
# last element of @structure with 'oncu'. Each letter is one possible
# syllabic position:
#   o: onset, n: nucleus, c: coda, u: unsyllabified
# ('-' in @structure marks a segment that has not been analyzed yet.)
sub mhgen {
    print "generating candidate set...\n" if defined($mhdebug);
    $structure[$#structure] = 'oncu';
}
# Applies the general restrictions that are not expressed by the explicit
# constraints and that follow from the local coding of candidates
# (o: onset, n: nucleus, c: coda, u: unsyllabified).
#
# Reads @submission, @mhinput and @mhcv; edits the candidate strings of the
# newest segment and of its left neighbour in @structure.
#
# 'V' and 'C' in @submission are placeholders for a not yet identified
# vowel or consonant, but "V" can also be a real uppercase consonant
# letter. The CV skeleton in @mhcv tells the two apart: a "V" whose
# skeleton entry is 'C' is a letter and is not treated as a vowel slot.
# (A real letter "C" needs no such care - it is a consonant either way.)
sub dobasictypes {
    if (defined($mhdebug)) {
        print "checking basic stuff...\n";
    }
    my $cur     = $#structure;
    my $segment = $submission[$#submission];
    my $class   = $mhcv[$#submission];
    my $is_letter_v = defined($class) && $class eq 'C';

    #vowels can't be onsets or codas
    if ($segment =~ /V/ && !$is_letter_v) {
        $structure[$cur] =~ s/[oc]//g;
    }
    #consonants can't be nuclei
    if ($segment =~ /C/) {
        $structure[$cur] =~ s/n//;
    }
    #word-initial consonants can't be codas
    if ($cur == 0) {
        $structure[0] =~ s/(.)c/$1/;
    }
    #word-final consonants can't be onsets
    if ($cur == $#mhinput) {
        $structure[$#mhinput] =~ s/o(.)/$1/;
    }
    #preconsonantal consonants can't be onsets
    if ($segment =~ /C/ && $#submission > 0) {
        $structure[$cur - 1] =~ s/o(.)/$1/;
    }
    #postonset consonants can't be codas
    # Only meaningful when there is a left neighbour: at index 0 the
    # subscript -1 would address the segment itself.
    if ($cur > 0 && $structure[$cur - 1] =~ /o/) {
        $structure[$cur] =~ s/(.)c/$1/;
    }
}
# PARSE: drops 'u' (unsyllabified) from the candidates of the newest
# segment when other parses are available. The 'u' is only removed when
# some other candidate letter stands in front of it, so a lone 'u' is kept.
sub doparse {
    print "checking PARSE...\n" if defined($mhdebug);
    my $newest = $#structure;
    $structure[$newest] =~ s/(.)u/$1/;
}
# NOONSET: when the newest segment is an identified stressless vowel (a
# single vowel letter with no stress mark), 'o' is removed from the
# candidates of the preceding segment, provided another candidate follows
# the 'o' in that string.
# Independently of that, $skipflag is set to 1 whenever the newest segment
# matches /V/; doconstraints consults the flag under the English ranking.
sub donoonset {
    print "checking NOONSET...\n" if defined($mhdebug);
    my $newest = $submission[$#submission];
    if ($#submission > 0 && $newest =~ /^[aeiouyAEIOUY]$/) {
        $structure[$#structure - 1] =~ s/o(.)/$1/;
    }
    $skipflag = 1 if $newest =~ /V/;
}
# ONSET: when the newest segment is a vowel - either the unidentified
# placeholder 'V' or an identified vowel letter, stressed or not - 'c' is
# removed from the candidates of the PRECEDING segment, provided another
# candidate stands in front of the 'c' in that string.
#
# "V" can also be a real uppercase consonant letter. The CV skeleton in
# @mhcv tells the two apart: a "V" whose skeleton entry is 'C' is a letter
# and must not trigger the constraint.
sub doonset {
    if (defined($mhdebug)) {
        print "checking ONSET...\n";
    }
    my $segment = $submission[$#submission];
    my $class   = $mhcv[$#submission];
    my $is_letter_v = defined($class) && $class eq 'C';

    my $is_vowel = ($segment =~ /V/ && !$is_letter_v)
                || $segment =~ /[aeiouyAEIOUY]/;
    # The first segment has no left neighbour to edit.
    if ($is_vowel && $#submission > 0) {
        $structure[$#structure - 1] =~ s/(.)c/$1/;
    }
}