-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
adding dropout-by row #8
base: dropout_schedule
Are you sure you want to change the base?
Changes from 1 commit
6548b55
23ae730
614a868
c1d1ad1
14662b6
1d22219
5b8b98b
4137c9d
d721e59
1e2adab
463a4dc
d0290c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). | |
num_threads_ubm=32 | ||
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned | ||
dropout_schedule='0,[email protected],[email protected],[email protected],0' | ||
dropout_per_frame=false | ||
chunk_width=150 | ||
chunk_left_context=40 | ||
chunk_right_context=0 | ||
|
@@ -193,15 +194,15 @@ if [ $stage -le 15 ]; then | |
relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 | ||
|
||
# check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults | ||
lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 | ||
lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false | ||
relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 | ||
relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 | ||
relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 | ||
lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 | ||
lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false | ||
relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 | ||
relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 | ||
relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 | ||
lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 | ||
lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false | ||
|
||
## adding the layers for chain branch | ||
output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 | ||
|
@@ -243,6 +244,7 @@ if [ $stage -le 16 ]; then | |
--egs.chunk-left-context $chunk_left_context \ | ||
--egs.chunk-right-context $chunk_right_context \ | ||
--trainer.dropout-schedule $dropout_schedule \ | ||
--trainer.dropout-per-frame $dropout_per_frame \ | ||
--trainer.num-chunk-per-minibatch 64 \ | ||
--trainer.frames-per-iter 1500000 \ | ||
--trainer.num-epochs 4 \ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -511,7 +511,7 @@ def _get_component_dropout(dropout_schedule, num_archives_processed): | |
+ initial_dropout) | ||
|
||
|
||
def apply_dropout(dropout_proportions, raw_model_string): | ||
def apply_dropout(dropout_proportions, dropout_per_frame, raw_model_string): | ||
"""Adds an nnet3-copy --edits line to modify raw_model_string to | ||
set dropout proportions according to dropout_proportions. | ||
|
||
|
@@ -523,10 +523,10 @@ def apply_dropout(dropout_proportions, raw_model_string): | |
|
||
for component_name, dropout_proportion in dropout_proportions: | ||
edit_config_lines.append( | ||
"set-dropout-proportion name={0} proportion={1}".format( | ||
component_name, dropout_proportion)) | ||
dropout_info.append("pattern/dropout-proportion={0}/{1}".format( | ||
component_name, dropout_proportion)) | ||
"set-dropout-proportion name={0} proportion={1} dropout-per-frame={2}".format( | ||
component_name, dropout_proportion, dropout_per_frame)) | ||
dropout_info.append("pattern/dropout-proportion={0}/{1} dropout-per-frame={2}".format( | ||
component_name, dropout_proportion, dropout_per_frame)) | ||
|
||
return ("""{raw_model_string} nnet3-copy --edits='{edits}' \ | ||
- - |""".format(raw_model_string=raw_model_string, | ||
|
@@ -771,6 +771,11 @@ def __init__(self): | |
lstm*=0,0.2,0'. More general should precede | ||
less general patterns, as they are applied | ||
sequentially.""") | ||
self.parser.add_argument("--trainer.dropout-per-frame", type=str, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this option required? Do you expect to change whether dropout is per-frame or not during the training iterations? |
||
action=common_lib.NullstrToNoneAction, | ||
dest='dropout_per_frame', default=None, | ||
help="""this option is used to control whether | ||
using dropout by frame level or by vector level""") | ||
|
||
# General options | ||
self.parser.add_argument("--stage", type=int, default=-4, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,9 +108,7 @@ void DropoutComponent::InitFromConfig(ConfigLine *cfl) { | |
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't need a branch here because dropout_per_frame defaults to false if not set (that's how you |
||
dropout_per_frame = false; | ||
Init(dim, dropout_proportion, dropout_per_frame); | ||
} | ||
else | ||
{ | ||
} else { | ||
Init(dim, dropout_proportion, dropout_per_frame); | ||
} | ||
} | ||
|
@@ -131,7 +129,7 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes, | |
|
||
BaseFloat dropout = dropout_proportion_; | ||
KALDI_ASSERT(dropout >= 0.0 && dropout <= 1.0); | ||
if(dropout_per_frame_ == true) | ||
if(dropout_per_frame_) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use the correct code style. Should be `if (dropout_per_frame_) {`
and note the space after `if`. You can run misc/maintenance/cpplint.py on your code to check for style problems. |
||
{ | ||
// This const_cast is only safe assuming you don't attempt | ||
// to use multi-threaded code with the GPU. | ||
|
@@ -142,9 +140,7 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes, | |
// be zero and (1 - dropout) will be 1.0. | ||
|
||
out->MulElements(in); | ||
} | ||
else | ||
{ | ||
} else { | ||
|
||
// This const_cast is only safe assuming you don't attempt | ||
// to use multi-threaded code with the GPU. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -524,7 +524,7 @@ std::string NnetInfo(const Nnet &nnet) { | |
} | ||
|
||
void SetDropoutProportion(BaseFloat dropout_proportion, | ||
bool dropout_per_frame , | ||
bool dropout_per_frame, | ||
Nnet *nnet) { | ||
dropout_per_frame = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is the input to the function ignored? |
||
for (int32 c = 0; c < nnet->NumComponents(); c++) { | ||
|
@@ -696,13 +696,13 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) { | |
// matches names of components, not nodes. | ||
config_line.GetValue("name", &name_pattern); | ||
BaseFloat proportion = -1; | ||
bool perframe = false; | ||
bool dropout_per_frame = false; | ||
if (!config_line.GetValue("proportion", &proportion)) { | ||
KALDI_ERR << "In edits-config, expected proportion to be set in line: " | ||
<< config_line.WholeLine(); | ||
} | ||
if (!config_line.GetValue("perframe", &perframe)) { | ||
perframe = false; | ||
if (!config_line.GetValue("dropout-per-frame", &dropout_per_frame)) { | ||
dropout_per_frame = false; | ||
} | ||
DropoutComponent *component = NULL; | ||
int32 num_dropout_proportions_set = 0; | ||
|
@@ -711,7 +711,7 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) { | |
name_pattern.c_str()) && | ||
(component = | ||
dynamic_cast<DropoutComponent*>(nnet->GetComponent(c)))) { | ||
component->SetDropoutProportion(proportion, perframe); | ||
component->SetDropoutProportion(proportion, dropout_per_frame); | ||
num_dropout_proportions_set++; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as Vimal says, please remove this from the training code... does not need to be there.