|
| 1 | +import argparse |
| 2 | + |
| 3 | + |
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is a trap with argparse: it calls ``bool()`` on the raw
    string, so every non-empty value (including ``"False"`` and ``"0"``)
    becomes ``True``. This converter interprets the text instead.

    Args:
        value: The raw token from the command line, or an actual ``bool``.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised
            spelling of a boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, as it was under the old ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Expected a boolean value (e.g. True/False), got %r.' % (value,))


def init_args():
    """Build the command-line parser and parse ``sys.argv``.

    ``--output_dir``, ``--data_path`` and ``--image_folder`` are required;
    every other option has a default.

    The boolean switches (``--use_yake``, ``--choices``, ``--ablation_zero``,
    ``--ablation_noise``) accept an explicit value (``--use_yake False``) or
    may be given bare (``--use_yake``), which means ``True``.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            a value is invalid.
    """
    parser = argparse.ArgumentParser(
        description='Generate explanations for open-ended responses of LVLMs.'
    )

    parser.add_argument(
        '--model',
        metavar='M',
        type=str,
        choices=['llava', 'cambrian', 'llava_next', 'mgm'],
        default='llava',
        help='The model to use for making predictions.')

    parser.add_argument(
        '--dataset',
        type=str,
        default='cvbench',
        help='The dataset to use for making predictions.')

    parser.add_argument(
        '--output_dir',
        type=str,
        help='The path to the output directory for saving explanation results.',
        required=True)

    parser.add_argument(
        '--size',
        type=int,
        default=32,
        help='The resolution of mask to be generated.')

    parser.add_argument(
        '--input_size',
        type=int,
        default=336,
        help='The input size to the network.')

    parser.add_argument(
        '--manual_seed',
        type=int,
        default=0,
        help='The manual seed for experiments.')

    parser.add_argument(
        '--method',
        type=str,
        choices=['iGOS+', 'iGOS++'],
        default='iGOS+'
    )

    parser.add_argument(
        '--opt',
        type=str,
        choices=['LS', 'NAG'],
        default='NAG',
        help='The optimization algorithm.'
    )

    parser.add_argument(
        '--diverse_k',
        type=int,
        default=1)

    # Cell numbering used by --init_posi (row-major over the K x K grid):
    #
    #   If K = 2:        If K = 3:
    #   -------          ----------
    #   |0 |1 |          |0 |1 |2 |
    #   -------          ----------
    #   |2 |3 |          |3 |4 |5 |
    #   -------          ----------
    #                    |6 |7 |8 |
    #                    ----------
    parser.add_argument(
        '--init_posi',
        type=int,
        default=0,
        help='The initialization position, which cell of the K x K grid will be used to initialize the mask with nonzero values (use init_val to control it)')

    parser.add_argument(
        '--init_val',
        type=float,
        default=0.,
        help='The initialization value used to initialize the mask in only one cell of the K x K grid.')

    parser.add_argument(
        '--L1',
        type=float,
        default=0.1,
        help='The weight of L1 norm'
    )

    parser.add_argument(
        '--L2',
        type=float,
        default=1.0,
        help='The weight of L2 norm'
    )

    parser.add_argument(
        '--gamma',
        type=float,
        default=0.2,
        help='The exponential decay rate of the graduated non-convexity'
    )

    parser.add_argument(
        '--L3',
        type=float,
        default=20.0,
        help='The weight of BTV norm'
    )

    parser.add_argument(
        '--momentum',
        type=int,
        default=3
    )

    parser.add_argument(
        '--ig_iter',
        type=int,
        help='The step size of the integtated gradient accumulation.',
        default=5)

    parser.add_argument(
        '--iterations',
        type=int,
        default=5
    )

    parser.add_argument(
        '--lr',
        type=float,
        default=10,
        help='The step size for updating the mask'
    )

    parser.add_argument(
        '--model_base',
        default=None,
        type=str,
        help='The path to the model base file to be used.'
    )

    parser.add_argument(
        "--data_path",
        type=str,
        required=True,
        help="The path to the input question file."
    )

    parser.add_argument(
        "--image_folder",
        type=str,
        required=True,
        help="The path to the image folder"
    )

    # LVLM generation settings
    parser.add_argument("--temperature", type=float, default=0)
    parser.add_argument("--top_p", type=float, default=None)
    parser.add_argument("--max_new_tokens", type=int, default=1024)
    parser.add_argument("--num_beams", type=int, default=1)

    # Boolean switches. nargs='?' with const=True lets the flag be given
    # bare; _str2bool makes an explicit "False"/"0" actually mean False.
    boolean_flags = (
        ("--use_yake", "Whether use yake to detect keywords"),
        ("--choices", "Whether ask the LVLMs to generate single choice instead of open-ended responses"),
        ("--ablation_zero", "Ablation of baseline image using all-zero images"),
        ("--ablation_noise", "Ablation of baseline image using random noise"),
    )
    for flag, description in boolean_flags:
        parser.add_argument(
            flag,
            type=_str2bool,
            nargs='?',
            const=True,
            default=False,
            help=description)

    return parser.parse_args()
0 commit comments