-
Notifications
You must be signed in to change notification settings - Fork 0
/
make_submission.py
55 lines (42 loc) · 1.59 KB
/
make_submission.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import argparse
import os
import numpy as np
import pandas as pd
from src.infer import ensemble
def parse(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the submission script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        Namespace with ``in_base_dir`` (str, default ``"input"``) and
        ``model_dirs`` (non-empty list of str).
    """
    parser = argparse.ArgumentParser(
        description="Create a Kaggle Contrail submission from an ensemble of models"
    )
    parser.add_argument(
        "--in_base_dir",
        default="input",
        help="base input directory; test records are read from <in_base_dir>/test",
    )
    parser.add_argument(
        "--model_dirs",
        nargs="+",
        required=True,
        help="one or more model directories, each passed to the ensemble with weight 1.0",
    )
    return parser.parse_args(argv)
def rle_encode(y_pred, fg_val=1):
    """Run-length encode the elements of ``y_pred`` that equal ``fg_val``.

    The mask is transposed and then flattened before it is scanned, so for a
    2-D array the pixel index runs down each column first. Start positions in
    the output are 1-based.

    Args:
        y_pred: NumPy array holding the mask to encode.
        fg_val: Value that marks a foreground element.

    Returns:
        Space-separated ``start length`` pairs (e.g. ``"2 3 10 1"``), or
        ``"-"`` when no element equals ``fg_val``.
    """
    # tolist() yields plain Python ints. Formatting NumPy scalars through
    # str(list) relies on their repr, which is "np.int64(5)" on NumPy >= 2
    # and would corrupt the encoded string.
    dots = np.where(y_pred.T.flatten() == fg_val)[0].tolist()
    if not dots:
        return "-"

    run_lengths = []
    prev = -2  # sentinel: not adjacent to index 0, so the first hit opens a run
    for b in dots:
        if b > prev + 1:
            # Gap since the previous foreground index: open a new run with a
            # 1-based start and a length that is incremented just below.
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return " ".join(map(str, run_lengths))
def main(args: argparse.Namespace):
    """Run the ensemble on the test records and write ``submission.csv``.

    Steps:
      1. List ``<in_base_dir>/test`` once (sorted) and build the frame of
         image directories handed to ``ensemble``.
      2. Pick a global threshold so that only the highest-scoring fraction
         (``percentile``) of all predicted values is kept as foreground.
      3. Run-length encode every binarised prediction and write the CSV to
         the current working directory.

    Args:
        args: Parsed CLI options; reads ``in_base_dir`` and ``model_dirs``.
    """
    test_dir = f"{args.in_base_dir}/test"
    # List the directory a single time so that the record ids written to the
    # submission are guaranteed to line up with the order of the predictions.
    record_ids = sorted(os.listdir(test_dir))
    df = pd.DataFrame({"image_dir": [f"{test_dir}/{record_id}" for record_id in record_ids]})

    # Every model directory contributes with the same weight.
    model_weights = [(model_dir, 1.0) for model_dir in args.model_dirs]
    # NOTE(review): preds is presumably a torch tensor whose first axis indexes
    # the rows of df (it is used with .argsort(descending=True) and .detach())
    # -- confirm against src.infer.ensemble.
    preds = ensemble(df, model_weights)

    # Fraction of all predicted values that should end up as foreground.
    percentile = 0.0016
    flat_preds = preds.flatten()
    idx = flat_preds.argsort(descending=True)
    # Value at rank round(N * percentile) in descending order; anything
    # strictly above it is labelled foreground.
    threshold = flat_preds[idx[round(len(idx) * percentile)]]
    binary_preds = (preds > threshold).detach().numpy().astype(int)

    sub_df = pd.DataFrame({"record_id": record_ids})
    sub_df["encoded_pixels"] = [rle_encode(binary_pred) for binary_pred in binary_preds]
    print(sub_df)
    sub_df.to_csv("submission.csv", index=False)
# Script entry point: parse the command line, then build the submission.
if __name__ == "__main__":
    cli_args = parse()
    main(cli_args)