diff --git a/.gitignore b/.gitignore index 2e61c87..4cc71dd 100644 --- a/.gitignore +++ b/.gitignore @@ -106,3 +106,6 @@ venv.bak/ *.pyc exp/ backup_exp/ + +# MacOS +.DS_Store \ No newline at end of file diff --git a/README_bay.md b/README_bay.md new file mode 100644 index 0000000..390ffe2 --- /dev/null +++ b/README_bay.md @@ -0,0 +1,91 @@ +# Bayesian-Crowd-Counting (ICCV 2019 oral) +[Arxiv](https://arxiv.org/abs/1908.03684) | [CVF](http://openaccess.thecvf.com/content_ICCV_2019/papers/Ma_Bayesian_Loss_for_Crowd_Count_Estimation_With_Point_Supervision_ICCV_2019_paper.pdf) +### Official Implementation of ICCV 2019 oral paper "Bayesian Loss for Crowd Count Estimation with Point Supervision" + +## Visualization +### Bayesian + +![](imgs/bayesian.png) + +### Bayesian+ + +![](imgs/bayesian+.png) + +### Density + +![](imgs/density.png) + +## Citation +If you use this code for your research, please cite our paper: + +``` +@inproceedings{ma2019bayesian, + title={Bayesian loss for crowd count estimation with point supervision}, + author={Ma, Zhiheng and Wei, Xing and Hong, Xiaopeng and Gong, Yihong}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={6142--6151}, + year={2019} +} +``` + +## Code + +### Install dependencies + +torch >= 1.0 torchvision opencv numpy scipy, all the dependencies can be easily installed by pip or conda + +This code was tested with python 3.6 + +### Train and Test + +1、 Download Dataset UCF-QNRF [Link](https://www.crcv.ucf.edu/data/ucf-qnrf/) + +2、 Pre-Process Data (resize image and split train/validation) + +``` +python preprocess_dataset.py --origin_dir --data_dir +``` + +3、 Train model (validate on single GTX Titan X) + +``` +python train.py --data_dir --save_dir +``` + +4、 Test Model +``` +python test.py --data_dir --save_dir +``` +The result is slightly influenced by the random seed, but fixing the random seed (which requires setting cuda_benchmark to False) will make training time extraordinarily long, so sometimes you 
can get a slightly worse result than the reported result, but most of the time you can get a better result than the reported one. If you find this code useful, please give us a star and cite our paper, have fun. + +5、 Training on ShanghaiTech Dataset + +Change dataloader to crowd_sh.py + +For shanghaitech a, you should set learning rate to 1e-6, and bg_ratio to 0.1 + +### Pretrain Weight +#### UCF-QNRF + +Baidu Yun [Link](https://pan.baidu.com/s/1Evxxu1skHni3Iv3VxdcZvA) extract code: x9wc + +Google Drive [Link](https://drive.google.com/file/d/1i22E7_zigkSm7nBnqMaEv00MD3CPhIDk/view?usp=sharing) + +#### ShanghaiTech A + +Baidu Yun [Link](https://pan.baidu.com/s/1GlaxGzFI8qFCHbqu56qSRw) extract code: tx0m + +Google Drive [Link](https://drive.google.com/file/d/13bEdshBY-brUvLSwTCOqDlK5QKcZIAAH/view?usp=sharing) + +#### ShanghaiTech B + +Baidu Yun [Link](https://pan.baidu.com/s/1YYg-a-sdhBAHZRJzZOU-6Q) extract code: a15u + +Google Drive [Link](https://drive.google.com/file/d/1woK-bI_JyeY9wZL2pXsWgPzQqhD8Qy0u/view?usp=sharing) + +### License + +GNU GENERAL PUBLIC LICENSE +Version 3, 29 June 2007 +Copyright © 2007 Free Software Foundation, Inc. 
+ diff --git a/bayesian/train.txt b/bayesian/train.txt new file mode 100644 index 0000000..152dee1 --- /dev/null +++ b/bayesian/train.txt @@ -0,0 +1,1081 @@ +img_0526.jpg +img_0639.jpg +img_0826.jpg +img_0415.jpg +img_0720.jpg +img_0123.jpg +img_0529.jpg +img_1071.jpg +img_0501.jpg +img_0804.jpg +img_0873.jpg +img_0601.jpg +img_0177.jpg +img_0173.jpg +img_0675.jpg +img_1001.jpg +img_0096.jpg +img_1139.jpg +img_0001.jpg +img_0084.jpg +img_0395.jpg +img_0166.jpg +img_0368.jpg +img_0093.jpg +img_0004.jpg +img_0572.jpg +img_0956.jpg +img_0721.jpg +img_0120.jpg +img_0554.jpg +img_0308.jpg +img_0131.jpg +img_0992.jpg +img_0156.jpg +img_0532.jpg +img_0476.jpg +img_0427.jpg +img_1162.jpg +img_0660.jpg +img_0538.jpg +img_0298.jpg +img_0306.jpg +img_1173.jpg +img_1157.jpg +img_0777.jpg +img_0859.jpg +img_0537.jpg +img_0236.jpg +img_0986.jpg +img_0370.jpg +img_0491.jpg +img_1150.jpg +img_0719.jpg +img_1083.jpg +img_0107.jpg +img_1029.jpg +img_0927.jpg +img_0893.jpg +img_0286.jpg +img_1135.jpg +img_0640.jpg +img_0530.jpg +img_1115.jpg +img_0533.jpg +img_0105.jpg +img_0945.jpg +img_1035.jpg +img_0484.jpg +img_1168.jpg +img_0760.jpg +img_0939.jpg +img_0907.jpg +img_0401.jpg +img_0429.jpg +img_0828.jpg +img_1167.jpg +img_0144.jpg +img_0553.jpg +img_0421.jpg +img_0560.jpg +img_0743.jpg +img_0817.jpg +img_0657.jpg +img_0106.jpg +img_0079.jpg +img_0473.jpg +img_0865.jpg +img_0730.jpg +img_0989.jpg +img_0243.jpg +img_0182.jpg +img_0252.jpg +img_0812.jpg +img_0508.jpg +img_0744.jpg +img_0439.jpg +img_0181.jpg +img_0965.jpg +img_0487.jpg +img_0710.jpg +img_1054.jpg +img_0947.jpg +img_0321.jpg +img_0758.jpg +img_0014.jpg +img_0504.jpg +img_0674.jpg +img_0991.jpg +img_0358.jpg +img_1138.jpg +img_0019.jpg +img_0677.jpg +img_0336.jpg +img_0070.jpg +img_0766.jpg +img_0612.jpg +img_1109.jpg +img_0840.jpg +img_0616.jpg +img_0926.jpg +img_0376.jpg +img_0761.jpg +img_0020.jpg +img_0795.jpg +img_0046.jpg +img_0459.jpg +img_0267.jpg +img_0428.jpg +img_1122.jpg +img_0247.jpg +img_1143.jpg 
+img_0290.jpg +img_0524.jpg +img_0275.jpg +img_1120.jpg +img_0115.jpg +img_0698.jpg +img_0092.jpg +img_0922.jpg +img_1052.jpg +img_0297.jpg +img_0112.jpg +img_0180.jpg +img_0520.jpg +img_0351.jpg +img_0478.jpg +img_0588.jpg +img_0109.jpg +img_0738.jpg +img_0592.jpg +img_0752.jpg +img_1028.jpg +img_1164.jpg +img_0450.jpg +img_0168.jpg +img_1108.jpg +img_0799.jpg +img_0649.jpg +img_0272.jpg +img_0902.jpg +img_0874.jpg +img_0870.jpg +img_0821.jpg +img_0153.jpg +img_0426.jpg +img_0949.jpg +img_0527.jpg +img_1198.jpg +img_0443.jpg +img_0063.jpg +img_0013.jpg +img_0564.jpg +img_0040.jpg +img_0764.jpg +img_0411.jpg +img_0118.jpg +img_1172.jpg +img_0196.jpg +img_0879.jpg +img_0985.jpg +img_0437.jpg +img_0918.jpg +img_0493.jpg +img_0271.jpg +img_0860.jpg +img_0059.jpg +img_0645.jpg +img_1126.jpg +img_0911.jpg +img_1082.jpg +img_0383.jpg +img_0422.jpg +img_0139.jpg +img_1192.jpg +img_0904.jpg +img_0503.jpg +img_0512.jpg +img_0541.jpg +img_0330.jpg +img_0348.jpg +img_0425.jpg +img_0673.jpg +img_0210.jpg +img_0950.jpg +img_0151.jpg +img_0792.jpg +img_0469.jpg +img_0661.jpg +img_0003.jpg +img_0089.jpg +img_0312.jpg +img_0555.jpg +img_0215.jpg +img_0023.jpg +img_1129.jpg +img_0249.jpg +img_0451.jpg +img_1032.jpg +img_0689.jpg +img_1189.jpg +img_0391.jpg +img_0146.jpg +img_0653.jpg +img_0248.jpg +img_0695.jpg +img_0402.jpg +img_0075.jpg +img_1018.jpg +img_1020.jpg +img_0163.jpg +img_0440.jpg +img_0756.jpg +img_0253.jpg +img_0712.jpg +img_0962.jpg +img_0471.jpg +img_0842.jpg +img_0525.jpg +img_1176.jpg +img_1021.jpg +img_0127.jpg +img_0295.jpg +img_1045.jpg +img_1088.jpg +img_1090.jpg +img_0622.jpg +img_0650.jpg +img_0518.jpg +img_0854.jpg +img_0262.jpg +img_0323.jpg +img_0522.jpg +img_0933.jpg +img_0951.jpg +img_0366.jpg +img_0325.jpg +img_1034.jpg +img_0827.jpg +img_0194.jpg +img_0636.jpg +img_0051.jpg +img_0683.jpg +img_0558.jpg +img_0309.jpg +img_0345.jpg +img_0438.jpg +img_1091.jpg +img_0577.jpg +img_0500.jpg +img_0279.jpg +img_1145.jpg +img_0886.jpg +img_1161.jpg 
+img_0617.jpg +img_0726.jpg +img_0620.jpg +img_0444.jpg +img_1118.jpg +img_0506.jpg +img_0164.jpg +img_0507.jpg +img_0614.jpg +img_0769.jpg +img_1131.jpg +img_0185.jpg +img_0694.jpg +img_1055.jpg +img_0754.jpg +img_0569.jpg +img_0317.jpg +img_0228.jpg +img_0492.jpg +img_1190.jpg +img_0566.jpg +img_0921.jpg +img_0818.jpg +img_0204.jpg +img_0974.jpg +img_0866.jpg +img_1039.jpg +img_0101.jpg +img_0169.jpg +img_0375.jpg +img_0334.jpg +img_1078.jpg +img_0061.jpg +img_0113.jpg +img_0981.jpg +img_0080.jpg +img_0324.jpg +img_0316.jpg +img_0643.jpg +img_0408.jpg +img_0890.jpg +img_0363.jpg +img_0765.jpg +img_0822.jpg +img_0430.jpg +img_0245.jpg +img_0671.jpg +img_0486.jpg +img_1201.jpg +img_0129.jpg +img_1142.jpg +img_0843.jpg +img_1133.jpg +img_0238.jpg +img_0955.jpg +img_1017.jpg +img_0858.jpg +img_1154.jpg +img_0559.jpg +img_0002.jpg +img_0407.jpg +img_1146.jpg +img_1086.jpg +img_0495.jpg +img_0857.jpg +img_0133.jpg +img_0121.jpg +img_0973.jpg +img_0830.jpg +img_0165.jpg +img_0278.jpg +img_1012.jpg +img_0393.jpg +img_0202.jpg +img_0700.jpg +img_0313.jpg +img_0024.jpg +img_0055.jpg +img_0979.jpg +img_0162.jpg +img_0135.jpg +img_0098.jpg +img_0727.jpg +img_0969.jpg +img_1137.jpg +img_0932.jpg +img_1102.jpg +img_0301.jpg +img_0047.jpg +img_0595.jpg +img_0805.jpg +img_0801.jpg +img_1151.jpg +img_0387.jpg +img_0999.jpg +img_0136.jpg +img_1037.jpg +img_1087.jpg +img_1186.jpg +img_0032.jpg +img_0195.jpg +img_0360.jpg +img_0276.jpg +img_0642.jpg +img_0913.jpg +img_0231.jpg +img_0670.jpg +img_1123.jpg +img_0517.jpg +img_0707.jpg +img_0088.jpg +img_0594.jpg +img_0838.jpg +img_0848.jpg +img_0354.jpg +img_0936.jpg +img_0876.jpg +img_1081.jpg +img_0322.jpg +img_0637.jpg +img_0739.jpg +img_0917.jpg +img_0244.jpg +img_0591.jpg +img_0628.jpg +img_0964.jpg +img_0691.jpg +img_0609.jpg +img_0342.jpg +img_1097.jpg +img_1077.jpg +img_0502.jpg +img_0423.jpg +img_0561.jpg +img_1059.jpg +img_0568.jpg +img_0920.jpg +img_0389.jpg +img_0940.jpg +img_0787.jpg +img_0634.jpg +img_0516.jpg 
+img_0900.jpg +img_0463.jpg +img_0942.jpg +img_0796.jpg +img_0835.jpg +img_0789.jpg +img_0184.jpg +img_0397.jpg +img_1195.jpg +img_1089.jpg +img_0319.jpg +img_0328.jpg +img_0724.jpg +img_0852.jpg +img_0662.jpg +img_0225.jpg +img_0479.jpg +img_0266.jpg +img_0499.jpg +img_0134.jpg +img_1023.jpg +img_1064.jpg +img_0400.jpg +img_0226.jpg +img_0015.jpg +img_0203.jpg +img_0548.jpg +img_1084.jpg +img_0970.jpg +img_0718.jpg +img_0138.jpg +img_0095.jpg +img_0831.jpg +img_0482.jpg +img_1000.jpg +img_0234.jpg +img_0183.jpg +img_0687.jpg +img_0923.jpg +img_0197.jpg +img_1016.jpg +img_1100.jpg +img_0034.jpg +img_0587.jpg +img_0229.jpg +img_1178.jpg +img_0124.jpg +img_0424.jpg +img_0496.jpg +img_0179.jpg +img_1110.jpg +img_0998.jpg +img_0742.jpg +img_0578.jpg +img_0207.jpg +img_0305.jpg +img_0373.jpg +img_0971.jpg +img_0292.jpg +img_0861.jpg +img_0621.jpg +img_0414.jpg +img_1140.jpg +img_0737.jpg +img_0176.jpg +img_1057.jpg +img_1095.jpg +img_0667.jpg +img_0755.jpg +img_0318.jpg +img_0170.jpg +img_0418.jpg +img_0178.jpg +img_1200.jpg +img_0021.jpg +img_0652.jpg +img_0327.jpg +img_0627.jpg +img_1051.jpg +img_0837.jpg +img_0352.jpg +img_0029.jpg +img_0833.jpg +img_0952.jpg +img_0488.jpg +img_0474.jpg +img_0702.jpg +img_0819.jpg +img_1188.jpg +img_0261.jpg +img_0685.jpg +img_1024.jpg +img_0008.jpg +img_0734.jpg +img_0509.jpg +img_0888.jpg +img_0676.jpg +img_0404.jpg +img_1046.jpg +img_1127.jpg +img_1008.jpg +img_0161.jpg +img_0699.jpg +img_0085.jpg +img_0703.jpg +img_0083.jpg +img_0934.jpg +img_0626.jpg +img_1170.jpg +img_1065.jpg +img_0664.jpg +img_0883.jpg +img_0655.jpg +img_0263.jpg +img_1005.jpg +img_1061.jpg +img_0333.jpg +img_0881.jpg +img_1041.jpg +img_0540.jpg +img_1185.jpg +img_0953.jpg +img_0586.jpg +img_1011.jpg +img_0846.jpg +img_0149.jpg +img_1075.jpg +img_0894.jpg +img_0759.jpg +img_1177.jpg +img_0258.jpg +img_0171.jpg +img_0740.jpg +img_0006.jpg +img_0353.jpg +img_0615.jpg +img_0810.jpg +img_0142.jpg +img_0958.jpg +img_0584.jpg +img_0390.jpg +img_0585.jpg 
+img_0365.jpg +img_0026.jpg +img_0458.jpg +img_0143.jpg +img_0575.jpg +img_1027.jpg +img_1183.jpg +img_0535.jpg +img_0891.jpg +img_1085.jpg +img_0757.jpg +img_0549.jpg +img_0436.jpg +img_0815.jpg +img_0635.jpg +img_0954.jpg +img_0367.jpg +img_0064.jpg +img_0410.jpg +img_0277.jpg +img_1111.jpg +img_1025.jpg +img_0434.jpg +img_1175.jpg +img_1171.jpg +img_0610.jpg +img_0618.jpg +img_0208.jpg +img_0281.jpg +img_0058.jpg +img_0851.jpg +img_0300.jpg +img_0017.jpg +img_0110.jpg +img_0265.jpg +img_0362.jpg +img_1038.jpg +img_0580.jpg +img_1096.jpg +img_0972.jpg +img_0666.jpg +img_0090.jpg +img_1007.jpg +img_0982.jpg +img_0287.jpg +img_0714.jpg +img_0218.jpg +img_0832.jpg +img_0145.jpg +img_0072.jpg +img_0222.jpg +img_0137.jpg +img_0741.jpg +img_0028.jpg +img_0413.jpg +img_0232.jpg +img_0573.jpg +img_0849.jpg +img_0855.jpg +img_0770.jpg +img_0283.jpg +img_0914.jpg +img_0611.jpg +img_1047.jpg +img_0596.jpg +img_0706.jpg +img_0847.jpg +img_0868.jpg +img_0193.jpg +img_0780.jpg +img_0100.jpg +img_0786.jpg +img_0337.jpg +img_0728.jpg +img_0656.jpg +img_0602.jpg +img_1015.jpg +img_0273.jpg +img_0797.jpg +img_0398.jpg +img_0693.jpg +img_0944.jpg +img_0593.jpg +img_0768.jpg +img_0995.jpg +img_1125.jpg +img_0078.jpg +img_0543.jpg +img_0167.jpg +img_0420.jpg +img_0264.jpg +img_0016.jpg +img_0599.jpg +img_0417.jpg +img_0448.jpg +img_0748.jpg +img_0311.jpg +img_0071.jpg +img_0749.jpg +img_0941.jpg +img_0237.jpg +img_0214.jpg +img_1149.jpg +img_0241.jpg +img_0461.jpg +img_0018.jpg +img_0356.jpg +img_0483.jpg +img_0099.jpg +img_0130.jpg +img_0372.jpg +img_0800.jpg +img_0654.jpg +img_0544.jpg +img_1099.jpg +img_1068.jpg +img_0326.jpg +img_0374.jpg +img_0074.jpg +img_0938.jpg +img_0117.jpg +img_0456.jpg +img_0901.jpg +img_0713.jpg +img_0788.jpg +img_0665.jpg +img_0294.jpg +img_0841.jpg +img_0269.jpg +img_0579.jpg +img_1098.jpg +img_0466.jpg +img_0480.jpg +img_0709.jpg +img_0672.jpg +img_1010.jpg +img_0314.jpg +img_0043.jpg +img_0349.jpg +img_0172.jpg +img_1187.jpg +img_0371.jpg 
+img_0320.jpg +img_1103.jpg +img_1159.jpg +img_0629.jpg +img_0399.jpg +img_0663.jpg +img_0335.jpg +img_1148.jpg +img_0108.jpg +img_0254.jpg +img_0432.jpg +img_0915.jpg +img_0624.jpg +img_0997.jpg +img_0711.jpg +img_0704.jpg +img_1147.jpg +img_0036.jpg +img_0519.jpg +img_0680.jpg +img_0498.jpg +img_0651.jpg +img_0230.jpg +img_0198.jpg +img_0905.jpg +img_0751.jpg +img_0928.jpg +img_0630.jpg +img_0140.jpg +img_0644.jpg +img_0776.jpg +img_0057.jpg +img_0361.jpg +img_0209.jpg +img_0158.jpg +img_1160.jpg +img_1169.jpg +img_0735.jpg +img_0551.jpg +img_0681.jpg +img_0515.jpg +img_0077.jpg +img_0968.jpg +img_0240.jpg +img_1166.jpg +img_0937.jpg +img_0877.jpg +img_0513.jpg +img_0528.jpg +img_0150.jpg +img_1165.jpg +img_0200.jpg +img_0246.jpg +img_0869.jpg +img_0011.jpg +img_0160.jpg +img_0464.jpg +img_0285.jpg +img_0132.jpg +img_0701.jpg +img_0082.jpg +img_1182.jpg +img_0030.jpg +img_0126.jpg +img_0632.jpg +img_0731.jpg +img_0875.jpg +img_0978.jpg +img_0717.jpg +img_0460.jpg +img_1044.jpg +img_1194.jpg +img_0910.jpg +img_0049.jpg +img_0331.jpg +img_0213.jpg +img_0885.jpg +img_0468.jpg +img_0419.jpg +img_1158.jpg +img_0022.jpg +img_0174.jpg +img_0747.jpg +img_1006.jpg +img_0381.jpg +img_1036.jpg +img_0863.jpg +img_0994.jpg +img_0783.jpg +img_0346.jpg +img_0233.jpg +img_0820.jpg +img_1107.jpg +img_1193.jpg +img_0943.jpg +img_1191.jpg +img_0005.jpg +img_0087.jpg +img_0039.jpg +img_0813.jpg +img_0239.jpg +img_0206.jpg +img_0256.jpg +img_1070.jpg +img_0409.jpg +img_0377.jpg +img_0446.jpg +img_0216.jpg +img_0189.jpg +img_0785.jpg +img_0041.jpg +img_0598.jpg +img_0310.jpg +img_0307.jpg +img_1093.jpg +img_0465.jpg +img_0746.jpg +img_0380.jpg +img_0732.jpg +img_0781.jpg +img_0906.jpg +img_0619.jpg +img_0604.jpg +img_0983.jpg +img_0753.jpg +img_0211.jpg +img_0552.jpg +img_0892.jpg +img_0767.jpg +img_1180.jpg +img_1069.jpg +img_0154.jpg +img_0899.jpg +img_0343.jpg +img_0025.jpg +img_1196.jpg +img_0155.jpg +img_0433.jpg +img_0597.jpg +img_0570.jpg +img_0867.jpg +img_0223.jpg 
+img_0581.jpg +img_0186.jpg +img_0122.jpg +img_1134.jpg +img_0340.jpg +img_0957.jpg +img_0364.jpg +img_0069.jpg +img_1114.jpg +img_0646.jpg +img_0679.jpg +img_0623.jpg +img_0392.jpg +img_0814.jpg +img_0589.jpg +img_0299.jpg +img_0931.jpg +img_0836.jpg +img_0963.jpg +img_0094.jpg +img_0987.jpg +img_0930.jpg +img_0976.jpg +img_0924.jpg +img_0384.jpg +img_0035.jpg +img_0076.jpg +img_1101.jpg +img_0405.jpg +img_0350.jpg +img_0147.jpg +img_0659.jpg +img_1013.jpg +img_0948.jpg +img_0066.jpg +img_1132.jpg +img_0829.jpg +img_0690.jpg +img_1060.jpg +img_0457.jpg +img_0897.jpg +img_0825.jpg +img_1163.jpg +img_0803.jpg +img_0563.jpg +img_0574.jpg +img_0175.jpg +img_1112.jpg +img_0668.jpg +img_0045.jpg +img_0259.jpg +img_0341.jpg +img_1067.jpg +img_1040.jpg +img_1106.jpg +img_0205.jpg +img_0296.jpg +img_0255.jpg +img_1152.jpg +img_0772.jpg +img_0613.jpg +img_1121.jpg +img_0834.jpg +img_0406.jpg +img_0762.jpg +img_0442.jpg +img_0192.jpg +img_0044.jpg +img_0774.jpg +img_0606.jpg +img_0359.jpg +img_0467.jpg +img_0779.jpg +img_0060.jpg +img_1074.jpg +img_0494.jpg +img_1153.jpg +img_0102.jpg +img_0582.jpg +img_0386.jpg +img_0212.jpg +img_0625.jpg +img_0844.jpg +img_0872.jpg +img_1105.jpg +img_0396.jpg +img_1119.jpg +img_0052.jpg +img_0454.jpg +img_1179.jpg +img_0862.jpg +img_0481.jpg +img_1026.jpg +img_0511.jpg +img_0912.jpg +img_1124.jpg +img_0148.jpg +img_0960.jpg +img_0523.jpg +img_0531.jpg +img_0729.jpg +img_0571.jpg +img_0908.jpg +img_0889.jpg +img_0188.jpg +img_0037.jpg +img_0716.jpg +img_1014.jpg +img_0394.jpg +img_1056.jpg +img_0462.jpg +img_0850.jpg +img_0784.jpg +img_1002.jpg +img_0763.jpg +img_0159.jpg +img_0009.jpg +img_0708.jpg +img_1050.jpg +img_0678.jpg +img_0648.jpg +img_0010.jpg +img_1031.jpg +img_0445.jpg +img_0355.jpg +img_1117.jpg +img_0378.jpg +img_0550.jpg +img_0217.jpg +img_0260.jpg +img_0816.jpg +img_0996.jpg +img_0081.jpg +img_0878.jpg +img_0199.jpg +img_0431.jpg +img_1144.jpg +img_0688.jpg +img_0745.jpg +img_0686.jpg +img_1042.jpg +img_0187.jpg 
+img_1066.jpg +img_0682.jpg +img_0048.jpg +img_0896.jpg +img_0608.jpg +img_1003.jpg +img_1156.jpg +img_0723.jpg +img_0692.jpg +img_0220.jpg +img_0993.jpg +img_1197.jpg +img_0447.jpg +img_0369.jpg +img_0056.jpg +img_0807.jpg +img_0315.jpg +img_0567.jpg +img_0452.jpg +img_1128.jpg +img_0647.jpg +img_0242.jpg +img_0201.jpg +img_0497.jpg +img_0031.jpg +img_0771.jpg +img_0547.jpg +img_0705.jpg +img_0725.jpg +img_1058.jpg +img_0053.jpg +img_1043.jpg +img_0722.jpg +img_0435.jpg +img_0284.jpg +img_0583.jpg +img_0882.jpg +img_0111.jpg +img_0959.jpg +img_1076.jpg +img_0880.jpg +img_0224.jpg +img_0977.jpg +img_0270.jpg +img_0793.jpg +img_0603.jpg +img_1116.jpg +img_0304.jpg +img_0884.jpg +img_1136.jpg +img_0235.jpg +img_0412.jpg +img_0980.jpg +img_0988.jpg +img_0773.jpg +img_1174.jpg +img_0562.jpg +img_0871.jpg +img_0798.jpg +img_0453.jpg +img_0696.jpg +img_0104.jpg +img_0607.jpg +img_0669.jpg +img_0293.jpg +img_1141.jpg +img_0329.jpg +img_0534.jpg +img_1113.jpg +img_0288.jpg +img_0961.jpg +img_0388.jpg +img_0073.jpg +img_0141.jpg +img_0935.jpg +img_1062.jpg +img_0227.jpg +img_0895.jpg +img_0449.jpg +img_0565.jpg +img_1009.jpg +img_0282.jpg +img_0806.jpg +img_1033.jpg +img_0332.jpg +img_0903.jpg +img_0475.jpg +img_0050.jpg +img_0455.jpg +img_0845.jpg +img_0946.jpg +img_0490.jpg +img_0274.jpg +img_0909.jpg +img_0966.jpg +img_0219.jpg +img_0898.jpg +img_0403.jpg diff --git a/bayesian/val.txt b/bayesian/val.txt new file mode 100644 index 0000000..fa004ff --- /dev/null +++ b/bayesian/val.txt @@ -0,0 +1,120 @@ +img_0042.jpg +img_0697.jpg +img_0012.jpg +img_0062.jpg +img_0990.jpg +img_1048.jpg +img_0576.jpg +img_0802.jpg +img_0116.jpg +img_0119.jpg +img_0967.jpg +img_0054.jpg +img_0782.jpg +img_0514.jpg +img_0929.jpg +img_0809.jpg +img_0033.jpg +img_0125.jpg +img_0633.jpg +img_0038.jpg +img_0775.jpg +img_0600.jpg +img_0157.jpg +img_0824.jpg +img_0103.jpg +img_0984.jpg +img_0250.jpg +img_0505.jpg +img_0631.jpg +img_0556.jpg +img_1049.jpg +img_1181.jpg +img_0097.jpg +img_0536.jpg 
+img_1104.jpg +img_0733.jpg +img_1130.jpg +img_0808.jpg +img_0086.jpg +img_0302.jpg +img_0114.jpg +img_0470.jpg +img_0715.jpg +img_0641.jpg +img_0557.jpg +img_0510.jpg +img_0152.jpg +img_0485.jpg +img_0190.jpg +img_0065.jpg +img_0839.jpg +img_0068.jpg +img_0864.jpg +img_0477.jpg +img_0441.jpg +img_0546.jpg +img_0091.jpg +img_0853.jpg +img_0975.jpg +img_0357.jpg +img_1004.jpg +img_0794.jpg +img_0750.jpg +img_0791.jpg +img_0605.jpg +img_0590.jpg +img_0489.jpg +img_0191.jpg +img_0007.jpg +img_0778.jpg +img_0658.jpg +img_0289.jpg +img_0925.jpg +img_1184.jpg +img_0521.jpg +img_0291.jpg +img_0823.jpg +img_0382.jpg +img_0416.jpg +img_0736.jpg +img_0268.jpg +img_0128.jpg +img_0280.jpg +img_1022.jpg +img_0545.jpg +img_0257.jpg +img_0251.jpg +img_0684.jpg +img_1092.jpg +img_0638.jpg +img_1079.jpg +img_0790.jpg +img_0811.jpg +img_0303.jpg +img_0542.jpg +img_1019.jpg +img_0472.jpg +img_0027.jpg +img_0539.jpg +img_0856.jpg +img_1094.jpg +img_1030.jpg +img_1063.jpg +img_0887.jpg +img_0067.jpg +img_0379.jpg +img_0919.jpg +img_1155.jpg +img_0221.jpg +img_1053.jpg +img_0916.jpg +img_1072.jpg +img_0347.jpg +img_1199.jpg +img_1080.jpg +img_0385.jpg +img_0344.jpg +img_1073.jpg +img_0339.jpg +img_0338.jpg diff --git a/config.py b/config.py index 2da73fd..c7c512d 100644 --- a/config.py +++ b/config.py @@ -25,7 +25,7 @@ __C.PRE_GCC = False # use the pretrained model on GCC dataset __C.PRE_GCC_MODEL = 'path to model' # path to model -__C.RESUME = False # contine training +__C.RESUME = False # continue training __C.RESUME_PATH = './exp/04-25_09-19_SHHB_VGG_1e-05/latest_state.pth' # __C.GPU_ID = [0,1] # sigle gpu: [0], [1] ...; multi gpus: [0,1] diff --git a/datasets/FUDAN/FUDAN.py b/datasets/FUDAN/FUDAN.py new file mode 100644 index 0000000..57c5ad1 --- /dev/null +++ b/datasets/FUDAN/FUDAN.py @@ -0,0 +1,68 @@ +import numpy as np +import os +import random +from scipy import io as sio +import sys +import torch +from torch.utils import data +from PIL import Image, ImageOps + +import pandas as 
pd + +from config import cfg + + +class Fudan(data.Dataset): + def __init__( + self, + data_path, + mode, + main_transform=None, + img_transform=None, + gt_transform=None, + ): + self.img_path = data_path + "/img" + self.gt_path = data_path + "/den" + self.data_files = [ + filename + for filename in os.listdir(self.img_path) + if os.path.isfile(os.path.join(self.img_path, filename)) + ] + self.num_samples = len(self.data_files) + self.main_transform = main_transform + self.img_transform = img_transform + self.gt_transform = gt_transform + + def __getitem__(self, index): + fname = self.data_files[index] + img, den = self.read_image_and_gt(fname) + if self.main_transform is not None: + img, den = self.main_transform(img, den) + if self.img_transform is not None: + img = self.img_transform(img) + if self.gt_transform is not None: + den = self.gt_transform(den) + return img, den + + def __len__(self): + return self.num_samples + + def read_image_and_gt(self, fname): + img = Image.open(os.path.join(self.img_path, fname)) + if img.mode == "L": + img = img.convert("RGB") + + # den = sio.loadmat(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.mat')) + # den = den['map'] + den = pd.read_csv( + os.path.join(self.gt_path, os.path.splitext(fname)[0] + ".csv"), + sep=",", + header=None, + ).values + + den = den.astype(np.float32, copy=False) + den = Image.fromarray(den) + return img, den + + def get_num_samples(self): + return self.num_samples diff --git a/datasets/FUDAN/__init__.py b/datasets/FUDAN/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/FUDAN/loading_data.py b/datasets/FUDAN/loading_data.py new file mode 100644 index 0000000..636caeb --- /dev/null +++ b/datasets/FUDAN/loading_data.py @@ -0,0 +1,62 @@ +import torchvision.transforms as standard_transforms +from torch.utils.data import DataLoader +import misc.transforms as own_transforms +from datasets.FUDAN.FUDAN import Fudan +from datasets.FUDAN.setting import cfg_data +import 
torch + + +def loading_data(): + mean_std = cfg_data.MEAN_STD + log_para = cfg_data.LOG_PARA + train_main_transform = own_transforms.Compose( + [ + # own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), + own_transforms.RandomHorizontallyFlip() + ] + ) + val_main_transform = own_transforms.Compose( + [own_transforms.RandomCrop(cfg_data.TRAIN_SIZE)] + ) + val_main_transform = None + img_transform = standard_transforms.Compose( + [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] + ) + gt_transform = standard_transforms.Compose( + [own_transforms.LabelNormalize(log_para)] + ) + restore_transform = standard_transforms.Compose( + [own_transforms.DeNormalize(*mean_std), standard_transforms.ToPILImage()] + ) + + train_set = Fudan( + cfg_data.DATA_PATH + "/train", + "train", + main_transform=train_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + train_loader = DataLoader( + train_set, + batch_size=cfg_data.TRAIN_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=True, + ) + + val_set = Fudan( + cfg_data.DATA_PATH + "/test", + "test", + main_transform=val_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + val_loader = DataLoader( + val_set, + batch_size=cfg_data.VAL_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=False, + ) + + return train_loader, val_loader, restore_transform diff --git a/datasets/FUDAN/make_Fudan.ipynb b/datasets/FUDAN/make_Fudan.ipynb new file mode 100644 index 0000000..c9a1b10 --- /dev/null +++ b/datasets/FUDAN/make_Fudan.ipynb @@ -0,0 +1,462 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import os.path\n", + "import cv2\n", + "import glob\n", + "import h5py\n", + "import scipy\n", + "import pickle\n", + "import numpy as np\n", + "from PIL import Image\n", + "import scipy.io as io\n", + "from itertools import islice\n", + "from tqdm import tqdm\n", + "from 
matplotlib import pyplot as plt\n", + "from sortedcontainers import SortedDict\n", + "from scipy.ndimage import gaussian_filter \n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "def get_img_pathes(path_sets):\n", + " \"\"\"\n", + " Return all images from all pathes in 'path_sets'\n", + " \"\"\"\n", + " img_pathes = []\n", + " for path in path_sets:\n", + " for img_path in glob.glob(os.path.join(path, '*.jpg')):\n", + " img_pathes.append(img_path)\n", + " return img_pathes\n", + "\n", + "\n", + "def save_computed_density(density_map, out_path):\n", + " \"\"\"\n", + " Save density map to h5py format\n", + " \"\"\"\n", + " with h5py.File(out_path, 'w') as hf:\n", + " hf['density'] = density_map\n", + " \n", + "\n", + "def compute_sigma(gt_count, distance=None, min_sigma=1, method=1, fixed_sigma=15):\n", + " \"\"\"\n", + " Compute sigma for gaussian kernel with different methods :\n", + " * method = 1 : sigma = (sum of distance to 3 nearest neighbors) / 10\n", + " * method = 2 : sigma = distance to nearest neighbor\n", + " * method = 3 : sigma = fixed value\n", + " ** if sigma lower than threshold 'min_sigma', then 'min_sigma' will be used\n", + " ** in case of one point on the image sigma = 'fixed_sigma'\n", + " \"\"\" \n", + " if gt_count > 1 and distance is not None:\n", + " if method == 1:\n", + " sigma = np.mean(distance[1:4])*0.1\n", + " elif method == 2:\n", + " sigma = distance[1]\n", + " elif method == 3:\n", + " sigma = fixed_sigma\n", + " else:\n", + " sigma = fixed_sigma\n", + " if sigma < min_sigma:\n", + " sigma = min_sigma\n", + " return sigma\n", + "\n", + "\n", + "def find_closest_key(sorted_dict, key):\n", + " \"\"\"\n", + " Find closest key in sorted_dict to 'key'\n", + " \"\"\"\n", + " keys = list(islice(sorted_dict.irange(minimum=key), 1))\n", + " keys.extend(islice(sorted_dict.irange(maximum=key, reverse=True), 1))\n", + " return min(keys, key=lambda k: 
abs(key - k))\n", + "\n", + "\n", + "def gaussian_filter_density(non_zero_points, map_h, map_w, distances=None, kernels_dict=None, min_sigma=2, method=1, const_sigma=15):\n", + " \"\"\"\n", + " Fast gaussian filter implementation : using precomputed distances and kernels\n", + " \"\"\"\n", + " gt_count = non_zero_points.shape[0]\n", + " density_map = np.zeros((map_h, map_w), dtype=np.float32)\n", + "\n", + " for i in range(gt_count):\n", + " point_y, point_x = non_zero_points[i]\n", + " sigma = compute_sigma(gt_count, distances[i], min_sigma=min_sigma, method=method, fixed_sigma=const_sigma)\n", + " closest_sigma = find_closest_key(kernels_dict, sigma)\n", + " kernel = kernels_dict[closest_sigma]\n", + " full_kernel_size = kernel.shape[0]\n", + " kernel_size = full_kernel_size // 2\n", + "\n", + " min_img_x = max(0, point_x-kernel_size)\n", + " min_img_y = max(0, point_y-kernel_size)\n", + " max_img_x = min(point_x+kernel_size+1, map_h - 1)\n", + " max_img_y = min(point_y+kernel_size+1, map_w - 1)\n", + "\n", + " kernel_x_min = kernel_size - point_x if point_x <= kernel_size else 0\n", + " kernel_y_min = kernel_size - point_y if point_y <= kernel_size else 0\n", + " kernel_x_max = kernel_x_min + max_img_x - min_img_x\n", + " kernel_y_max = kernel_y_min + max_img_y - min_img_y\n", + "\n", + " density_map[min_img_x:max_img_x, min_img_y:max_img_y] += kernel[kernel_x_min:kernel_x_max, kernel_y_min:kernel_y_max]\n", + " return density_map\n", + "\n", + "\n", + "def get_gt_dots(mat_path, img_height, img_width):\n", + " \"\"\"\n", + " Load Matlab file with ground truth labels and save it to numpy array.\n", + " ** cliping is needed to prevent going out of the array\n", + " \"\"\"\n", + " mat = io.loadmat(mat_path)\n", + " gt = mat[\"image_info\"][0,0][0,0][0].astype(np.float32).round().astype(int)\n", + " gt[:,0] = gt[:,0].clip(0, img_width - 1)\n", + " gt[:,1] = gt[:,1].clip(0, img_height - 1)\n", + " return gt\n", + "\n", + "\n", + "def set_circles_on_img(image, 
bbox_list, circle_size=2):\n", + " \"\"\"\n", + " Set circles on images at centers of bboxes in bbox_list\n", + " \"\"\"\n", + " for bbox in bbox_list:\n", + " cv2.circle(image, (bbox[0], bbox[1]), circle_size, (255, 0, 0), -1)\n", + " return image\n", + "\n", + "\n", + "def generate_gaussian_kernels(out_kernels_path='gaussian_kernels.pkl', round_decimals = 3, sigma_threshold = 4, sigma_min=0, sigma_max=20, num_sigmas=801):\n", + " \"\"\"\n", + " Computing gaussian filter kernel for sigmas in linspace(sigma_min, sigma_max, num_sigmas) and saving \n", + " them to dict. \n", + " \"\"\"\n", + " kernels_dict = dict()\n", + " sigma_space = np.linspace(sigma_min, sigma_max, num_sigmas)\n", + " for sigma in tqdm(sigma_space):\n", + " sigma = np.round(sigma, decimals=round_decimals) \n", + " kernel_size = np.ceil(sigma*sigma_threshold).astype(np.intc)\n", + "\n", + " img_shape = (kernel_size*2+1, kernel_size*2+1)\n", + " img_center = (img_shape[0]//2, img_shape[1]//2)\n", + "\n", + " arr = np.zeros(img_shape)\n", + " arr[img_center] = 1\n", + "\n", + " arr = scipy.ndimage.filters.gaussian_filter(arr, sigma, mode='constant') \n", + " kernel = arr / arr.sum()\n", + " kernels_dict[sigma] = kernel\n", + " \n", + " print(f'Computed {len(sigma_space)} gaussian kernels. 
Saving them to {out_kernels_path}')\n", + "\n", + " with open(out_kernels_path, 'wb') as f:\n", + " pickle.dump(kernels_dict, f)\n", + " \n", + " \n", + "def compute_distances(out_dist_path='distances_dict.pkl', root_path='./', n_neighbors = 4, leafsize=1024):\n", + " distances_dict = dict()\n", + " full_img_pathes = glob.glob(f'{root_path}/images/*.jpg')\n", + "\n", + " for full_img_path in tqdm(full_img_pathes):\n", + " mat_path = full_img_path.replace('.jpg','.mat').replace('images','ground-truth').replace('img','gt_img')\n", + "\n", + " img = plt.imread(full_img_path)\n", + " non_zero_points = get_gt_dots(mat_path, *img.shape[0:2])\n", + "\n", + " tree = scipy.spatial.KDTree(non_zero_points.copy(), leafsize=leafsize) # build kdtree\n", + " distances, _ = tree.query(non_zero_points, k=n_neighbors) # query kdtree\n", + "\n", + " distances_dict[full_img_path] = distances\n", + " \n", + " print(f'Distances computed for {len(full_img_pathes)}. Saving them to {out_dist_path}')\n", + "\n", + " with open(out_dist_path, 'wb') as f:\n", + " pickle.dump(distances_dict, f)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/wc/w_kk2yzn49s52czst7nrbxyw0000gn/T/ipykernel_11301/2387218124.py:109: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", + "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", + " for sigma in tqdm_notebook(sigma_space):\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "72a0558118d14ea3a3e615bbe86944b6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/801 [00:00 39\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mopen\u001b[39;49m(file_like, mode), \u001b[39mTrue\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mOSError\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 41\u001b[0m 
\u001b[39m# Probably \"not found\"\u001b[39;00m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './FUDAN-UCC/part1/ground-truth/63.mat'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[26], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[39m# uncomment to generate and save dict with distances \u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39misfile(precomputed_distances_path):\n\u001b[0;32m----> 5\u001b[0m compute_distances(out_dist_path\u001b[39m=\u001b[39;49mprecomputed_distances_path, root_path\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m./FUDAN-UCC/part1\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m 7\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(precomputed_distances_path, \u001b[39m'\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 8\u001b[0m distances_dict \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mload(f)\n", + "Cell \u001b[0;32mIn[25], line 137\u001b[0m, in \u001b[0;36mcompute_distances\u001b[0;34m(out_dist_path, root_path, n_neighbors, leafsize)\u001b[0m\n\u001b[1;32m 134\u001b[0m mat_path \u001b[39m=\u001b[39m full_img_path\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39m.jpg\u001b[39m\u001b[39m'\u001b[39m,\u001b[39m'\u001b[39m\u001b[39m.mat\u001b[39m\u001b[39m'\u001b[39m)\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39mimages\u001b[39m\u001b[39m'\u001b[39m,\u001b[39m'\u001b[39m\u001b[39mground-truth\u001b[39m\u001b[39m'\u001b[39m)\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39mimg\u001b[39m\u001b[39m'\u001b[39m,\u001b[39m'\u001b[39m\u001b[39mgt_img\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 136\u001b[0m img \u001b[39m=\u001b[39m 
plt\u001b[39m.\u001b[39mimread(full_img_path)\n\u001b[0;32m--> 137\u001b[0m non_zero_points \u001b[39m=\u001b[39m get_gt_dots(mat_path, \u001b[39m*\u001b[39;49mimg\u001b[39m.\u001b[39;49mshape[\u001b[39m0\u001b[39;49m:\u001b[39m2\u001b[39;49m])\n\u001b[1;32m 139\u001b[0m tree \u001b[39m=\u001b[39m scipy\u001b[39m.\u001b[39mspatial\u001b[39m.\u001b[39mKDTree(non_zero_points\u001b[39m.\u001b[39mcopy(), leafsize\u001b[39m=\u001b[39mleafsize) \u001b[39m# build kdtree\u001b[39;00m\n\u001b[1;32m 140\u001b[0m distances, _ \u001b[39m=\u001b[39m tree\u001b[39m.\u001b[39mquery(non_zero_points, k\u001b[39m=\u001b[39mn_neighbors) \u001b[39m# query kdtree\u001b[39;00m\n", + "Cell \u001b[0;32mIn[25], line 86\u001b[0m, in \u001b[0;36mget_gt_dots\u001b[0;34m(mat_path, img_height, img_width)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_gt_dots\u001b[39m(mat_path, img_height, img_width):\n\u001b[1;32m 82\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[39m Load Matlab file with ground truth labels and save it to numpy array.\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[39m ** cliping is needed to prevent going out of the array\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 86\u001b[0m mat \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39;49mloadmat(mat_path)\n\u001b[1;32m 87\u001b[0m gt \u001b[39m=\u001b[39m mat[\u001b[39m\"\u001b[39m\u001b[39mimage_info\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m0\u001b[39m,\u001b[39m0\u001b[39m][\u001b[39m0\u001b[39m,\u001b[39m0\u001b[39m][\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mastype(np\u001b[39m.\u001b[39mfloat32)\u001b[39m.\u001b[39mround()\u001b[39m.\u001b[39mastype(\u001b[39mint\u001b[39m)\n\u001b[1;32m 88\u001b[0m gt[:,\u001b[39m0\u001b[39m] \u001b[39m=\u001b[39m gt[:,\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mclip(\u001b[39m0\u001b[39m, img_width \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m)\n", + "File 
\u001b[0;32m~/Desktop/MAP583/dldiy/lib/python3.9/site-packages/scipy/io/matlab/_mio.py:225\u001b[0m, in \u001b[0;36mloadmat\u001b[0;34m(file_name, mdict, appendmat, **kwargs)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 89\u001b[0m \u001b[39mLoad MATLAB file.\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[39m 3.14159265+3.14159265j])\u001b[39;00m\n\u001b[1;32m 223\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 224\u001b[0m variable_names \u001b[39m=\u001b[39m kwargs\u001b[39m.\u001b[39mpop(\u001b[39m'\u001b[39m\u001b[39mvariable_names\u001b[39m\u001b[39m'\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[0;32m--> 225\u001b[0m \u001b[39mwith\u001b[39;00m _open_file_context(file_name, appendmat) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 226\u001b[0m MR, _ \u001b[39m=\u001b[39m mat_reader_factory(f, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 227\u001b[0m matfile_dict \u001b[39m=\u001b[39m MR\u001b[39m.\u001b[39mget_variables(variable_names)\n", + "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/contextlib.py:117\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__enter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[39mdel\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mkwds, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfunc\n\u001b[1;32m 116\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mnext\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen)\n\u001b[1;32m 118\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m:\n\u001b[1;32m 119\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mgenerator didn\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt 
yield\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39m\n", + "File \u001b[0;32m~/Desktop/MAP583/dldiy/lib/python3.9/site-packages/scipy/io/matlab/_mio.py:17\u001b[0m, in \u001b[0;36m_open_file_context\u001b[0;34m(file_like, appendmat, mode)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[39m@contextmanager\u001b[39m\n\u001b[1;32m 16\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_open_file_context\u001b[39m(file_like, appendmat, mode\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[0;32m---> 17\u001b[0m f, opened \u001b[39m=\u001b[39m _open_file(file_like, appendmat, mode)\n\u001b[1;32m 18\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 19\u001b[0m \u001b[39myield\u001b[39;00m f\n", + "File \u001b[0;32m~/Desktop/MAP583/dldiy/lib/python3.9/site-packages/scipy/io/matlab/_mio.py:45\u001b[0m, in \u001b[0;36m_open_file\u001b[0;34m(file_like, appendmat, mode)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[39mif\u001b[39;00m appendmat \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m file_like\u001b[39m.\u001b[39mendswith(\u001b[39m'\u001b[39m\u001b[39m.mat\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 44\u001b[0m file_like \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m.mat\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m---> 45\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mopen\u001b[39;49m(file_like, mode), \u001b[39mTrue\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 47\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mOSError\u001b[39;00m(\n\u001b[1;32m 48\u001b[0m \u001b[39m'\u001b[39m\u001b[39mReader needs file name or open file-like object\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 49\u001b[0m ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './FUDAN-UCC/part1/ground-truth/63.mat'" + ] + } + ], + "source": [ + "precomputed_distances_path 
= 'distances_dict.pkl'\n", + "\n", + "# uncomment to generate and save dict with distances \n", + "if not os.path.isfile(precomputed_distances_path):\n", + " compute_distances(out_dist_path=precomputed_distances_path, root_path='./FUDAN-UCC/part1')\n", + " \n", + "with open(precomputed_distances_path, 'rb') as f:\n", + " distances_dict = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6b9af7f918e140e0a8b931c65e841c32", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=482), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# generate GT for part 1\n", + "data_root = './FUDAN-UCC/part1/'\n", + "img_pathes = glob.glob(f'{data_root}/images/*.jpg')\n", + "map_out_folder = 'maps_adaptive_kernel/'\n", + "min_sigma = 2 ## can be set 0\n", + "method = 1\n", + "\n", + "for full_img_path in tqdm(img_pathes):\n", + " data_folder, img_path = full_img_path.split('images')\n", + " mat_path = full_img_path.replace('.jpg','.mat').replace('images','ground-truth').replace('IMG_','GT_IMG_')\n", + " \n", + " # load img and map\n", + " img = Image.open(full_img_path)\n", + " width, height = img.size\n", + " gt_points = get_gt_dots(mat_path, height, width)\n", + " \n", + " distances = distances_dict[full_img_path]\n", + " density_map = gaussian_filter_density(gt_points, height, width, distances, kernels_dict, min_sigma=min_sigma, method=method)\n", + " \n", + " curr_map_out_folder = data_folder + map_out_folder\n", + " gt_out_path = curr_map_out_folder + img_path.strip('/').replace('.jpg', '.h5')\n", + " \n", + "# #plt.imshow(img)\n", + "# plt.imshow(density_map, alpha=1)\n", + "# plt.show()\n", + "# break\n", + " if not os.path.isdir(curr_map_out_folder):\n", + 
" print('creating ' + curr_map_out_folder)\n", + " os.makedirs(curr_map_out_folder)\n", + " save_computed_density(density_map, gt_out_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "61b63f97f03146bba6907cddfda42e04", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=716), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# generate GT for part 2\n", + "data_root = './FUDAN-UCC/part2/'\n", + "img_pathes = glob.glob(f'{data_root}/images/*.jpg')\n", + "map_out_folder = 'maps_fixed_kernel/'\n", + "min_sigma = 2\n", + "method = 3\n", + "const_sigma=15\n", + "\n", + "for full_img_path in tqdm(img_pathes):\n", + " data_folder, img_path = full_img_path.split('images')\n", + " mat_path = full_img_path.replace('.jpg','.mat').replace('images','ground-truth').replace('IMG_','GT_IMG_')\n", + " \n", + " # load img and map\n", + " img = Image.open(full_img_path)\n", + " width, height = img.size\n", + " gt_points = get_gt_dots(mat_path, height, width)\n", + " \n", + " distances = distances_dict[full_img_path]\n", + " density_map = gaussian_filter_density(gt_points, height, width, distances, kernels_dict, min_sigma=min_sigma, method=method,const_sigma=const_sigma)\n", + " \n", + " curr_map_out_folder = data_folder + map_out_folder\n", + " gt_out_path = curr_map_out_folder + img_path.strip('/').replace('.jpg', '.h5')\n", + " \n", + "# #plt.imshow(img)\n", + "# plt.imshow(density_map, alpha=1)\n", + "# plt.show()\n", + "# break\n", + " if not os.path.isdir(curr_map_out_folder):\n", + " print('creating ' + curr_map_out_folder)\n", + " os.makedirs(curr_map_out_folder)\n", + " save_computed_density(density_map, gt_out_path)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.imshow(img)\n", + "plt.imshow(density_map, alpha=0.75)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dldiy", + "language": "python", + "name": "dldiy" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + }, + "vscode": { + "interpreter": { + "hash": "b5b25aaea419aac5c36814101d617d5039a3d8bc965da8b9de2287350d0b87c2" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/datasets/FUDAN/preapre_FUDAN.m b/datasets/FUDAN/preapre_FUDAN.m new file mode 100644 index 0000000..7da4cb8 --- /dev/null +++ b/datasets/FUDAN/preapre_FUDAN.m @@ -0,0 +1,54 @@ +clc; clear all; +dataset = 'B'; +standard_size = [768,1024]; + +att = 'test'; + +dataset_name = ['shanghaitech_part_' dataset]; +path = ['../data/ShanghaiTech_Crowd_Detecting/part_' dataset '_final/' att '_data/images/']; +output_path = '../data/768x1024RGB-k15-s4/'; +train_path_img = strcat(output_path, dataset_name,'/', att, '/img/'); +train_path_den = strcat(output_path, dataset_name,'/', att, '/den/'); + +gt_path = ['../data/ShanghaiTech_Crowd_Detecting/part_' dataset '_final/' att '_data/ground_truth/']; + +mkdir(output_path) +mkdir(train_path_img); +mkdir(train_path_den); + +if (dataset == 'A') + num_images = 300; +else + num_images = 400; +end + +for idx = 1:num_images + i = idx; + if (mod(idx,10)==0) + fprintf(1,'Processing %3d/%d files\n', idx, num_images); + end + load(strcat(gt_path, 'GT_IMG_',num2str(i),'.mat')) ; + input_img_name = strcat(path,'IMG_',num2str(i),'.jpg'); + im = imread(input_img_name); + [h, w, c] = size(im); + 
annPoints = image_info{1}.location; + + + rate = standard_size(1)/h; + rate_w = w*rate; + if rate_w>standard_size(2) + rate = standard_size(2)/w; + end + rate_h = double(int16(h*rate))/h; + rate_w = double(int16(w*rate))/w; + im = imresize(im,[int16(h*rate),int16(w*rate)]); + annPoints(:,1) = annPoints(:,1)*double(rate_w); + annPoints(:,2) = annPoints(:,2)*double(rate_h); + + im_density = get_density_map_gaussian(im,annPoints,15,4); + im_density = im_density(:,:,1); + + imwrite(im, [train_path_img num2str(idx) '.jpg']); + csvwrite([train_path_den num2str(idx) '.csv'], im_density); +end + diff --git a/datasets/FUDAN/setting.py b/datasets/FUDAN/setting.py new file mode 100644 index 0000000..05a5c8f --- /dev/null +++ b/datasets/FUDAN/setting.py @@ -0,0 +1,22 @@ +from easydict import EasyDict as edict + +# init +__C_FUDAN = edict() + +cfg_data = __C_FUDAN + +__C_FUDAN.STD_SIZE = (768,1024) +__C_FUDAN.TRAIN_SIZE = (576,768) +__C_FUDAN.DATA_PATH = '../ProcessedData/Fudan-UCC' + +__C_FUDAN.MEAN_STD = ([0.452016860247, 0.447249650955, 0.431981861591],[0.23242045939, 0.224925786257, 0.221840232611]) + +__C_FUDAN.LABEL_FACTOR = 1 +__C_FUDAN.LOG_PARA = 100. 
+ +__C_FUDAN.RESUME_MODEL = ''#model path +__C_FUDAN.TRAIN_BATCH_SIZE = 6 #imgs + +__C_FUDAN.VAL_BATCH_SIZE = 6 # + + diff --git a/datasets/GCC/GCC.py b/datasets/GCC/GCC.py index 0fe42e4..eef4a2c 100644 --- a/datasets/GCC/GCC.py +++ b/datasets/GCC/GCC.py @@ -8,10 +8,18 @@ import pandas as pd -from setting import cfg_data +from datasets.GCC.setting import cfg_data + class GCC(data.Dataset): - def __init__(self, list_file, mode, main_transform=None, img_transform=None, gt_transform=None): + def __init__( + self, + list_file, + mode, + main_transform=None, + img_transform=None, + gt_transform=None, + ): self.crowd_level = [] self.time = [] @@ -19,10 +27,10 @@ def __init__(self, list_file, mode, main_transform=None, img_transform=None, gt_ self.file_folder = [] self.file_name = [] self.gt_cnt = [] - + with open(list_file) as f: lines = f.readlines() - + for line in lines: splited = line.strip().split() @@ -34,56 +42,70 @@ def __init__(self, list_file, mode, main_transform=None, img_transform=None, gt_ self.gt_cnt.append(int(splited[5])) self.mode = mode - self.main_transform = main_transform + self.main_transform = main_transform self.img_transform = img_transform self.gt_transform = gt_transform - self.num_samples = len(lines) - - + self.num_samples = len(lines) + def __getitem__(self, index): img, den = self.read_image_and_gt(index) - + if self.main_transform is not None: - img, den = self.main_transform(img,den) + img, den = self.main_transform(img, den) if self.img_transform is not None: img = self.img_transform(img) - - # den = torch.from_numpy(np.array(den, dtype=np.float32)) + # den = torch.from_numpy(np.array(den, dtype=np.float32)) if self.gt_transform is not None: den = self.gt_transform(den) - - if self.mode == 'train': - return img, den, - elif self.mode == 'test': - attributes_pt = torch.from_numpy(np.array([int(self.crowd_level[index]),int(self.time[index]),int(self.weather[index])])) + + if self.mode == "train": + return ( + img, + den, + ) + elif 
self.mode == "test": + attributes_pt = torch.from_numpy( + np.array( + [ + int(self.crowd_level[index]), + int(self.time[index]), + int(self.weather[index]), + ] + ) + ) return img, den, attributes_pt else: - print('invalid data mode!!!') + print("invalid data mode!!!") def __len__(self): return self.num_samples - def read_image_and_gt(self,index): + def read_image_and_gt(self, index): - img_path = os.path.join(cfg_data.DATA_PATH+self.file_folder[index], 'pngs_544_960', self.file_name[index]+'.png') + img_path = os.path.join( + cfg_data.DATA_PATH + self.file_folder[index], + "pngs_544_960", + self.file_name[index] + ".png", + ) - den_map_path = os.path.join(cfg_data.DATA_PATH+self.file_folder[index], 'csv_den_maps_' + cfg_data.DATA_GT +'_544_960', self.file_name[index]+'.csv') + den_map_path = os.path.join( + cfg_data.DATA_PATH + self.file_folder[index], + "csv_den_maps_" + cfg_data.DATA_GT + "_544_960", + self.file_name[index] + ".csv", + ) img = Image.open(img_path) - den_map = pd.read_csv(den_map_path, sep=',',header=None).values + den_map = pd.read_csv(den_map_path, sep=",", header=None).values - # den_map = sio.loadmat(den_map_path)['den_map'] + # den_map = sio.loadmat(den_map_path)['den_map'] den_map = den_map.astype(np.float32, copy=False) den_map = Image.fromarray(den_map) - - return img, den_map + return img, den_map def get_num_samples(self): - return self.num_samples - - \ No newline at end of file + return self.num_samples diff --git a/datasets/GCC/loading_data.py b/datasets/GCC/loading_data.py index 2ccf054..1a8ec5b 100644 --- a/datasets/GCC/loading_data.py +++ b/datasets/GCC/loading_data.py @@ -1,47 +1,69 @@ import torchvision.transforms as standard_transforms from torch.utils.data import DataLoader import misc.transforms as own_transforms -from .GCC import GCC -from .setting import cfg_data +from datasets.GCC.GCC import GCC +from datasets.GCC.setting import cfg_data as default_cfg_data import torch import random - -def loading_data(): +def 
loading_data(cfg_data): mean_std = cfg_data.MEAN_STD log_para = cfg_data.LOG_PARA - train_main_transform = own_transforms.Compose([ - # own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), - own_transforms.RandomHorizontallyFlip() - ]) - img_transform = standard_transforms.Compose([ - standard_transforms.ToTensor(), - standard_transforms.Normalize(*mean_std) - ]) - gt_transform = standard_transforms.Compose([ - own_transforms.LabelNormalize(log_para) - ]) - restore_transform = standard_transforms.Compose([ - own_transforms.DeNormalize(*mean_std), - standard_transforms.ToPILImage() - ]) - - if cfg_data.VAL_MODE=='rd': - test_list = 'test_list.txt' - train_list = 'train_list.txt' - elif cfg_data.VAL_MODE=='cc': - test_list = 'cross_camera_test_list.txt' - train_list = 'cross_camera_train_list.txt' - elif cfg_data.VAL_MODE=='cl': - test_list = 'cross_location_test_list.txt' - train_list = 'cross_location_train_list.txt' + train_main_transform = own_transforms.Compose( + [ + # own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), + own_transforms.RandomHorizontallyFlip() + ] + ) + img_transform = standard_transforms.Compose( + [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] + ) + gt_transform = standard_transforms.Compose( + [own_transforms.LabelNormalize(log_para)] + ) + restore_transform = standard_transforms.Compose( + [own_transforms.DeNormalize(*mean_std), standard_transforms.ToPILImage()] + ) + if cfg_data.VAL_MODE == "rd": + test_list = "test_list.txt" + train_list = "train_list.txt" + elif cfg_data.VAL_MODE == "cc": + test_list = "cross_camera_test_list.txt" + train_list = "cross_camera_train_list.txt" + elif cfg_data.VAL_MODE == "cl": + test_list = "cross_location_test_list.txt" + train_list = "cross_location_train_list.txt" - train_set = GCC(cfg_data.DATA_PATH+'/txt_list/' + train_list, 'train',main_transform=train_main_transform, img_transform=img_transform, gt_transform=gt_transform) - train_loader = DataLoader(train_set, 
batch_size=cfg_data.TRAIN_BATCH_SIZE, num_workers=8, shuffle=True, drop_last=True) + train_set = GCC( + cfg_data.DATA_PATH + "/txt_list/" + train_list, + "train", + main_transform=train_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + train_loader = DataLoader( + train_set, + batch_size=cfg_data.TRAIN_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=True, + ) - val_set = GCC(cfg_data.DATA_PATH+'/txt_list/'+ test_list, 'test', main_transform=None, img_transform=img_transform, gt_transform=gt_transform) - val_loader = DataLoader(val_set, batch_size=cfg_data.VAL_BATCH_SIZE, num_workers=8, shuffle=True, drop_last=False) + val_set = GCC( + cfg_data.DATA_PATH + "/txt_list/" + test_list, + "test", + main_transform=None, + img_transform=img_transform, + gt_transform=gt_transform, + ) + val_loader = DataLoader( + val_set, + batch_size=cfg_data.VAL_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=False, + ) return train_loader, val_loader, restore_transform diff --git a/datasets/Mall/Mall.py b/datasets/Mall/Mall.py index d55a0e8..7334f42 100644 --- a/datasets/Mall/Mall.py +++ b/datasets/Mall/Mall.py @@ -10,53 +10,66 @@ from config import cfg + class Mall(data.Dataset): - def __init__(self, data_path, mode, main_transform=None, img_transform=None, gt_transform=None): - self.img_path = data_path + '/img' - self.gt_path = data_path + '/den' - self.data_files = [filename for filename in os.listdir(self.img_path) \ - if os.path.isfile(os.path.join(self.img_path,filename))] - self.num_samples = len(self.data_files) - self.main_transform=main_transform + def __init__( + self, + data_path, + mode, + main_transform=None, + img_transform=None, + gt_transform=None, + ): + self.img_path = data_path + "/img" + self.gt_path = data_path + "/den" + self.data_files = [ + filename + for filename in os.listdir(self.img_path) + if os.path.isfile(os.path.join(self.img_path, filename)) + ] + self.num_samples = len(self.data_files) + 
self.main_transform = main_transform self.img_transform = img_transform self.gt_transform = gt_transform self.mode = mode - if self.mode is 'train': - print('[Mall DATASET]: %d training images.' % (self.num_samples)) - if self.mode is 'test': - print('[Mall DATASET]: %d testing images.' % (self.num_samples)) - + if self.mode is "train": + print("[Mall DATASET]: %d training images." % (self.num_samples)) + if self.mode is "test": + print("[Mall DATASET]: %d testing images." % (self.num_samples)) + def __getitem__(self, index): # print self.data_files[index] fname = self.data_files[index] - img, den = self.read_image_and_gt(fname) + img, den = self.read_image_and_gt(fname) if self.main_transform is not None: - img, den = self.main_transform(img,den) + img, den = self.main_transform(img, den) if self.img_transform is not None: - img = self.img_transform(img) + img = self.img_transform(img) if self.gt_transform is not None: - den = self.gt_transform(den) + den = self.gt_transform(den) return img, den def __len__(self): return self.num_samples - def read_image_and_gt(self,fname): - img = Image.open(os.path.join(self.img_path,fname)) - if img.mode == 'L': - img = img.convert('RGB') + def read_image_and_gt(self, fname): + img = Image.open(os.path.join(self.img_path, fname)) + if img.mode == "L": + img = img.convert("RGB") # den = sio.loadmat(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.mat')) # den = den['map'] - den = pd.read_csv(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.csv'), sep=',',header=None).values - - den = den.astype(np.float32, copy=False) - den = Image.fromarray(den) - return img, den + den = pd.read_csv( + os.path.join(self.gt_path, os.path.splitext(fname)[0] + ".csv"), + sep=",", + header=None, + ).values + + den = den.astype(np.float32, copy=False) + den = Image.fromarray(den) + return img, den def get_num_samples(self): - return self.num_samples - - + return self.num_samples diff --git a/datasets/Mall/loading_data.py 
b/datasets/Mall/loading_data.py index 94a070d..16f4823 100644 --- a/datasets/Mall/loading_data.py +++ b/datasets/Mall/loading_data.py @@ -1,39 +1,63 @@ import torchvision.transforms as standard_transforms from torch.utils.data import DataLoader + # from misc.data import DataLoader import misc.transforms as own_transforms -from .Mall import Mall -from .setting import cfg_data +from datasets.Mall.Mall import Mall +from datasets.Mall.setting import cfg_data as default_cfg_data import torch -def loading_data(train_mode): +def loading_data(train_mode, cfg_data): mean_std = cfg_data.MEAN_STD log_para = cfg_data.LOG_PARA - train_main_transform = own_transforms.Compose([ - # own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), - own_transforms.RandomHorizontallyFlip() - ]) - img_transform = standard_transforms.Compose([ - standard_transforms.ToTensor(), - standard_transforms.Normalize(*mean_std) - ]) - gt_transform = standard_transforms.Compose([ - own_transforms.LabelNormalize(log_para) - ]) - restore_transform = standard_transforms.Compose([ - own_transforms.DeNormalize(*mean_std), - standard_transforms.ToPILImage() - ]) + train_main_transform = own_transforms.Compose( + [ + # own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), + own_transforms.RandomHorizontallyFlip() + ] + ) + img_transform = standard_transforms.Compose( + [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] + ) + gt_transform = standard_transforms.Compose( + [own_transforms.LabelNormalize(log_para)] + ) + restore_transform = standard_transforms.Compose( + [own_transforms.DeNormalize(*mean_std), standard_transforms.ToPILImage()] + ) train_loader = None - if train_mode == 'DA': - train_set = Mall(cfg_data.DATA_PATH+'/train', 'train',main_transform=train_main_transform, img_transform=img_transform, gt_transform=gt_transform) - train_loader = DataLoader(train_set, batch_size=cfg_data.TRAIN_BATCH_SIZE, num_workers=0, shuffle=True, drop_last=True) - + if train_mode == "DA": + train_set = Mall( 
+ cfg_data.DATA_PATH + "/train", + "train", + main_transform=train_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + train_loader = DataLoader( + train_set, + batch_size=cfg_data.TRAIN_BATCH_SIZE, + num_workers=0, + shuffle=True, + drop_last=True, + ) - val_set = Mall(cfg_data.DATA_PATH+'/test', 'test', main_transform=None, img_transform=img_transform, gt_transform=gt_transform) - val_loader = DataLoader(val_set, batch_size=cfg_data.VAL_BATCH_SIZE, num_workers=0, shuffle=False, drop_last=False) + val_set = Mall( + cfg_data.DATA_PATH + "/test", + "test", + main_transform=None, + img_transform=img_transform, + gt_transform=gt_transform, + ) + val_loader = DataLoader( + val_set, + batch_size=cfg_data.VAL_BATCH_SIZE, + num_workers=0, + shuffle=False, + drop_last=False, + ) return train_loader, val_loader, restore_transform diff --git a/datasets/QNRF/QNRF.py b/datasets/QNRF/QNRF.py index 2c3294e..bdcb786 100644 --- a/datasets/QNRF/QNRF.py +++ b/datasets/QNRF/QNRF.py @@ -9,45 +9,58 @@ import pandas as pd + class QNRF(data.Dataset): - def __init__(self, data_path, mode, main_transform=None, img_transform=None, gt_transform=None): - self.img_path = data_path + '/img' - self.gt_path = data_path + '/den' - self.data_files = [filename for filename in os.listdir(self.img_path) \ - if os.path.isfile(os.path.join(self.img_path,filename))] - self.num_samples = len(self.data_files) - self.main_transform=main_transform + def __init__( + self, + data_path, + mode, + main_transform=None, + img_transform=None, + gt_transform=None, + ): + self.img_path = data_path + "/img" + self.gt_path = data_path + "/den" + self.data_files = [ + filename + for filename in os.listdir(self.img_path) + if os.path.isfile(os.path.join(self.img_path, filename)) + ] + self.num_samples = len(self.data_files) + self.main_transform = main_transform self.img_transform = img_transform - self.gt_transform = gt_transform - + self.gt_transform = gt_transform + def __getitem__(self, 
index): fname = self.data_files[index] - img, den = self.read_image_and_gt(fname) + img, den = self.read_image_and_gt(fname) if self.main_transform is not None: - img, den = self.main_transform(img,den) + img, den = self.main_transform(img, den) if self.img_transform is not None: - img = self.img_transform(img) + img = self.img_transform(img) if self.gt_transform is not None: - den = self.gt_transform(den) + den = self.gt_transform(den) return img, den def __len__(self): return self.num_samples - def read_image_and_gt(self,fname): - img = Image.open(os.path.join(self.img_path,fname)) - if img.mode == 'L': - img = img.convert('RGB') + def read_image_and_gt(self, fname): + img = Image.open(os.path.join(self.img_path, fname)) + if img.mode == "L": + img = img.convert("RGB") # den = sio.loadmat(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.mat')) # den = den['map'] - den = pd.read_csv(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.csv'), sep=',',header=None).values - - den = den.astype(np.float32, copy=False) - den = Image.fromarray(den) - return img, den + den = pd.read_csv( + os.path.join(self.gt_path, os.path.splitext(fname)[0] + ".csv"), + sep=",", + header=None, + ).values + + den = den.astype(np.float32, copy=False) + den = Image.fromarray(den) + return img, den def get_num_samples(self): - return self.num_samples - - \ No newline at end of file + return self.num_samples diff --git a/datasets/QNRF/loading_data.py b/datasets/QNRF/loading_data.py index 8867800..70fbfe3 100644 --- a/datasets/QNRF/loading_data.py +++ b/datasets/QNRF/loading_data.py @@ -1,41 +1,52 @@ import torchvision.transforms as standard_transforms from torch.utils.data import DataLoader import misc.transforms as own_transforms -from .QNRF import QNRF -from .setting import cfg_data +from datasets.QNRF.QNRF import QNRF +from datasets.QNRF.setting import cfg_data as default_cfg_data import torch import random -def get_min_size(batch): + +def get_min_size(batch, cfg_data): min_ht 
= cfg_data.TRAIN_SIZE[0] min_wd = cfg_data.TRAIN_SIZE[1] for i_sample in batch: - - _,ht,wd = i_sample.shape - if ht1: - train_loader = DataLoader(train_set, batch_size=cfg_data.TRAIN_BATCH_SIZE, num_workers=8, collate_fn=SHHA_collate, shuffle=True, drop_last=True) - - - - val_set = SHHA(cfg_data.DATA_PATH+'/test', 'test', main_transform=None, img_transform=img_transform, gt_transform=gt_transform) - val_loader = DataLoader(val_set, batch_size=cfg_data.VAL_BATCH_SIZE, num_workers=8, shuffle=True, drop_last=False) + train_main_transform = own_transforms.Compose( + [own_transforms.RandomHorizontallyFlip()] + ) + img_transform = standard_transforms.Compose( + [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] + ) + gt_transform = standard_transforms.Compose( + [own_transforms.GTScaleDown(factor), own_transforms.LabelNormalize(log_para)] + ) + restore_transform = standard_transforms.Compose( + [own_transforms.DeNormalize(*mean_std), standard_transforms.ToPILImage()] + ) + + train_set = SHHA( + cfg_data.DATA_PATH + "/train", + "train", + main_transform=train_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + train_loader = None + if cfg_data.TRAIN_BATCH_SIZE == 1: + train_loader = DataLoader( + train_set, batch_size=1, num_workers=8, shuffle=True, drop_last=True + ) + elif cfg_data.TRAIN_BATCH_SIZE > 1: + train_loader = DataLoader( + train_set, + batch_size=cfg_data.TRAIN_BATCH_SIZE, + num_workers=8, + collate_fn=lambda x: SHHA_collate(x, cfg_data), + shuffle=True, + drop_last=True, + ) + + val_set = SHHA( + cfg_data.DATA_PATH + "/test", + "test", + main_transform=None, + img_transform=img_transform, + gt_transform=gt_transform, + ) + val_loader = DataLoader( + val_set, + batch_size=cfg_data.VAL_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=False, + ) return train_loader, val_loader, restore_transform diff --git a/datasets/SHHB/SHHB.py b/datasets/SHHB/SHHB.py index 97df132..7e15d97 100644 --- 
a/datasets/SHHB/SHHB.py +++ b/datasets/SHHB/SHHB.py @@ -11,45 +11,58 @@ from config import cfg + class SHHB(data.Dataset): - def __init__(self, data_path, mode, main_transform=None, img_transform=None, gt_transform=None): - self.img_path = data_path + '/img' - self.gt_path = data_path + '/den' - self.data_files = [filename for filename in os.listdir(self.img_path) \ - if os.path.isfile(os.path.join(self.img_path,filename))] - self.num_samples = len(self.data_files) - self.main_transform=main_transform + def __init__( + self, + data_path, + mode, + main_transform=None, + img_transform=None, + gt_transform=None, + ): + self.img_path = data_path + "/img" + self.gt_path = data_path + "/den" + self.data_files = [ + filename + for filename in os.listdir(self.img_path) + if os.path.isfile(os.path.join(self.img_path, filename)) + ] + self.num_samples = len(self.data_files) + self.main_transform = main_transform self.img_transform = img_transform - self.gt_transform = gt_transform - + self.gt_transform = gt_transform + def __getitem__(self, index): fname = self.data_files[index] - img, den = self.read_image_and_gt(fname) + img, den = self.read_image_and_gt(fname) if self.main_transform is not None: - img, den = self.main_transform(img,den) + img, den = self.main_transform(img, den) if self.img_transform is not None: - img = self.img_transform(img) + img = self.img_transform(img) if self.gt_transform is not None: - den = self.gt_transform(den) + den = self.gt_transform(den) return img, den def __len__(self): return self.num_samples - def read_image_and_gt(self,fname): - img = Image.open(os.path.join(self.img_path,fname)) - if img.mode == 'L': - img = img.convert('RGB') + def read_image_and_gt(self, fname): + img = Image.open(os.path.join(self.img_path, fname)) + if img.mode == "L": + img = img.convert("RGB") # den = sio.loadmat(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.mat')) # den = den['map'] - den = 
pd.read_csv(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.csv'), sep=',',header=None).values - - den = den.astype(np.float32, copy=False) - den = Image.fromarray(den) - return img, den + den = pd.read_csv( + os.path.join(self.gt_path, os.path.splitext(fname)[0] + ".csv"), + sep=",", + header=None, + ).values + + den = den.astype(np.float32, copy=False) + den = Image.fromarray(den) + return img, den def get_num_samples(self): - return self.num_samples - - \ No newline at end of file + return self.num_samples diff --git a/datasets/SHHB/loading_data.py b/datasets/SHHB/loading_data.py index 0aa446c..ee03069 100644 --- a/datasets/SHHB/loading_data.py +++ b/datasets/SHHB/loading_data.py @@ -1,39 +1,62 @@ import torchvision.transforms as standard_transforms from torch.utils.data import DataLoader import misc.transforms as own_transforms -from .SHHB import SHHB -from .setting import cfg_data +from datasets.SHHB.SHHB import SHHB +from datasets.SHHB.setting import cfg_data as default_cfg_data import torch -def loading_data(): +def loading_data(cfg_data): mean_std = cfg_data.MEAN_STD log_para = cfg_data.LOG_PARA - train_main_transform = own_transforms.Compose([ - #own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), - own_transforms.RandomHorizontallyFlip() - ]) - val_main_transform = own_transforms.Compose([ - own_transforms.RandomCrop(cfg_data.TRAIN_SIZE) - ]) + train_main_transform = own_transforms.Compose( + [ + # own_transforms.RandomCrop(cfg_data.TRAIN_SIZE), + own_transforms.RandomHorizontallyFlip() + ] + ) + val_main_transform = own_transforms.Compose( + [own_transforms.RandomCrop(cfg_data.TRAIN_SIZE)] + ) val_main_transform = None - img_transform = standard_transforms.Compose([ - standard_transforms.ToTensor(), - standard_transforms.Normalize(*mean_std) - ]) - gt_transform = standard_transforms.Compose([ - own_transforms.LabelNormalize(log_para) - ]) - restore_transform = standard_transforms.Compose([ - own_transforms.DeNormalize(*mean_std), - 
standard_transforms.ToPILImage() - ]) + img_transform = standard_transforms.Compose( + [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] + ) + gt_transform = standard_transforms.Compose( + [own_transforms.LabelNormalize(log_para)] + ) + restore_transform = standard_transforms.Compose( + [own_transforms.DeNormalize(*mean_std), standard_transforms.ToPILImage()] + ) - train_set = SHHB(cfg_data.DATA_PATH+'/train', 'train',main_transform=train_main_transform, img_transform=img_transform, gt_transform=gt_transform) - train_loader = DataLoader(train_set, batch_size=cfg_data.TRAIN_BATCH_SIZE, num_workers=8, shuffle=True, drop_last=True) - + train_set = SHHB( + cfg_data.DATA_PATH + "/train", + "train", + main_transform=train_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + train_loader = DataLoader( + train_set, + batch_size=cfg_data.TRAIN_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=True, + ) - val_set = SHHB(cfg_data.DATA_PATH+'/test', 'test', main_transform=val_main_transform, img_transform=img_transform, gt_transform=gt_transform) - val_loader = DataLoader(val_set, batch_size=cfg_data.VAL_BATCH_SIZE, num_workers=8, shuffle=True, drop_last=False) + val_set = SHHB( + cfg_data.DATA_PATH + "/test", + "test", + main_transform=val_main_transform, + img_transform=img_transform, + gt_transform=gt_transform, + ) + val_loader = DataLoader( + val_set, + batch_size=cfg_data.VAL_BATCH_SIZE, + num_workers=8, + shuffle=True, + drop_last=False, + ) return train_loader, val_loader, restore_transform diff --git a/datasets/UCF50/UCF50.py b/datasets/UCF50/UCF50.py index 16ba61c..1ba3381 100644 --- a/datasets/UCF50/UCF50.py +++ b/datasets/UCF50/UCF50.py @@ -9,61 +9,68 @@ class UCF50(data.Dataset): - def __init__(self, data_path, folder, mode, main_transform=None, img_transform=None, gt_transform=None): - self.img_path = data_path + '/img' - self.gt_path = data_path + '/den' + def __init__( + self, + data_path, + folder, + 
mode, + main_transform=None, + img_transform=None, + gt_transform=None, + ): + self.img_path = data_path + "/img" + self.gt_path = data_path + "/den" self.mode = mode self.img_files = [] self.gt_files = [] for i_folder in folder: - folder_img = self.img_path + '/' + str(i_folder) - folder_gt = self.gt_path + '/' + str(i_folder) + folder_img = self.img_path + "/" + str(i_folder) + folder_gt = self.gt_path + "/" + str(i_folder) for filename in os.listdir(folder_img): - if os.path.isfile(os.path.join(folder_img,filename)): - self.img_files.append(folder_img + '/' + filename) - self.gt_files.append(folder_gt + '/' + filename.split('.')[0] + '.csv') + if os.path.isfile(os.path.join(folder_img, filename)): + self.img_files.append(folder_img + "/" + filename) + self.gt_files.append( + folder_gt + "/" + filename.split(".")[0] + ".csv" + ) - self.num_samples = len(self.img_files) + self.num_samples = len(self.img_files) self.mode = mode - self.main_transform=main_transform + self.main_transform = main_transform self.img_transform = img_transform self.gt_transform = gt_transform - - - + def __getitem__(self, index): img, den = self.read_image_and_gt(index) - + if self.main_transform is not None: - img, den = self.main_transform(img,den) + img, den = self.main_transform(img, den) if self.img_transform is not None: img = self.img_transform(img) if self.gt_transform is not None: - den = self.gt_transform(den) - + den = self.gt_transform(den) + return img, den def __len__(self): return self.num_samples - def read_image_and_gt(self,index): - img = Image.open(os.path.join(self.img_path,self.img_files[index])) - if img.mode == 'L': - img = img.convert('RGB') + def read_image_and_gt(self, index): + img = Image.open(os.path.join(self.img_path, self.img_files[index])) + if img.mode == "L": + img = img.convert("RGB") - den = pd.read_csv(os.path.join(self.gt_path,self.gt_files[index]), sep=',',header=None).values + den = pd.read_csv( + os.path.join(self.gt_path, self.gt_files[index]), 
sep=",", header=None + ).values den = den.astype(np.float32, copy=False) den = Image.fromarray(den) - - return img, den + return img, den def get_num_samples(self): - return self.num_samples - - + return self.num_samples diff --git a/datasets/UCF50/loading_data.py b/datasets/UCF50/loading_data.py index f68c99f..d3f8c3f 100644 --- a/datasets/UCF50/loading_data.py +++ b/datasets/UCF50/loading_data.py @@ -1,41 +1,52 @@ import torchvision.transforms as standard_transforms from torch.utils.data import DataLoader import misc.transforms as own_transforms -from .UCF50 import UCF50 -from .setting import cfg_data +from datasets.UCF50.UCF50 import UCF50 +from datasets.UCF50.setting import cfg_data as default_cfg_data import torch import random -def get_min_size(batch): + +def get_min_size(batch, cfg_data): min_ht = cfg_data.TRAIN_SIZE[0] min_wd = cfg_data.TRAIN_SIZE[1] for i_sample in batch: - - _,ht,wd = i_sample.shape - if ht= self.c_size + assert len(keypoints) > 0 + i, j, h, w = random_crop(ht, wd, self.c_size, self.c_size) + img = F.crop(img, i, j, h, w) + + nearest_dis = np.clip(0.8*keypoints[:, 2], 4.0, 40.0) + points_left_up = keypoints[:, :2] - nearest_dis[:, None] / 2.0 + points_right_down = keypoints[:, :2] + nearest_dis[:, None] / 2.0 + bbox = np.concatenate((points_left_up, points_right_down), axis=1) + inner_area = cal_innner_area(j, i, j+w, i+h, bbox) + origin_area = nearest_dis * nearest_dis + ratio = np.clip(1.0 * inner_area / origin_area, 0.0, 1.0) + mask = (ratio >= 0.5) + keypoints = keypoints[mask] + keypoints = keypoints[:, :2] - [j, i] # change coodinate + target = np.ones(len(keypoints)) + + if len(keypoints) > 0: + if random.random() > 0.5: + img = F.hflip(img) + keypoints[:, 0] = w - keypoints[:, 0] + else: + if random.random() > 0.5: + img = F.hflip(img) + return self.trans(img), torch.from_numpy(keypoints.copy()).float(), \ + torch.from_numpy(target.copy()).float(), st_size diff --git a/dropin_config.py b/dropin_config.py new file mode 100644 index 
0000000..7b38f8c --- /dev/null +++ b/dropin_config.py @@ -0,0 +1,42 @@ +import os +from easydict import EasyDict as edict +import time +import torch + +# For easy copy-paste in a colab environment +__C = edict() +cfg = __C +__C.SEED = 3035 +__C.DATASET = 'SHHB' +if __C.DATASET == 'UCF50': + from datasets.UCF50.setting import cfg_data + __C.VAL_INDEX = cfg_data.VAL_INDEX +if __C.DATASET == 'GCC': + from datasets.GCC.setting import cfg_data + __C.VAL_MODE = cfg_data.VAL_MODE +__C.NET = 'Res101_SFCN' +__C.PRE_GCC = False +__C.PRE_GCC_MODEL = 'path to model' +__C.RESUME = False +__C.RESUME_PATH = './exp/04-25_09-19_SHHB_VGG_1e-05/latest_state.pth' +__C.GPU_ID = [0,1] +__C.LR = 1e-5 +__C.LR_DECAY = 0.995 +__C.LR_DECAY_START = -1 +__C.NUM_EPOCH_LR_DECAY = 1 +__C.MAX_EPOCH = 200 +__C.LAMBDA_1 = 1e-4 +__C.PRINT_FREQ = 10 +now = time.strftime("%m-%d_%H-%M", time.localtime()) +__C.EXP_NAME = now \ + + '_' + __C.DATASET \ + + '_' + __C.NET \ + + '_' + str(__C.LR) +if __C.DATASET == 'UCF50': + __C.EXP_NAME += '_' + str(__C.VAL_INDEX) +if __C.DATASET == 'GCC': + __C.EXP_NAME += '_' + __C.VAL_MODE +__C.EXP_PATH = './exp' +__C.VAL_DENSE_START = 50 +__C.VAL_FREQ = 10 +__C.VISIBLE_NUM_IMGS = 1 \ No newline at end of file diff --git a/losses/__init__.py b/losses/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/losses/__init__.py @@ -0,0 +1 @@ + diff --git a/losses/bay_loss.py b/losses/bay_loss.py new file mode 100644 index 0000000..a656592 --- /dev/null +++ b/losses/bay_loss.py @@ -0,0 +1,30 @@ +from torch.nn.modules import Module +import torch + +class Bay_Loss(Module): + def __init__(self, use_background, device): + super(Bay_Loss, self).__init__() + self.device = device + self.use_bg = use_background + + def forward(self, prob_list, target_list, pre_density): + loss = 0 + for idx, prob in enumerate(prob_list): # iterative through each sample + if prob is None: # image contains no annotation points + pre_count = torch.sum(pre_density[idx]) + target = 
torch.zeros((1,), dtype=torch.float32, device=self.device) + else: + N = len(prob) + if self.use_bg: + target = torch.zeros((N,), dtype=torch.float32, device=self.device) + target[:-1] = target_list[idx] + else: + target = target_list[idx] + pre_count = torch.sum(pre_density[idx].view((1, -1)) * prob, dim=1) # flatten into vector + + loss += torch.sum(torch.abs(target - pre_count)) + loss = loss / len(prob_list) + return loss + + + diff --git a/losses/post_prob.py b/losses/post_prob.py new file mode 100644 index 0000000..c634fc7 --- /dev/null +++ b/losses/post_prob.py @@ -0,0 +1,52 @@ +import torch +from torch.nn import Module + +class Post_Prob(Module): + def __init__(self, sigma, c_size, stride, background_ratio, use_background, device): + super(Post_Prob, self).__init__() + assert c_size % stride == 0 + + self.sigma = sigma + self.bg_ratio = background_ratio + self.device = device + # coordinate is same to image space, set to constant since crop size is same + self.cood = torch.arange(0, c_size, step=stride, + dtype=torch.float32, device=device) + stride / 2 + self.cood.unsqueeze_(0) + self.softmax = torch.nn.Softmax(dim=0) + self.use_bg = use_background + + def forward(self, points, st_sizes): + num_points_per_image = [len(points_per_image) for points_per_image in points] + all_points = torch.cat(points, dim=0) + + if len(all_points) > 0: + x = all_points[:, 0].unsqueeze_(1) + y = all_points[:, 1].unsqueeze_(1) + x_dis = -2 * torch.matmul(x, self.cood) + x * x + self.cood * self.cood + y_dis = -2 * torch.matmul(y, self.cood) + y * y + self.cood * self.cood + y_dis.unsqueeze_(2) + x_dis.unsqueeze_(1) + dis = y_dis + x_dis + dis = dis.view((dis.size(0), -1)) + + dis_list = torch.split(dis, num_points_per_image) + prob_list = [] + for dis, st_size in zip(dis_list, st_sizes): + if len(dis) > 0: + if self.use_bg: + min_dis = torch.clamp(torch.min(dis, dim=0, keepdim=True)[0], min=0.0) + bg_dis = (st_size * self.bg_ratio) ** 2 / (min_dis + 1e-5) + dis = 
torch.cat([dis, bg_dis], 0) # concatenate background distance to the last + dis = -dis / (2.0 * self.sigma ** 2) + prob = self.softmax(dis) + else: + prob = None + prob_list.append(prob) + else: + prob_list = [] + for _ in range(len(points)): + prob_list.append(None) + return prob_list + + diff --git a/misc/bay_loss.py b/misc/bay_loss.py new file mode 100644 index 0000000..e2c48c9 --- /dev/null +++ b/misc/bay_loss.py @@ -0,0 +1,58 @@ +import torch +import torch.nn as nn +from torch.nn.parameter import Parameter +from torch.nn import functional as F +from torch.autograd import Variable +from torch.nn.modules.loss import _Loss +from torch.nn.modules import Module +from post_prob import Post_Prob +from bay_loss_trainer import parse_args + +import numpy as np + +class Bay_Loss(_Loss): + def __init__(self, use_background, device): + super(Bay_Loss, self).__init__() + self.device = device + self.use_bg = use_background + + def forward(self, prob_list, target_list, pre_density): + loss = 0 + for idx, prob in enumerate(prob_list): # iterative through each sample + if prob is None: # image contains no annotation points + pre_count = torch.sum(pre_density[idx]) + target = torch.zeros((1,), dtype=torch.float32, device=self.device) + else: + N = len(prob) + if self.use_bg: + target = torch.zeros((N,), dtype=torch.float32, device=self.device) + target[:-1] = target_list[idx] + else: + target = target_list[idx] + pre_count = torch.sum(pre_density[idx].view((1, -1)) * prob, dim=1) # flatten into vector + + loss += torch.sum(torch.abs(target - pre_count)) + loss = loss / len(prob_list) + + return loss + +if __name__ == "__main__": + args=parse_args() + data = torch.zeros(1, 1, 1, 1) + data += 0.001 + target = torch.zeros(1, 1, 1, 1) + data = Variable(data, requires_grad=True) + target = Variable(target) + device=torch.device("cpu") + post_prob=Post_Prob(args.sigma, + args.crop_size, + args.downsample_ratio, + args.background_ratio, + args.use_background, + device) + prob_list = 
post_prob(points, st_sizes) + model = Bay_Loss(True,device) + loss = model(post_prob,data, target) + loss.backward() + print(loss) + print(data.grad) \ No newline at end of file diff --git a/misc/bay_loss_trainer.py b/misc/bay_loss_trainer.py new file mode 100644 index 0000000..a5e77e9 --- /dev/null +++ b/misc/bay_loss_trainer.py @@ -0,0 +1,59 @@ +#from misc.bay_loss import RegTrainer +import argparse +import os +import torch +args = None + +def parse_args(): + parser = argparse.ArgumentParser(description='Train ') + parser.add_argument('--data-dir', default='/home/teddy/UCF-Train-Val-Test', + help='training data directory') + parser.add_argument('--save-dir', default='/home/teddy/vgg', + help='directory to save models.') + + parser.add_argument('--lr', type=float, default=1e-5, + help='the initial learning rate') + parser.add_argument('--weight-decay', type=float, default=1e-4, + help='the weight decay') + parser.add_argument('--resume', default='', + help='the path of resume training model') + parser.add_argument('--max-model-num', type=int, default=1, + help='max models num to save ') + parser.add_argument('--max-epoch', type=int, default=1000, + help='max training epoch') + parser.add_argument('--val-epoch', type=int, default=5, + help='the num of steps to log training information') + parser.add_argument('--val-start', type=int, default=600, + help='the epoch start to val') + + parser.add_argument('--batch-size', type=int, default=1, + help='train batch size') + parser.add_argument('--device', default='0', help='assign device') + parser.add_argument('--num-workers', type=int, default=8, + help='the num of training process') + + parser.add_argument('--is-gray', type=bool, default=False, + help='whether the input image is gray') + parser.add_argument('--crop-size', type=int, default=512, + help='the crop size of the train image') + parser.add_argument('--downsample-ratio', type=int, default=8, + help='downsample ratio') + + parser.add_argument('--use-background', 
type=bool, default=True, + help='whether to use background modelling') + parser.add_argument('--sigma', type=float, default=8.0, + help='sigma for likelihood') + parser.add_argument('--background-ratio', type=float, default=1.0, + help='background ratio') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + print("args",args) + torch.backends.cudnn.benchmark = True + os.environ['CUDA_VISIBLE_DEVICES'] = args.device.strip() # set vis gpu + #trainer = RegTrainer(args) + #trainer.setup() + #trainer.train() \ No newline at end of file diff --git a/misc/cal_mean.py b/misc/cal_mean.py index 48313b3..bd3304c 100644 --- a/misc/cal_mean.py +++ b/misc/cal_mean.py @@ -2,42 +2,46 @@ import torchvision.datasets as dset - import pdb from PIL import Image import numpy as np import os -# TODO +# TODO + def make_parser(): parser = argparse.ArgumentParser() - parser.add_argument('--trainDataPath', type=str, default='/media/D/DataSet/UCF-QNRF_ECCV18/train_img', - help='absolute path to your data path') + parser.add_argument( + "--trainDataPath", + type=str, + default="/media/D/DataSet/UCF-QNRF_ECCV18/train_img", + help="absolute path to your data path", + ) return parser -if __name__ == '__main__': + +if __name__ == "__main__": args = make_parser().parse_args() imgs_list = [] for i_img, img_name in enumerate(os.listdir(args.trainDataPath)): if i_img % 100 == 0: - print( i_img ) + print(i_img) img = Image.open(os.path.join(args.trainDataPath, img_name)) - if img.mode == 'L': - img = img.convert('RGB') + if img.mode == "L": + img = img.convert("RGB") - img = np.array(img.resize((1024,768),Image.BILINEAR)) + img = np.array(img.resize((1024, 768), Image.BILINEAR)) imgs_list.append(img) - imgs = np.array(imgs_list).astype(np.float32)/255. 
- red = imgs[:,:,:,0] - green = imgs[:,:,:,1] - blue = imgs[:,:,:,2] - + imgs = np.array(imgs_list).astype(np.float32) / 255.0 + red = imgs[:, :, :, 0] + green = imgs[:, :, :, 1] + blue = imgs[:, :, :, 2] - print("means: [{}, {}, {}]".format(np.mean(red),np.mean(green),np.mean(blue))) - print("stdevs: [{}, {}, {}]".format(np.std(red),np.std(green),np.std(blue))) + print("means: [{}, {}, {}]".format(np.mean(red), np.mean(green), np.mean(blue))) + print("stdevs: [{}, {}, {}]".format(np.std(red), np.std(green), np.std(blue))) diff --git a/misc/layer.py b/misc/layer.py index 1ac723a..22a4696 100644 --- a/misc/layer.py +++ b/misc/layer.py @@ -3,19 +3,47 @@ class Conv2d(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size, stride=1, NL='relu', same_padding=False, bn=False, dilation=1): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + NL="relu", + same_padding=False, + bn=False, + dilation=1, + ): super(Conv2d, self).__init__() padding = int((kernel_size - 1) // 2) if same_padding else 0 self.conv = [] - if dilation==1: - self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=padding, dilation=dilation) + if dilation == 1: + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + padding=padding, + dilation=dilation, + ) else: - self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=dilation, dilation=dilation) - self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0, affine=True) if bn else None - if NL == 'relu' : - self.relu = nn.ReLU(inplace=True) - elif NL == 'prelu': - self.relu = nn.PReLU() + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + padding=dilation, + dilation=dilation, + ) + self.bn = ( + nn.BatchNorm2d(out_channels, eps=0.001, momentum=0, affine=True) + if bn + else None + ) + if NL == "relu": + self.relu = nn.ReLU(inplace=True) + elif NL == "prelu": + self.relu = nn.PReLU() else: self.relu = 
None @@ -29,13 +57,13 @@ def forward(self, x): class FC(nn.Module): - def __init__(self, in_features, out_features, NL='relu'): + def __init__(self, in_features, out_features, NL="relu"): super(FC, self).__init__() self.fc = nn.Linear(in_features, out_features) - if NL == 'relu' : - self.relu = nn.ReLU(inplace=True) - elif NL == 'prelu': - self.relu = nn.PReLU() + if NL == "relu": + self.relu = nn.ReLU(inplace=True) + elif NL == "prelu": + self.relu = nn.PReLU() else: self.relu = None @@ -47,69 +75,72 @@ def forward(self, x): class convDU(nn.Module): - - def __init__(self, - in_out_channels=2048, - kernel_size=(9,1) - ): + def __init__(self, in_out_channels=2048, kernel_size=(9, 1)): super(convDU, self).__init__() self.conv = nn.Sequential( - nn.Conv2d(in_out_channels, in_out_channels, kernel_size, stride=1, padding=((kernel_size[0]-1)//2,(kernel_size[1]-1)//2)), - nn.ReLU(inplace=True) - ) + nn.Conv2d( + in_out_channels, + in_out_channels, + kernel_size, + stride=1, + padding=((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2), + ), + nn.ReLU(inplace=True), + ) def forward(self, fea): n, c, h, w = fea.size() fea_stack = [] for i in range(h): - i_fea = fea.select(2, i).resize(n,c,1,w) + i_fea = fea.select(2, i).resize(n, c, 1, w) if i == 0: fea_stack.append(i_fea) continue - fea_stack.append(self.conv(fea_stack[i-1])+i_fea) + fea_stack.append(self.conv(fea_stack[i - 1]) + i_fea) # pdb.set_trace() # fea[:,i,:,:] = self.conv(fea[:,i-1,:,:].expand(n,1,h,w))+fea[:,i,:,:].expand(n,1,h,w) - for i in range(h): - pos = h-i-1 - if pos == h-1: + pos = h - i - 1 + if pos == h - 1: continue - fea_stack[pos] = self.conv(fea_stack[pos+1])+fea_stack[pos] + fea_stack[pos] = self.conv(fea_stack[pos + 1]) + fea_stack[pos] # pdb.set_trace() fea = torch.cat(fea_stack, 2) return fea -class convLR(nn.Module): - def __init__(self, - in_out_channels=2048, - kernel_size=(1,9) - ): +class convLR(nn.Module): + def __init__(self, in_out_channels=2048, kernel_size=(1, 9)): super(convLR, 
self).__init__() self.conv = nn.Sequential( - nn.Conv2d(in_out_channels, in_out_channels, kernel_size, stride=1, padding=((kernel_size[0]-1)//2,(kernel_size[1]-1)//2)), - nn.ReLU(inplace=True) - ) + nn.Conv2d( + in_out_channels, + in_out_channels, + kernel_size, + stride=1, + padding=((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2), + ), + nn.ReLU(inplace=True), + ) def forward(self, fea): n, c, h, w = fea.size() fea_stack = [] for i in range(w): - i_fea = fea.select(3, i).resize(n,c,h,1) + i_fea = fea.select(3, i).resize(n, c, h, 1) if i == 0: fea_stack.append(i_fea) continue - fea_stack.append(self.conv(fea_stack[i-1])+i_fea) + fea_stack.append(self.conv(fea_stack[i - 1]) + i_fea) for i in range(w): - pos = w-i-1 - if pos == w-1: + pos = w - i - 1 + if pos == w - 1: continue - fea_stack[pos] = self.conv(fea_stack[pos+1])+fea_stack[pos] - + fea_stack[pos] = self.conv(fea_stack[pos + 1]) + fea_stack[pos] fea = torch.cat(fea_stack, 3) - return fea \ No newline at end of file + return fea diff --git a/misc/post_prob.py b/misc/post_prob.py new file mode 100644 index 0000000..dcfef3d --- /dev/null +++ b/misc/post_prob.py @@ -0,0 +1,50 @@ +import torch +from torch.nn import Module + +class Post_Prob(Module): + def __init__(self, sigma, c_size, stride, background_ratio, use_background, device): + super(Post_Prob, self).__init__() + assert c_size % stride == 0 + + self.sigma = sigma + self.bg_ratio = background_ratio + self.device = device + # coordinate is same to image space, set to constant since crop size is same + self.cood = torch.arange(0, c_size, step=stride, + dtype=torch.float32, device=device) + stride / 2 + self.cood.unsqueeze_(0) + self.softmax = torch.nn.Softmax(dim=0) + self.use_bg = use_background + + def forward(self, points, st_sizes): + num_points_per_image = [len(points_per_image) for points_per_image in points] + all_points = torch.cat(points, dim=0) + + if len(all_points) > 0: + x = all_points[:, 0].unsqueeze_(1) + y = all_points[:, 
1].unsqueeze_(1) + x_dis = -2 * torch.matmul(x, self.cood) + x * x + self.cood * self.cood + y_dis = -2 * torch.matmul(y, self.cood) + y * y + self.cood * self.cood + y_dis.unsqueeze_(2) + x_dis.unsqueeze_(1) + dis = y_dis + x_dis + dis = dis.view((dis.size(0), -1)) + + dis_list = torch.split(dis, num_points_per_image) + prob_list = [] + for dis, st_size in zip(dis_list, st_sizes): + if len(dis) > 0: + if self.use_bg: + min_dis = torch.clamp(torch.min(dis, dim=0, keepdim=True)[0], min=0.0) + bg_dis = (st_size * self.bg_ratio) ** 2 / (min_dis + 1e-5) + dis = torch.cat([dis, bg_dis], 0) # concatenate background distance to the last + dis = -dis / (2.0 * self.sigma ** 2) + prob = self.softmax(dis) + else: + prob = None + prob_list.append(prob) + else: + prob_list = [] + for _ in range(len(points)): + prob_list.append(None) + return prob_list diff --git a/misc/ssim_loss.py b/misc/ssim_loss.py index 0d7bcef..50ae31c 100644 --- a/misc/ssim_loss.py +++ b/misc/ssim_loss.py @@ -8,16 +8,18 @@ from torch.nn.modules.loss import _assert_no_grad, _Loss import numpy as np + def gaussian_kernel(size, sigma): - x, y = np.mgrid[-size:size+1, -size:size+1] - kernel = np.exp(-0.5*(x*x+y*y)/(sigma*sigma)) + x, y = np.mgrid[-size : size + 1, -size : size + 1] + kernel = np.exp(-0.5 * (x * x + y * y) / (sigma * sigma)) kernel /= kernel.sum() return kernel + class SSIM_Loss(_Loss): def __init__(self, in_channels, size=11, sigma=1.5, size_average=True): super(SSIM_Loss, self).__init__(size_average) - #assert in_channels == 1, 'Only support single-channel input' + # assert in_channels == 1, 'Only support single-channel input' self.in_channels = in_channels self.size = int(size) self.sigma = sigma @@ -31,19 +33,38 @@ def __init__(self, in_channels, size=11, sigma=1.5, size_average=True): def forward(self, input, target, mask=None): _assert_no_grad(target) mean1 = F.conv2d(input, self.weight, padding=self.size, groups=self.in_channels) - mean2 = F.conv2d(target, self.weight, 
padding=self.size, groups=self.in_channels) - mean1_sq = mean1*mean1 - mean2_sq = mean2*mean2 - mean_12 = mean1*mean2 + mean2 = F.conv2d( + target, self.weight, padding=self.size, groups=self.in_channels + ) + mean1_sq = mean1 * mean1 + mean2_sq = mean2 * mean2 + mean_12 = mean1 * mean2 + + sigma1_sq = ( + F.conv2d( + input * input, self.weight, padding=self.size, groups=self.in_channels + ) + - mean1_sq + ) + sigma2_sq = ( + F.conv2d( + target * target, self.weight, padding=self.size, groups=self.in_channels + ) + - mean2_sq + ) + sigma_12 = ( + F.conv2d( + input * target, self.weight, padding=self.size, groups=self.in_channels + ) + - mean_12 + ) - sigma1_sq = F.conv2d(input*input, self.weight, padding=self.size, groups=self.in_channels) - mean1_sq - sigma2_sq = F.conv2d(target*target, self.weight, padding=self.size, groups=self.in_channels) - mean2_sq - sigma_12 = F.conv2d(input*target, self.weight, padding=self.size, groups=self.in_channels) - mean_12 - C1 = 0.01**2 C2 = 0.03**2 - ssim = ((2*mean_12+C1)*(2*sigma_12+C2)) / ((mean1_sq+mean2_sq+C1)*(sigma1_sq+sigma2_sq+C2)) + ssim = ((2 * mean_12 + C1) * (2 * sigma_12 + C2)) / ( + (mean1_sq + mean2_sq + C1) * (sigma1_sq + sigma2_sq + C2) + ) if self.size_average: out = 1 - ssim.mean() else: @@ -51,7 +72,7 @@ def forward(self, input, target, mask=None): return out -if __name__ == '__main__': +if __name__ == "__main__": data = torch.zeros(1, 1, 1, 1) data += 0.001 target = torch.zeros(1, 1, 1, 1) @@ -59,7 +80,7 @@ def forward(self, input, target, mask=None): target = Variable(target) model = SSIM_Loss(1) - loss = model(data, target) + loss = model(data, target) loss.backward() print(loss) - print(data.grad) \ No newline at end of file + print(data.grad) diff --git a/misc/transforms.py b/misc/transforms.py index b4db486..9df9f6c 100644 --- a/misc/transforms.py +++ b/misc/transforms.py @@ -4,8 +4,10 @@ from PIL import Image, ImageOps, ImageFilter from config import cfg import torch + # 
===============================img tranforms============================ + class Compose(object): def __init__(self, transforms): self.transforms = transforms @@ -19,21 +21,29 @@ def __call__(self, img, mask, bbx=None): img, mask, bbx = t(img, mask, bbx) return img, mask, bbx + class RandomHorizontallyFlip(object): def __call__(self, img, mask, bbx=None): if random.random() < 0.5: if bbx is None: - return img.transpose(Image.FLIP_LEFT_RIGHT), mask.transpose(Image.FLIP_LEFT_RIGHT) + return img.transpose(Image.FLIP_LEFT_RIGHT), mask.transpose( + Image.FLIP_LEFT_RIGHT + ) w, h = img.size - xmin = w - bbx[:,3] - xmax = w - bbx[:,1] - bbx[:,1] = xmin - bbx[:,3] = xmax - return img.transpose(Image.FLIP_LEFT_RIGHT), mask.transpose(Image.FLIP_LEFT_RIGHT), bbx + xmin = w - bbx[:, 3] + xmax = w - bbx[:, 1] + bbx[:, 1] = xmin + bbx[:, 3] = xmax + return ( + img.transpose(Image.FLIP_LEFT_RIGHT), + mask.transpose(Image.FLIP_LEFT_RIGHT), + bbx, + ) if bbx is None: return img, mask return img, mask, bbx + class RandomCrop(object): def __init__(self, size, padding=0): if isinstance(size, numbers.Number): @@ -56,11 +66,15 @@ def __call__(self, img, mask, dst_size=None): if w == tw and h == th: return img, mask if w < tw or h < th: - return img.resize((tw, th), Image.BILINEAR), mask.resize((tw, th), Image.NEAREST) + return img.resize((tw, th), Image.BILINEAR), mask.resize( + (tw, th), Image.NEAREST + ) x1 = random.randint(0, w - tw) y1 = random.randint(0, h - th) - return img.crop((x1, y1, x1 + tw, y1 + th)), mask.crop((x1, y1, x1 + tw, y1 + th)) + return img.crop((x1, y1, x1 + tw, y1 + th)), mask.crop( + (x1, y1, x1 + tw, y1 + th) + ) class CenterCrop(object): @@ -73,10 +87,11 @@ def __init__(self, size): def __call__(self, img, mask): w, h = img.size th, tw = self.size - x1 = int(round((w - tw) / 2.)) - y1 = int(round((h - th) / 2.)) - return img.crop((x1, y1, x1 + tw, y1 + th)), mask.crop((x1, y1, x1 + tw, y1 + th)) - + x1 = int(round((w - tw) / 2.0)) + y1 = int(round((h - th) / 
2.0)) + return img.crop((x1, y1, x1 + tw, y1 + th)), mask.crop( + (x1, y1, x1 + tw, y1 + th) + ) class FreeScale(object): @@ -84,7 +99,9 @@ def __init__(self, size): self.size = size # (h, w) def __call__(self, img, mask): - return img.resize((self.size[1], self.size[0]), Image.BILINEAR), mask.resize((self.size[1], self.size[0]), Image.NEAREST) + return img.resize((self.size[1], self.size[0]), Image.BILINEAR), mask.resize( + (self.size[1], self.size[0]), Image.NEAREST + ) class ScaleDown(object): @@ -92,7 +109,10 @@ def __init__(self, size): self.size = size # (h, w) def __call__(self, mask): - return mask.resize((self.size[1]/cfg.TRAIN.DOWNRATE, self.size[0]/cfg.TRAIN.DOWNRATE), Image.NEAREST) + return mask.resize( + (self.size[1] / cfg.TRAIN.DOWNRATE, self.size[0] / cfg.TRAIN.DOWNRATE), + Image.NEAREST, + ) class Scale(object): @@ -101,8 +121,8 @@ def __init__(self, size): def __call__(self, img, mask): if img.size != mask.size: - print( img.size ) - print( mask.size ) + print(img.size) + print(mask.size) assert img.size == mask.size w, h = img.size if (w <= h and w == self.size) or (h <= w and h == self.size): @@ -110,15 +130,20 @@ def __call__(self, img, mask): if w < h: ow = self.size oh = int(self.size * h / w) - return img.resize((ow, oh), Image.BILINEAR), mask.resize((ow, oh), Image.NEAREST) + return img.resize((ow, oh), Image.BILINEAR), mask.resize( + (ow, oh), Image.NEAREST + ) else: oh = self.size ow = int(self.size * w / h) - return img.resize((ow, oh), Image.BILINEAR), mask.resize((ow, oh), Image.NEAREST) + return img.resize((ow, oh), Image.BILINEAR), mask.resize( + (ow, oh), Image.NEAREST + ) # ===============================label tranforms============================ + class DeNormalize(object): def __init__(self, mean, std): self.mean = mean @@ -142,17 +167,22 @@ def __init__(self, para): def __call__(self, tensor): # tensor = 1./(tensor+self.para).log() tensor = torch.from_numpy(np.array(tensor)) - tensor = tensor*self.para + tensor = tensor * 
self.para return tensor + class GTScaleDown(object): def __init__(self, factor=8): self.factor = factor def __call__(self, img): w, h = img.size - if self.factor==1: + if self.factor == 1: return img - tmp = np.array(img.resize((w//self.factor, h//self.factor), Image.BICUBIC))*self.factor*self.factor + tmp = ( + np.array(img.resize((w // self.factor, h // self.factor), Image.BICUBIC)) + * self.factor + * self.factor + ) img = Image.fromarray(tmp) return img diff --git a/misc/utils.py b/misc/utils.py index 85c9464..a16d2bb 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -26,7 +26,7 @@ def real_init_weights(m): for mini_m in m: real_init_weights(mini_m) else: - if isinstance(m, nn.Conv2d): + if isinstance(m, nn.Conv2d): nn.init.normal_(m.weight, std=0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) @@ -35,21 +35,22 @@ def real_init_weights(m): elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - elif isinstance(m,nn.Module): + elif isinstance(m, nn.Module): for mini_m in m.children(): real_init_weights(mini_m) else: - print( m ) + print(m) + def weights_normal_init(*models): for model in models: - dev=0.01 + dev = 0.01 if isinstance(model, list): for m in model: weights_normal_init(m, dev) else: - for m in model.modules(): - if isinstance(m, nn.Conv2d): + for m in model.modules(): + if isinstance(m, nn.Conv2d): m.weight.data.normal_(0.0, dev) if m.bias is not None: m.bias.data.fill_(0.0) @@ -60,63 +61,58 @@ def weights_normal_init(*models): def logger(exp_path, exp_name, work_dir, exception, resume=False): from tensorboardX import SummaryWriter - + if not os.path.exists(exp_path): os.mkdir(exp_path) - writer = SummaryWriter(exp_path+ '/' + exp_name) - log_file = exp_path + '/' + exp_name + '/' + exp_name + '.txt' - - cfg_file = open('./config.py',"r") + writer = SummaryWriter(exp_path + "/" + exp_name) + log_file = exp_path + "/" + exp_name + "/" + exp_name + ".txt" + + cfg_file = open("./config.py", "r") 
cfg_lines = cfg_file.readlines() - - with open(log_file, 'a') as f: - f.write(''.join(cfg_lines) + '\n\n\n\n') - if not resume: - copy_cur_env(work_dir, exp_path+ '/' + exp_name + '/code', exception) + with open(log_file, "a") as f: + f.write("".join(cfg_lines) + "\n\n\n\n") + if not resume: + copy_cur_env(work_dir, exp_path + "/" + exp_name + "/code", exception) return writer, log_file - def logger_for_CMTL(exp_path, exp_name, work_dir, exception, resume=False): - + if not os.path.exists(exp_path): os.mkdir(exp_path) - if not os.path.exists(exp_path+ '/' + exp_name): - os.mkdir(exp_path+ '/' + exp_name) - log_file = exp_path + '/' + exp_name + '/' + exp_name + '.txt' - - cfg_file = open('./config.py',"r") + if not os.path.exists(exp_path + "/" + exp_name): + os.mkdir(exp_path + "/" + exp_name) + log_file = exp_path + "/" + exp_name + "/" + exp_name + ".txt" + + cfg_file = open("./config.py", "r") cfg_lines = cfg_file.readlines() - - with open(log_file, 'a') as f: - f.write(''.join(cfg_lines) + '\n\n\n\n') - if not resume: - copy_cur_env(work_dir, exp_path+ '/' + exp_name + '/code', exception) + with open(log_file, "a") as f: + f.write("".join(cfg_lines) + "\n\n\n\n") + if not resume: + copy_cur_env(work_dir, exp_path + "/" + exp_name + "/code", exception) return log_file -def logger_txt(log_file,epoch,scores): + +def logger_txt(log_file, epoch, scores): mae, mse, loss = scores - snapshot_name = 'all_ep_%d_mae_%.1f_mse_%.1f' % (epoch + 1, mae, mse) + snapshot_name = "all_ep_%d_mae_%.1f_mse_%.1f" % (epoch + 1, mae, mse) # pdb.set_trace() - with open(log_file, 'a') as f: - f.write('='*15 + '+'*15 + '='*15 + '\n\n') - f.write(snapshot_name + '\n') - f.write(' [mae %.2f mse %.2f], [val loss %.4f]\n' % (mae, mse, loss)) - f.write('='*15 + '+'*15 + '='*15 + '\n\n') - - - + with open(log_file, "a") as f: + f.write("=" * 15 + "+" * 15 + "=" * 15 + "\n\n") + f.write(snapshot_name + "\n") + f.write(" [mae %.2f mse %.2f], [val loss %.4f]\n" % (mae, mse, loss)) + f.write("=" * 
15 + "+" * 15 + "=" * 15 + "\n\n") def vis_results(exp_name, epoch, writer, restore, img, pred_map, gt_map): @@ -124,112 +120,159 @@ def vis_results(exp_name, epoch, writer, restore, img, pred_map, gt_map): pil_to_tensor = standard_transforms.ToTensor() x = [] - + for idx, tensor in enumerate(zip(img.cpu().data, pred_map, gt_map)): - if idx>1:# show only one group + if idx > 1: # show only one group break pil_input = restore(tensor[0]) - pil_output = torch.from_numpy(tensor[1]/(tensor[2].max()+1e-10)).repeat(3,1,1) - pil_label = torch.from_numpy(tensor[2]/(tensor[2].max()+1e-10)).repeat(3,1,1) - x.extend([pil_to_tensor(pil_input.convert('RGB')), pil_label, pil_output]) + pil_output = torch.from_numpy(tensor[1] / (tensor[2].max() + 1e-10)).repeat( + 3, 1, 1 + ) + pil_label = torch.from_numpy(tensor[2] / (tensor[2].max() + 1e-10)).repeat( + 3, 1, 1 + ) + x.extend([pil_to_tensor(pil_input.convert("RGB")), pil_label, pil_output]) x = torch.stack(x, 0) x = vutils.make_grid(x, nrow=3, padding=5) - x = (x.numpy()*255).astype(np.uint8) + x = (x.numpy() * 255).astype(np.uint8) - writer.add_image(exp_name + '_epoch_' + str(epoch+1), x) + writer.add_image(exp_name + "_epoch_" + str(epoch + 1), x) - -def print_summary(exp_name,scores,train_record): +def print_summary(exp_name, scores, train_record): mae, mse, loss = scores - print( '='*50 ) - print( exp_name ) - print( ' '+ '-'*20 ) - print( ' [mae %.2f mse %.2f], [val loss %.4f]' % (mae, mse, loss) ) - print( ' '+ '-'*20 ) - print( '[best] [model: %s] , [mae %.2f], [mse %.2f]' % (train_record['best_model_name'],\ - train_record['best_mae'],\ - train_record['best_mse']) ) - print( '='*50) - -def print_WE_summary(log_txt,epoch,scores,train_record,c_maes): + print("=" * 50) + print(exp_name) + print(" " + "-" * 20) + print(" [mae %.2f mse %.2f], [val loss %.4f]" % (mae, mse, loss)) + print(" " + "-" * 20) + print( + "[best] [model: %s] , [mae %.2f], [mse %.2f]" + % ( + train_record["best_model_name"], + train_record["best_mae"], 
+ train_record["best_mse"], + ) + ) + print("=" * 50) + + +def print_WE_summary(log_txt, epoch, scores, train_record, c_maes): mae, mse, loss = scores # pdb.set_trace() - with open(log_txt, 'a') as f: - f.write('='*15 + '+'*15 + '='*15 + '\n') - f.write(str(epoch) + '\n\n') - f.write(' [mae %.4f], [val loss %.4f]\n\n' % (mae, loss)) - f.write(' list: ' + str(np.transpose(c_maes.avg)) + '\n') - - - f.write('='*15 + '+'*15 + '='*15 + '\n\n') - - print( '='*50 ) - print( ' '+ '-'*20 ) - print( ' [mae %.2f mse %.2f], [val loss %.4f]' % (mae, mse, loss) ) - print( ' '+ '-'*20 ) - print( '[best] [model: %s] , [mae %.2f], [mse %.2f]' % (train_record['best_model_name'],\ - train_record['best_mae'],\ - train_record['best_mse']) ) - print( '='*50 ) - - -def print_GCC_summary(log_txt,epoch, scores,train_record,c_maes,c_mses): + with open(log_txt, "a") as f: + f.write("=" * 15 + "+" * 15 + "=" * 15 + "\n") + f.write(str(epoch) + "\n\n") + f.write(" [mae %.4f], [val loss %.4f]\n\n" % (mae, loss)) + f.write(" list: " + str(np.transpose(c_maes.avg)) + "\n") + + f.write("=" * 15 + "+" * 15 + "=" * 15 + "\n\n") + + print("=" * 50) + print(" " + "-" * 20) + print(" [mae %.2f mse %.2f], [val loss %.4f]" % (mae, mse, loss)) + print(" " + "-" * 20) + print( + "[best] [model: %s] , [mae %.2f], [mse %.2f]" + % ( + train_record["best_model_name"], + train_record["best_mae"], + train_record["best_mse"], + ) + ) + print("=" * 50) + + +def print_GCC_summary(log_txt, epoch, scores, train_record, c_maes, c_mses): mae, mse, loss = scores - c_mses['level'] = np.sqrt(c_mses['level'].avg) - c_mses['time'] = np.sqrt(c_mses['time'].avg) - c_mses['weather'] = np.sqrt(c_mses['weather'].avg) - with open(log_txt, 'a') as f: - f.write('='*15 + '+'*15 + '='*15 + '\n') - f.write(str(epoch) + '\n\n') - f.write(' [mae %.4f mse %.4f], [val loss %.4f]\n\n' % (mae, mse, loss)) - f.write(' [level: mae %.4f mse %.4f]\n' % (np.average(c_maes['level'].avg), np.average(c_mses['level']))) - f.write(' list: ' + 
str(np.transpose(c_maes['level'].avg)) + '\n') - f.write(' list: ' + str(np.transpose(c_mses['level'])) + '\n\n') - - f.write(' [time: mae %.4f mse %.4f]\n' % (np.average(c_maes['time'].avg), np.average(c_mses['time']))) - f.write(' list: ' + str(np.transpose(c_maes['time'].avg)) + '\n') - f.write(' list: ' + str(np.transpose(c_mses['time'])) + '\n\n') - - f.write(' [weather: mae %.4f mse %.4f]\n' % (np.average(c_maes['weather'].avg), np.average(c_mses['weather']))) - f.write(' list: ' + str(np.transpose(c_maes['weather'].avg)) + '\n') - f.write(' list: ' + str(np.transpose(c_mses['weather']))+ '\n\n') - - f.write('='*15 + '+'*15 + '='*15 + '\n\n') - - print( '='*50 ) - print( ' '+ '-'*20 ) - print( ' [mae %.2f mse %.2f], [val loss %.4f]' % (mae, mse, loss) ) - print( ' '+ '-'*20 ) - print( '[best] [model: %s] , [mae %.2f], [mse %.2f]' % (train_record['best_model_name'],\ - train_record['best_mae'],\ - train_record['best_mse']) ) - print( '='*50 ) - - -def update_model(net,optimizer,scheduler,epoch,i_tb,exp_path,exp_name,scores,train_record,log_file=None): + c_mses["level"] = np.sqrt(c_mses["level"].avg) + c_mses["time"] = np.sqrt(c_mses["time"].avg) + c_mses["weather"] = np.sqrt(c_mses["weather"].avg) + with open(log_txt, "a") as f: + f.write("=" * 15 + "+" * 15 + "=" * 15 + "\n") + f.write(str(epoch) + "\n\n") + f.write(" [mae %.4f mse %.4f], [val loss %.4f]\n\n" % (mae, mse, loss)) + f.write( + " [level: mae %.4f mse %.4f]\n" + % (np.average(c_maes["level"].avg), np.average(c_mses["level"])) + ) + f.write(" list: " + str(np.transpose(c_maes["level"].avg)) + "\n") + f.write(" list: " + str(np.transpose(c_mses["level"])) + "\n\n") + + f.write( + " [time: mae %.4f mse %.4f]\n" + % (np.average(c_maes["time"].avg), np.average(c_mses["time"])) + ) + f.write(" list: " + str(np.transpose(c_maes["time"].avg)) + "\n") + f.write(" list: " + str(np.transpose(c_mses["time"])) + "\n\n") + + f.write( + " [weather: mae %.4f mse %.4f]\n" + % (np.average(c_maes["weather"].avg), 
np.average(c_mses["weather"])) + ) + f.write(" list: " + str(np.transpose(c_maes["weather"].avg)) + "\n") + f.write(" list: " + str(np.transpose(c_mses["weather"])) + "\n\n") + + f.write("=" * 15 + "+" * 15 + "=" * 15 + "\n\n") + + print("=" * 50) + print(" " + "-" * 20) + print(" [mae %.2f mse %.2f], [val loss %.4f]" % (mae, mse, loss)) + print(" " + "-" * 20) + print( + "[best] [model: %s] , [mae %.2f], [mse %.2f]" + % ( + train_record["best_model_name"], + train_record["best_mae"], + train_record["best_mse"], + ) + ) + print("=" * 50) + + +def update_model( + net, + optimizer, + scheduler, + epoch, + i_tb, + exp_path, + exp_name, + scores, + train_record, + log_file=None, +): mae, mse, loss = scores - snapshot_name = 'all_ep_%d_mae_%.1f_mse_%.1f' % (epoch + 1, mae, mse) + snapshot_name = "all_ep_%d_mae_%.1f_mse_%.1f" % (epoch + 1, mae, mse) - if mae < train_record['best_mae'] or mse < train_record['best_mse']: - train_record['best_model_name'] = snapshot_name + if mae < train_record["best_mae"] or mse < train_record["best_mse"]: + train_record["best_model_name"] = snapshot_name if log_file is not None: - logger_txt(log_file,epoch,scores) + logger_txt(log_file, epoch, scores) to_saved_weight = net.state_dict() - torch.save(to_saved_weight, os.path.join(exp_path, exp_name, snapshot_name + '.pth')) - - if mae < train_record['best_mae']: - train_record['best_mae'] = mae - if mse < train_record['best_mse']: - train_record['best_mse'] = mse - - latest_state = {'train_record':train_record, 'net':net.state_dict(), 'optimizer':optimizer.state_dict(),\ - 'scheduler':scheduler.state_dict(), 'epoch': epoch, 'i_tb':i_tb, 'exp_path':exp_path, \ - 'exp_name':exp_name} - - torch.save(latest_state,os.path.join(exp_path, exp_name, 'latest_state.pth')) + torch.save( + to_saved_weight, os.path.join(exp_path, exp_name, snapshot_name + ".pth") + ) + + if mae < train_record["best_mae"]: + train_record["best_mae"] = mae + if mse < train_record["best_mse"]: + train_record["best_mse"] = 
mse + + latest_state = { + "train_record": train_record, + "net": net.state_dict(), + "optimizer": optimizer.state_dict(), + "scheduler": scheduler.state_dict(), + "epoch": epoch, + "i_tb": i_tb, + "exp_path": exp_path, + "exp_name": exp_name, + } + + torch.save(latest_state, os.path.join(exp_path, exp_name, "latest_state.pth")) return train_record @@ -241,16 +284,13 @@ def copy_cur_env(work_dir, dst_dir, exception): for filename in os.listdir(work_dir): - file = os.path.join(work_dir,filename) - dst_file = os.path.join(dst_dir,filename) - + file = os.path.join(work_dir, filename) + dst_file = os.path.join(dst_dir, filename) if os.path.isdir(file) and exception not in filename: shutil.copytree(file, dst_file) elif os.path.isfile(file): - shutil.copyfile(file,dst_file) - - + shutil.copyfile(file, dst_file) class AverageMeter(object): @@ -271,10 +311,11 @@ def update(self, cur_val): self.count += 1 self.avg = self.sum / self.count + class AverageCategoryMeter(object): """Computes and stores the average and current value""" - def __init__(self,num_class): + def __init__(self, num_class): self.num_class = num_class self.reset() @@ -293,12 +334,13 @@ def update(self, cur_val, class_id): class Timer(object): """A simple timer.""" + def __init__(self): - self.total_time = 0. + self.total_time = 0.0 self.calls = 0 - self.start_time = 0. - self.diff = 0. - self.average_time = 0. 
+ self.start_time = 0.0 + self.diff = 0.0 + self.average_time = 0.0 def tic(self): # using time.time instead of time.clock because time time.clock @@ -314,11 +356,3 @@ def toc(self, average=True): return self.average_time else: return self.diff - - - - - - - - diff --git a/models/CC.py b/models/CC.py index cc7f2a6..c2c404e 100644 --- a/models/CC.py +++ b/models/CC.py @@ -2,49 +2,38 @@ import torch.nn as nn import torch.nn.functional as F import pdb +import imp + class CrowdCounter(nn.Module): - def __init__(self,gpus,model_name): - super(CrowdCounter, self).__init__() - - if model_name == 'AlexNet': - from .SCC_Model.AlexNet import AlexNet as net - elif model_name == 'VGG': - from .SCC_Model.VGG import VGG as net - elif model_name == 'VGG_DECODER': - from .SCC_Model.VGG_decoder import VGG_decoder as net - elif model_name == 'MCNN': - from .SCC_Model.MCNN import MCNN as net - elif model_name == 'CSRNet': - from .SCC_Model.CSRNet import CSRNet as net - elif model_name == 'Res50': - from .SCC_Model.Res50 import Res50 as net - elif model_name == 'Res101': - from .SCC_Model.Res101 import Res101 as net - elif model_name == 'Res101_SFCN': - from .SCC_Model.Res101_SFCN import Res101_SFCN as net + def __init__(self, gpus, model_name): + super(CrowdCounter, self).__init__() + + net = getattr( + imp.load_source("network_src", "models/SCC_Model/" + model_name + ".py"), + model_name, + ) self.CCN = net() - if len(gpus)>1: + if len(gpus) > 1: self.CCN = torch.nn.DataParallel(self.CCN, device_ids=gpus).cuda() else: - self.CCN=self.CCN.cuda() + self.CCN = self.CCN.cuda() self.loss_mse_fn = nn.MSELoss().cuda() - + @property def loss(self): return self.loss_mse - - def forward(self, img, gt_map): - density_map = self.CCN(img) - self.loss_mse= self.build_loss(density_map.squeeze(), gt_map.squeeze()) + + def forward(self, img, gt_map): + density_map = self.CCN(img) + self.loss_mse = self.build_loss(density_map.squeeze(), gt_map.squeeze()) return density_map - + def build_loss(self, 
density_map, gt_data): - loss_mse = self.loss_mse_fn(density_map, gt_data) + loss_mse = self.loss_mse_fn(density_map, gt_data) return loss_mse - def test_forward(self, img): - density_map = self.CCN(img) + def test_forward(self, img): + density_map = self.CCN(img) return density_map - diff --git a/models/M2T2OCC.py b/models/M2T2OCC.py index 00714b6..2175331 100644 --- a/models/M2T2OCC.py +++ b/models/M2T2OCC.py @@ -8,10 +8,10 @@ class CrowdCounter(nn.Module): - def __init__(self, gpus, model_name,loss_1_fn,loss_2_fn): + def __init__(self, gpus, model_name, loss_1_fn, loss_2_fn): super(CrowdCounter, self).__init__() - if model_name == 'CMTL': - from M2T2OCC_Model.CMTL import CMTL as net + if model_name == "CMTL": + from M2T2OCC_Model.CMTL import CMTL as net self.CCN = net() if len(gpus) > 1: @@ -23,17 +23,18 @@ def __init__(self, gpus, model_name,loss_1_fn,loss_2_fn): @property def loss(self): - return self.loss_mse, self.cross_entropy*cfg.LAMBDA_1 - + return self.loss_mse, self.cross_entropy * cfg.LAMBDA_1 def forward(self, img, gt_map=None, gt_cls_label=None): density_map, density_cls_score = self.CCN(img) # pdb.set_trace() - density_cls_prob = F.softmax(density_cls_score,dim=1) + density_cls_prob = F.softmax(density_cls_score, dim=1) - self.loss_mse, self.cross_entropy = self.build_loss(density_map.squeeze(), gt_map.squeeze(), density_cls_prob, gt_cls_label) + self.loss_mse, self.cross_entropy = self.build_loss( + density_map.squeeze(), gt_map.squeeze(), density_cls_prob, gt_cls_label + ) return density_map def build_loss(self, density_map, gt_data, density_cls_score, gt_cls_label): @@ -45,4 +46,3 @@ def build_loss(self, density_map, gt_data, density_cls_score, gt_cls_label): def test_forward(self, img): density_map, density_cls_score = self.CCN(img) return density_map - diff --git a/models/M2T2OCC_Model/CMTL.py b/models/M2T2OCC_Model/CMTL.py index 2c39e26..762569a 100644 --- a/models/M2T2OCC_Model/CMTL.py +++ b/models/M2T2OCC_Model/CMTL.py @@ -11,53 +11,65 @@ 
class CMTL(nn.Module): - ''' + """ Implementation of CNN-based Cascaded Multi-task Learning of High-level Prior and Density Estimation for Crowd Counting (Sindagi et al.) - ''' + """ def __init__(self, bn=False, num_classes=10): super(CMTL, self).__init__() self.num_classes = num_classes - self.base_layer = nn.Sequential(Conv2d(3, 16, 9, same_padding=True, NL='prelu', bn=bn), - Conv2d(16, 32, 7, same_padding=True, NL='prelu', bn=bn)) + self.base_layer = nn.Sequential( + Conv2d(3, 16, 9, same_padding=True, NL="prelu", bn=bn), + Conv2d(16, 32, 7, same_padding=True, NL="prelu", bn=bn), + ) - self.hl_prior_1 = nn.Sequential(Conv2d(32, 16, 9, same_padding=True, NL='prelu', bn=bn), - nn.MaxPool2d(2), - Conv2d(16, 32, 7, same_padding=True, NL='prelu', bn=bn), - nn.MaxPool2d(2), - Conv2d(32, 16, 7, same_padding=True, NL='prelu', bn=bn), - Conv2d(16, 8, 7, same_padding=True, NL='prelu', bn=bn)) + self.hl_prior_1 = nn.Sequential( + Conv2d(32, 16, 9, same_padding=True, NL="prelu", bn=bn), + nn.MaxPool2d(2), + Conv2d(16, 32, 7, same_padding=True, NL="prelu", bn=bn), + nn.MaxPool2d(2), + Conv2d(32, 16, 7, same_padding=True, NL="prelu", bn=bn), + Conv2d(16, 8, 7, same_padding=True, NL="prelu", bn=bn), + ) - self.hl_prior_2 = nn.Sequential(nn.AdaptiveMaxPool2d((32, 32)), - Conv2d(8, 4, 1, same_padding=True, NL='prelu', bn=bn)) + self.hl_prior_2 = nn.Sequential( + nn.AdaptiveMaxPool2d((32, 32)), + Conv2d(8, 4, 1, same_padding=True, NL="prelu", bn=bn), + ) - self.hl_prior_fc1 = FC(4 * 1024, 512, NL='prelu') - self.hl_prior_fc2 = FC(512, 256, NL='prelu') - self.hl_prior_fc3 = FC(256, self.num_classes, NL='prelu') + self.hl_prior_fc1 = FC(4 * 1024, 512, NL="prelu") + self.hl_prior_fc2 = FC(512, 256, NL="prelu") + self.hl_prior_fc3 = FC(256, self.num_classes, NL="prelu") - self.de_stage_1 = nn.Sequential(Conv2d(32, 20, 7, same_padding=True, NL='prelu', bn=bn), - nn.MaxPool2d(2), - Conv2d(20, 40, 5, same_padding=True, NL='prelu', bn=bn), - nn.MaxPool2d(2), - Conv2d(40, 20, 5, 
same_padding=True, NL='prelu', bn=bn), - Conv2d(20, 10, 5, same_padding=True, NL='prelu', bn=bn)) + self.de_stage_1 = nn.Sequential( + Conv2d(32, 20, 7, same_padding=True, NL="prelu", bn=bn), + nn.MaxPool2d(2), + Conv2d(20, 40, 5, same_padding=True, NL="prelu", bn=bn), + nn.MaxPool2d(2), + Conv2d(40, 20, 5, same_padding=True, NL="prelu", bn=bn), + Conv2d(20, 10, 5, same_padding=True, NL="prelu", bn=bn), + ) - self.de_stage_2 = nn.Sequential(Conv2d(18, 24, 3, same_padding=True, NL='prelu', bn=bn), - Conv2d(24, 32, 3, same_padding=True, NL='prelu', bn=bn), - nn.ConvTranspose2d(32, 16, 4, stride=2, padding=1, output_padding=0, bias=True), - nn.PReLU(), - nn.ConvTranspose2d(16, 8, 4, stride=2, padding=1, output_padding=0, bias=True), - nn.PReLU(), - Conv2d(8, 1, 1, same_padding=True, NL='relu', bn=bn)) + self.de_stage_2 = nn.Sequential( + Conv2d(18, 24, 3, same_padding=True, NL="prelu", bn=bn), + Conv2d(24, 32, 3, same_padding=True, NL="prelu", bn=bn), + nn.ConvTranspose2d( + 32, 16, 4, stride=2, padding=1, output_padding=0, bias=True + ), + nn.PReLU(), + nn.ConvTranspose2d( + 16, 8, 4, stride=2, padding=1, output_padding=0, bias=True + ), + nn.PReLU(), + Conv2d(8, 1, 1, same_padding=True, NL="relu", bn=bn), + ) # weights_normal_init(self.base_layer, self.hl_prior_1, self.hl_prior_2, self.hl_prior_fc1, self.hl_prior_fc2, \ # self.hl_prior_fc3, self.de_stage_1, self.de_stage_2) initialize_weights(self.modules()) - - def forward(self, im_data): x_base = self.base_layer(im_data) x_hlp1 = self.hl_prior_1(x_base) @@ -71,4 +83,4 @@ def forward(self, im_data): x_den = self.de_stage_1(x_base) x_den = torch.cat((x_hlp1, x_den), 1) x_den = self.de_stage_2(x_den) - return x_den, x_cls \ No newline at end of file + return x_den, x_cls diff --git a/models/M2TCC.py b/models/M2TCC.py index a804b6a..ea7dcde 100644 --- a/models/M2TCC.py +++ b/models/M2TCC.py @@ -6,34 +6,31 @@ class CrowdCounter(nn.Module): - def __init__(self,gpus,model_name,loss_1_fn,loss_2_fn): - super(CrowdCounter, 
self).__init__() - - if model_name == 'SANet': - from M2TCC_Model.SANet import SANet as net + def __init__(self, gpus, model_name, loss_1_fn, loss_2_fn): + super(CrowdCounter, self).__init__() + if model_name == "SANet": + from M2TCC_Model.SANet import SANet as net self.CCN = net() - if len(gpus)>1: + if len(gpus) > 1: self.CCN = torch.nn.DataParallel(self.CCN, device_ids=gpus).cuda() else: - self.CCN=self.CCN.cuda() + self.CCN = self.CCN.cuda() self.loss_1_fn = loss_1_fn.cuda() self.loss_2_fn = loss_2_fn.cuda() - + @property def loss(self): - return self.loss_1, self.loss_2*cfg.LAMBDA_1 - - def forward(self, img, gt_map): - density_map = self.CCN(img) - self.loss_1= self.loss_1_fn(density_map.squeeze(), gt_map.squeeze()) - self.loss_2= 1 - self.loss_2_fn(density_map, gt_map[:,None,:,:]) - - return density_map + return self.loss_1, self.loss_2 * cfg.LAMBDA_1 + def forward(self, img, gt_map): + density_map = self.CCN(img) + self.loss_1 = self.loss_1_fn(density_map.squeeze(), gt_map.squeeze()) + self.loss_2 = 1 - self.loss_2_fn(density_map, gt_map[:, None, :, :]) - def test_forward(self, img): - density_map = self.CCN(img) return density_map + def test_forward(self, img): + density_map = self.CCN(img) + return density_map diff --git a/models/M2TCC_Model/SANet.py b/models/M2TCC_Model/SANet.py index 9a801a0..df66e99 100644 --- a/models/M2TCC_Model/SANet.py +++ b/models/M2TCC_Model/SANet.py @@ -25,7 +25,9 @@ class BasicDeconv(nn.Module): def __init__(self, in_channels, out_channels, kernel_size, stride=1, use_bn=False): super(BasicDeconv, self).__init__() self.use_bn = use_bn - self.tconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, bias=not self.use_bn) + self.tconv = nn.ConvTranspose2d( + in_channels, out_channels, kernel_size, stride=stride, bias=not self.use_bn + ) self.bn = nn.InstanceNorm2d(out_channels, affine=True) if self.use_bn else None def forward(self, x): @@ -40,15 +42,19 @@ class SAModule_Head(nn.Module): def __init__(self, 
in_channels, out_channels, use_bn): super(SAModule_Head, self).__init__() branch_out = out_channels // 4 - self.branch1x1 = BasicConv(in_channels, branch_out, use_bn=use_bn, - kernel_size=1) - self.branch3x3 = BasicConv(in_channels, branch_out, use_bn=use_bn, - kernel_size=3, padding=1) - self.branch5x5 = BasicConv(in_channels, branch_out, use_bn=use_bn, - kernel_size=5, padding=2) - self.branch7x7 = BasicConv(in_channels, branch_out, use_bn=use_bn, - kernel_size=7, padding=3) - + self.branch1x1 = BasicConv( + in_channels, branch_out, use_bn=use_bn, kernel_size=1 + ) + self.branch3x3 = BasicConv( + in_channels, branch_out, use_bn=use_bn, kernel_size=3, padding=1 + ) + self.branch5x5 = BasicConv( + in_channels, branch_out, use_bn=use_bn, kernel_size=5, padding=2 + ) + self.branch7x7 = BasicConv( + in_channels, branch_out, use_bn=use_bn, kernel_size=7, padding=3 + ) + def forward(self, x): branch1x1 = self.branch1x1(x) branch3x3 = self.branch3x3(x) @@ -62,27 +68,28 @@ class SAModule(nn.Module): def __init__(self, in_channels, out_channels, use_bn): super(SAModule, self).__init__() branch_out = out_channels // 4 - self.branch1x1 = BasicConv(in_channels, branch_out, use_bn=use_bn, - kernel_size=1) + self.branch1x1 = BasicConv( + in_channels, branch_out, use_bn=use_bn, kernel_size=1 + ) self.branch3x3 = nn.Sequential( - BasicConv(in_channels, 2*branch_out, use_bn=use_bn, - kernel_size=1), - BasicConv(2*branch_out, branch_out, use_bn=use_bn, - kernel_size=3, padding=1), - ) + BasicConv(in_channels, 2 * branch_out, use_bn=use_bn, kernel_size=1), + BasicConv( + 2 * branch_out, branch_out, use_bn=use_bn, kernel_size=3, padding=1 + ), + ) self.branch5x5 = nn.Sequential( - BasicConv(in_channels, 2*branch_out, use_bn=use_bn, - kernel_size=1), - BasicConv(2*branch_out, branch_out, use_bn=use_bn, - kernel_size=5, padding=2), - ) + BasicConv(in_channels, 2 * branch_out, use_bn=use_bn, kernel_size=1), + BasicConv( + 2 * branch_out, branch_out, use_bn=use_bn, kernel_size=5, 
padding=2 + ), + ) self.branch7x7 = nn.Sequential( - BasicConv(in_channels, 2*branch_out, use_bn=use_bn, - kernel_size=1), - BasicConv(2*branch_out, branch_out, use_bn=use_bn, - kernel_size=7, padding=3), - ) - + BasicConv(in_channels, 2 * branch_out, use_bn=use_bn, kernel_size=1), + BasicConv( + 2 * branch_out, branch_out, use_bn=use_bn, kernel_size=7, padding=3 + ), + ) + def forward(self, x): branch1x1 = self.branch1x1(x) branch3x3 = self.branch3x3(x) @@ -108,21 +115,21 @@ def __init__(self, gray_input=False, use_bn=True): SAModule(128, 128, use_bn), nn.MaxPool2d(2, 2), SAModule(128, 128, use_bn), - ) + ) self.decoder = nn.Sequential( BasicConv(128, 64, use_bn=use_bn, kernel_size=9, padding=4), BasicDeconv(64, 64, 2, stride=2, use_bn=use_bn), BasicConv(64, 32, use_bn=use_bn, kernel_size=7, padding=3), BasicDeconv(32, 32, 2, stride=2, use_bn=use_bn), - BasicConv(32, 16, use_bn=use_bn, kernel_size=5, padding=2), + BasicConv(32, 16, use_bn=use_bn, kernel_size=5, padding=2), BasicDeconv(16, 16, 2, stride=2, use_bn=use_bn), - BasicConv(16, 16, use_bn=use_bn, kernel_size=3, padding=1), + BasicConv(16, 16, use_bn=use_bn, kernel_size=3, padding=1), BasicConv(16, 1, use_bn=False, kernel_size=1), - ) + ) initialize_weights(self.modules()) def forward(self, x): features = self.encoder(x) out = self.decoder(features) - return out \ No newline at end of file + return out diff --git a/models/SCC_Model/AlexNet.py b/models/SCC_Model/AlexNet.py index 5a033c0..51ce2da 100644 --- a/models/SCC_Model/AlexNet.py +++ b/models/SCC_Model/AlexNet.py @@ -8,6 +8,7 @@ # model_path = '../PyTorch_Pretrained/alexnet-owt-4df8aa71.pth' + class AlexNet(nn.Module): def __init__(self, pretrained=True): super(AlexNet, self).__init__() @@ -15,28 +16,32 @@ def __init__(self, pretrained=True): # if pretrained: # alex.load_state_dict(torch.load(model_path)) features = list(alex.features.children()) - - self.layer1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=4) # original padding is 4 - 
self.layer1plus = nn.Sequential(nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2)) - self.layer2 = nn.Conv2d(64, 192, kernel_size=5, padding=3) # original padding is 2 - self.layer2plus_to_5 = nn.Sequential(*features[4:12]) - self.de_pred = nn.Sequential(Conv2d(256, 128, 1, same_padding=True, NL='relu'), - Conv2d(128, 1, 1, same_padding=True, NL='relu')) + self.layer1 = nn.Conv2d( + 3, 64, kernel_size=11, stride=4, padding=4 + ) # original padding is 4 + self.layer1plus = nn.Sequential( + nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2) + ) + self.layer2 = nn.Conv2d( + 64, 192, kernel_size=5, padding=3 + ) # original padding is 2 + self.layer2plus_to_5 = nn.Sequential(*features[4:12]) + self.de_pred = nn.Sequential( + Conv2d(256, 128, 1, same_padding=True, NL="relu"), + Conv2d(128, 1, 1, same_padding=True, NL="relu"), + ) self.layer1.load_state_dict(alex.features[0].state_dict()) self.layer2.load_state_dict(alex.features[3].state_dict()) - - def forward(self, x): - x = self.layer1(x) - x = self.layer1plus(x) + x = self.layer1(x) + x = self.layer1plus(x) x = self.layer2(x) - x = self.layer2plus_to_5(x) + x = self.layer2plus_to_5(x) x = self.de_pred(x) - x = F.upsample(x,scale_factor=16) + x = F.upsample(x, scale_factor=16) - return x \ No newline at end of file + return x diff --git a/models/SCC_Model/CSRNet.py b/models/SCC_Model/CSRNet.py index c6d24b0..4252d1e 100644 --- a/models/SCC_Model/CSRNet.py +++ b/models/SCC_Model/CSRNet.py @@ -3,25 +3,28 @@ from torchvision import models import torch.nn.functional as F + class CSRNet(nn.Module): def __init__(self, load_weights=False): super(CSRNet, self).__init__() self.seen = 0 - self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512] - self.backend_feat = [512, 512, 512,256,128,64] + self.frontend_feat = [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512] + self.backend_feat = [512, 512, 512, 256, 128, 64] self.frontend = make_layers(self.frontend_feat) - 
self.backend = make_layers(self.backend_feat,in_channels = 512,dilation = True) + self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True) self.output_layer = nn.Conv2d(64, 1, kernel_size=1) if not load_weights: - mod = models.vgg16(pretrained = True) + mod = models.vgg16(pretrained=True) self._initialize_weights() self.frontend.load_state_dict(mod.features[0:23].state_dict()) - def forward(self,x): + + def forward(self, x): x = self.frontend(x) x = self.backend(x) x = self.output_layer(x) - x = F.upsample(x,scale_factor=8) + x = F.upsample(x, scale_factor=8) return x + def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): @@ -31,22 +34,24 @@ def _initialize_weights(self): elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - - -def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False): + + +def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False): if dilation: d_rate = 2 else: d_rate = 1 layers = [] for v in cfg: - if v == 'M': + if v == "M": layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate,dilation = d_rate) + conv2d = nn.Conv2d( + in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate + ) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v - return nn.Sequential(*layers) + return nn.Sequential(*layers) diff --git a/models/SCC_Model/MCNN.py b/models/SCC_Model/MCNN.py index 57fca88..22fd556 100644 --- a/models/SCC_Model/MCNN.py +++ b/models/SCC_Model/MCNN.py @@ -4,45 +4,52 @@ import torch.nn.functional as F from misc.utils import * + class MCNN(nn.Module): - ''' - Multi-column CNN + """ + Multi-column CNN -Implementation of Single Image Crowd Counting via Multi-column CNN (Zhang et al.) 
- ''' - + """ + def __init__(self, bn=False): super(MCNN, self).__init__() - - self.branch1 = nn.Sequential(Conv2d( 3, 16, 9, same_padding=True, bn=bn), - nn.MaxPool2d(2), - Conv2d(16, 32, 7, same_padding=True, bn=bn), - nn.MaxPool2d(2), - Conv2d(32, 16, 7, same_padding=True, bn=bn), - Conv2d(16, 8, 7, same_padding=True, bn=bn)) - - self.branch2 = nn.Sequential(Conv2d( 3, 20, 7, same_padding=True, bn=bn), - nn.MaxPool2d(2), - Conv2d(20, 40, 5, same_padding=True, bn=bn), - nn.MaxPool2d(2), - Conv2d(40, 20, 5, same_padding=True, bn=bn), - Conv2d(20, 10, 5, same_padding=True, bn=bn)) - - self.branch3 = nn.Sequential(Conv2d( 3, 24, 5, same_padding=True, bn=bn), - nn.MaxPool2d(2), - Conv2d(24, 48, 3, same_padding=True, bn=bn), - nn.MaxPool2d(2), - Conv2d(48, 24, 3, same_padding=True, bn=bn), - Conv2d(24, 12, 3, same_padding=True, bn=bn)) - - self.fuse = nn.Sequential(Conv2d( 30, 1, 1, same_padding=True, bn=bn)) - - initialize_weights(self.modules()) - + + self.branch1 = nn.Sequential( + Conv2d(3, 16, 9, same_padding=True, bn=bn), + nn.MaxPool2d(2), + Conv2d(16, 32, 7, same_padding=True, bn=bn), + nn.MaxPool2d(2), + Conv2d(32, 16, 7, same_padding=True, bn=bn), + Conv2d(16, 8, 7, same_padding=True, bn=bn), + ) + + self.branch2 = nn.Sequential( + Conv2d(3, 20, 7, same_padding=True, bn=bn), + nn.MaxPool2d(2), + Conv2d(20, 40, 5, same_padding=True, bn=bn), + nn.MaxPool2d(2), + Conv2d(40, 20, 5, same_padding=True, bn=bn), + Conv2d(20, 10, 5, same_padding=True, bn=bn), + ) + + self.branch3 = nn.Sequential( + Conv2d(3, 24, 5, same_padding=True, bn=bn), + nn.MaxPool2d(2), + Conv2d(24, 48, 3, same_padding=True, bn=bn), + nn.MaxPool2d(2), + Conv2d(48, 24, 3, same_padding=True, bn=bn), + Conv2d(24, 12, 3, same_padding=True, bn=bn), + ) + + self.fuse = nn.Sequential(Conv2d(30, 1, 1, same_padding=True, bn=bn)) + + initialize_weights(self.modules()) + def forward(self, im_data): x1 = self.branch1(im_data) x2 = self.branch2(im_data) x3 = self.branch3(im_data) - x = 
torch.cat((x1,x2,x3),1) + x = torch.cat((x1, x2, x3), 1) x = self.fuse(x) - x = F.upsample(x,scale_factor=4) + x = F.upsample(x, scale_factor=4) return x diff --git a/models/SCC_Model/Res101.py b/models/SCC_Model/Res101.py index 937b347..accd0a2 100644 --- a/models/SCC_Model/Res101.py +++ b/models/SCC_Model/Res101.py @@ -9,14 +9,17 @@ import pdb -model_path = '../PyTorch_Pretrained/resnet101-5d3b4d8f.pth' +model_path = "../PyTorch_Pretrained/resnet101-5d3b4d8f.pth" + class Res101(nn.Module): def __init__(self, pretrained=True): super(Res101, self).__init__() - self.de_pred = nn.Sequential(Conv2d(1024, 128, 1, same_padding=True, NL='relu'), - Conv2d(128, 1, 1, same_padding=True, NL='relu')) + self.de_pred = nn.Sequential( + Conv2d(1024, 128, 1, same_padding=True, NL="relu"), + Conv2d(128, 1, 1, same_padding=True, NL="relu"), + ) # initialize_weights(self.modules()) @@ -26,22 +29,18 @@ def __init__(self, pretrained=True): self.frontend = nn.Sequential( res.conv1, res.bn1, res.relu, res.maxpool, res.layer1, res.layer2 ) - self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 23, stride=1) + self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 23, stride=1) self.own_reslayer_3.load_state_dict(res.layer3.state_dict()) - - - - def forward(self,x): + def forward(self, x): - x = self.frontend(x) x = self.own_reslayer_3(x) x = self.de_pred(x) - x = F.upsample(x,scale_factor=8) + x = F.upsample(x, scale_factor=8) return x def _initialize_weights(self): @@ -52,17 +51,22 @@ def _initialize_weights(self): m.bias.data.fill_(0) elif isinstance(m, nn.BatchNorm2d): m.weight.fill_(1) - m.bias.data.fill_(0) + m.bias.data.fill_(0) def make_res_layer(block, planes, blocks, stride=1): downsample = None - inplanes=512 + inplanes = 512 if stride != 1 or inplanes != planes * block.expansion: downsample = nn.Sequential( - nn.Conv2d(inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + 
stride=stride, + bias=False, + ), nn.BatchNorm2d(planes * block.expansion), ) @@ -72,7 +76,7 @@ def make_res_layer(block, planes, blocks, stride=1): for i in range(1, blocks): layers.append(block(inplanes, planes)) - return nn.Sequential(*layers) + return nn.Sequential(*layers) class Bottleneck(nn.Module): @@ -82,10 +86,13 @@ def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -111,4 +118,4 @@ def forward(self, x): out += residual out = self.relu(out) - return out \ No newline at end of file + return out diff --git a/models/SCC_Model/Res101_SFCN.py b/models/SCC_Model/Res101_SFCN.py index 804367b..832cd79 100644 --- a/models/SCC_Model/Res101_SFCN.py +++ b/models/SCC_Model/Res101_SFCN.py @@ -2,7 +2,7 @@ import torch from torchvision import models -from misc.layer import convDU,convLR +from misc.layer import convDU, convLR import torch.nn.functional as F from misc.utils import * @@ -11,20 +11,19 @@ # model_path = '../PyTorch_Pretrained/resnet101-5d3b4d8f.pth' + class Res101_SFCN(nn.Module): def __init__(self, pretrained=True): super(Res101_SFCN, self).__init__() self.seen = 0 - self.backend_feat = [512, 512, 512,256,128,64] + self.backend_feat = [512, 512, 512, 256, 128, 64] self.frontend = [] - - self.backend = make_layers(self.backend_feat,in_channels = 1024,dilation = True) - self.convDU = 
convDU(in_out_channels=64,kernel_size=(1,9)) - self.convLR = convLR(in_out_channels=64,kernel_size=(9,1)) - - self.output_layer = nn.Sequential(nn.Conv2d(64, 1, kernel_size=1),nn.ReLU()) + self.backend = make_layers(self.backend_feat, in_channels=1024, dilation=True) + self.convDU = convDU(in_out_channels=64, kernel_size=(1, 9)) + self.convLR = convLR(in_out_channels=64, kernel_size=(9, 1)) + self.output_layer = nn.Sequential(nn.Conv2d(64, 1, kernel_size=1), nn.ReLU()) initialize_weights(self.modules()) @@ -34,13 +33,10 @@ def __init__(self, pretrained=True): self.frontend = nn.Sequential( res.conv1, res.bn1, res.relu, res.maxpool, res.layer1, res.layer2 ) - self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 23, stride=1) + self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 23, stride=1) self.own_reslayer_3.load_state_dict(res.layer3.state_dict()) - - - - def forward(self,x): + def forward(self, x): x = self.frontend(x) x = self.own_reslayer_3(x) @@ -51,37 +47,44 @@ def forward(self,x): x = self.convLR(x) x = self.output_layer(x) - x = F.upsample(x,scale_factor=8) + x = F.upsample(x, scale_factor=8) return x - - -def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False): + + +def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False): if dilation: d_rate = 2 else: d_rate = 1 layers = [] for v in cfg: - if v == 'M': + if v == "M": layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate,dilation = d_rate) + conv2d = nn.Conv2d( + in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate + ) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v - return nn.Sequential(*layers) + return nn.Sequential(*layers) def make_res_layer(block, planes, blocks, stride=1): downsample = None - inplanes=512 + inplanes = 512 if stride != 1 or inplanes != planes * block.expansion: downsample = 
nn.Sequential( - nn.Conv2d(inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), nn.BatchNorm2d(planes * block.expansion), ) @@ -91,7 +94,7 @@ def make_res_layer(block, planes, blocks, stride=1): for i in range(1, blocks): layers.append(block(inplanes, planes)) - return nn.Sequential(*layers) + return nn.Sequential(*layers) class Bottleneck(nn.Module): @@ -101,10 +104,13 @@ def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -130,4 +136,4 @@ def forward(self, x): out += residual out = self.relu(out) - return out \ No newline at end of file + return out diff --git a/models/SCC_Model/Res50.py b/models/SCC_Model/Res50.py index 11461d5..a2c6134 100644 --- a/models/SCC_Model/Res50.py +++ b/models/SCC_Model/Res50.py @@ -11,12 +11,15 @@ # model_path = '../PyTorch_Pretrained/resnet50-19c8e357.pth' + class Res50(nn.Module): - def __init__(self, pretrained=True): + def __init__(self, pretrained=True): super(Res50, self).__init__() - self.de_pred = nn.Sequential(Conv2d(1024, 128, 1, same_padding=True, NL='relu'), - Conv2d(128, 1, 1, same_padding=True, NL='relu')) + self.de_pred = nn.Sequential( + Conv2d(1024, 128, 1, same_padding=True, NL="relu"), + Conv2d(128, 1, 1, same_padding=True, 
NL="relu"), + ) initialize_weights(self.modules()) @@ -26,22 +29,18 @@ def __init__(self, pretrained=True): self.frontend = nn.Sequential( res.conv1, res.bn1, res.relu, res.maxpool, res.layer1, res.layer2 ) - self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 6, stride=1) + self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 6, stride=1) self.own_reslayer_3.load_state_dict(res.layer3.state_dict()) - - - - def forward(self,x): + def forward(self, x): - x = self.frontend(x) x = self.own_reslayer_3(x) x = self.de_pred(x) - x = F.upsample(x,scale_factor=8) + x = F.upsample(x, scale_factor=8) return x def _initialize_weights(self): @@ -52,17 +51,22 @@ def _initialize_weights(self): m.bias.data.fill_(0) elif isinstance(m, nn.BatchNorm2d): m.weight.fill_(1) - m.bias.data.fill_(0) + m.bias.data.fill_(0) def make_res_layer(block, planes, blocks, stride=1): downsample = None - inplanes=512 + inplanes = 512 if stride != 1 or inplanes != planes * block.expansion: downsample = nn.Sequential( - nn.Conv2d(inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), nn.BatchNorm2d(planes * block.expansion), ) @@ -72,7 +76,7 @@ def make_res_layer(block, planes, blocks, stride=1): for i in range(1, blocks): layers.append(block(inplanes, planes)) - return nn.Sequential(*layers) + return nn.Sequential(*layers) class Bottleneck(nn.Module): @@ -82,10 +86,13 @@ def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 
bias=False) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -111,4 +118,4 @@ def forward(self, x): out += residual out = self.relu(out) - return out \ No newline at end of file + return out diff --git a/models/SCC_Model/VGG.py b/models/SCC_Model/VGG.py index 70abcb6..ec7c0d5 100644 --- a/models/SCC_Model/VGG.py +++ b/models/SCC_Model/VGG.py @@ -8,6 +8,7 @@ # model_path = '../PyTorch_Pretrained/vgg16-397923af.pth' + class VGG(nn.Module): def __init__(self, pretrained=True): super(VGG, self).__init__() @@ -17,16 +18,15 @@ def __init__(self, pretrained=True): features = list(vgg.features.children()) self.features4 = nn.Sequential(*features[0:23]) - - self.de_pred = nn.Sequential(Conv2d(512, 128, 1, same_padding=True, NL='relu'), - Conv2d(128, 1, 1, same_padding=True, NL='relu')) - - + self.de_pred = nn.Sequential( + Conv2d(512, 128, 1, same_padding=True, NL="relu"), + Conv2d(128, 1, 1, same_padding=True, NL="relu"), + ) def forward(self, x): - x = self.features4(x) + x = self.features4(x) x = self.de_pred(x) - x = F.upsample(x,scale_factor=8) + x = F.upsample(x, scale_factor=8) - return x \ No newline at end of file + return x diff --git a/models/SCC_Model/VGG_decoder.py b/models/SCC_Model/VGG_decoder.py index 44b724c..0c5f9d7 100644 --- a/models/SCC_Model/VGG_decoder.py +++ b/models/SCC_Model/VGG_decoder.py @@ -8,6 +8,7 @@ # model_path = '../PyTorch_Pretrained/vgg16-397923af.pth' + class VGG_decoder(nn.Module): def __init__(self, pretrained=True): super(VGG_decoder, self).__init__() @@ -17,19 +18,25 @@ def __init__(self, pretrained=True): features = list(vgg.features.children()) self.features4 = nn.Sequential(*features[0:23]) - - self.de_pred = nn.Sequential(Conv2d( 512, 128, 3, same_padding=True, NL='relu'), - nn.ConvTranspose2d(128,64,4,stride=2,padding=1,output_padding=0,bias=True), - nn.ReLU(), - 
nn.ConvTranspose2d(64,32,4,stride=2,padding=1,output_padding=0,bias=True), - nn.ReLU(), - nn.ConvTranspose2d(32,16,4,stride=2,padding=1,output_padding=0,bias=True), - nn.ReLU(), - Conv2d(16, 1, 1, same_padding=True, NL='relu')) - + self.de_pred = nn.Sequential( + Conv2d(512, 128, 3, same_padding=True, NL="relu"), + nn.ConvTranspose2d( + 128, 64, 4, stride=2, padding=1, output_padding=0, bias=True + ), + nn.ReLU(), + nn.ConvTranspose2d( + 64, 32, 4, stride=2, padding=1, output_padding=0, bias=True + ), + nn.ReLU(), + nn.ConvTranspose2d( + 32, 16, 4, stride=2, padding=1, output_padding=0, bias=True + ), + nn.ReLU(), + Conv2d(16, 1, 1, same_padding=True, NL="relu"), + ) def forward(self, x): - x = self.features4(x) + x = self.features4(x) x = self.de_pred(x) - return x \ No newline at end of file + return x diff --git a/models/vgg.py b/models/vgg.py new file mode 100644 index 0000000..e5ba5e0 --- /dev/null +++ b/models/vgg.py @@ -0,0 +1,56 @@ +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +import torch +from torch.nn import functional as F + +__all__ = ['vgg19'] +model_urls = { + 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth', +} + +class VGG(nn.Module): + def __init__(self, features): + super(VGG, self).__init__() + self.features = features + self.reg_layer = nn.Sequential( + nn.Conv2d(512, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 128, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 1, 1) + ) + + def forward(self, x): + x = self.features(x) + x = F.upsample_bilinear(x, scale_factor=2) + x = self.reg_layer(x) + return torch.abs(x) + + +def make_layers(cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, 
nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + +cfg = { + 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512] +} + +def vgg19(): + """VGG 19-layer model (configuration "E") + model pre-trained on ImageNet + """ + model = VGG(make_layers(cfg['E'])) + model.load_state_dict(model_zoo.load_url(model_urls['vgg19']), strict=False) + return model + diff --git a/preprocess_dataset.py b/preprocess_dataset.py new file mode 100644 index 0000000..add9478 --- /dev/null +++ b/preprocess_dataset.py @@ -0,0 +1,105 @@ +from scipy.io import loadmat +from PIL import Image +import numpy as np +import os +from glob import glob +import cv2 +import argparse + + +def cal_new_size(im_h, im_w, min_size, max_size): + if im_h < im_w: + if im_h < min_size: + ratio = 1.0 * min_size / im_h + im_h = min_size + im_w = round(im_w*ratio) + elif im_h > max_size: + ratio = 1.0 * max_size / im_h + im_h = max_size + im_w = round(im_w*ratio) + else: + ratio = 1.0 + else: + if im_w < min_size: + ratio = 1.0 * min_size / im_w + im_w = min_size + im_h = round(im_h*ratio) + elif im_w > max_size: + ratio = 1.0 * max_size / im_w + im_w = max_size + im_h = round(im_h*ratio) + else: + ratio = 1.0 + return im_h, im_w, ratio + + +def find_dis(point): + square = np.sum(point*points, axis=1) + dis = np.sqrt(np.maximum(square[:, None] - 2*np.matmul(point, point.T) + square[None, :], 0.0)) + dis = np.mean(np.partition(dis, 3, axis=1)[:, 1:4], axis=1, keepdims=True) + return dis + +def generate_data(im_path): + im = Image.open(im_path) + im_w, im_h = im.size + mat_path = im_path.replace('.jpg', '_ann.mat') + points = loadmat(mat_path)['annPoints'].astype(np.float32) + idx_mask = (points[:, 0] >= 0) * (points[:, 0] <= im_w) * (points[:, 1] >= 0) * (points[:, 1] <= im_h) + points = points[idx_mask] + im_h, im_w, rr = cal_new_size(im_h, im_w, min_size, max_size) + im = np.array(im) + if rr != 1.0: + im = cv2.resize(np.array(im), (im_w, im_h), 
cv2.INTER_CUBIC) + points = points * rr + return Image.fromarray(im), points + + +def parse_args(): + parser = argparse.ArgumentParser(description='Test ') + parser.add_argument('--origin-dir', default='/home/teddy/UCF-QNRF_ECCV18', + help='original data directory') + parser.add_argument('--data-dir', default='/home/teddy/UCF-Train-Val-Test', + help='processed data directory') + args = parser.parse_args() + return args + +if __name__ == '__main__': + args = parse_args() + save_dir = args.data_dir + min_size = 512 + max_size = 2048 + + for phase in ['Train', 'Test']: + sub_dir = os.path.join(args.origin_dir, phase) + if phase == 'Train': + sub_phase_list = ['train', 'val'] + for sub_phase in sub_phase_list: + sub_save_dir = os.path.join(save_dir, sub_phase) + if not os.path.exists(sub_save_dir): + os.makedirs(sub_save_dir) + with open('{}.txt'.format(sub_phase)) as f: + for i in f: + im_path = os.path.join(sub_dir, i.strip()) + name = os.path.basename(im_path) + print(name) + im, points = generate_data(im_path) + if sub_phase == 'train': + dis = find_dis(points) + points = np.concatenate((points, dis), axis=1) + im_save_path = os.path.join(sub_save_dir, name) + im.save(im_save_path) + gd_save_path = im_save_path.replace('jpg', 'npy') + np.save(gd_save_path, points) + else: + sub_save_dir = os.path.join(save_dir, 'test') + if not os.path.exists(sub_save_dir): + os.makedirs(sub_save_dir) + im_list = glob(os.path.join(sub_dir, '*jpg')) + for im_path in im_list: + name = os.path.basename(im_path) + print(name) + im, points = generate_data(im_path) + im_save_path = os.path.join(sub_save_dir, name) + im.save(im_save_path) + gd_save_path = im_save_path.replace('jpg', 'npy') + np.save(gd_save_path, points) diff --git a/test.py b/test.py index 148a2a5..5c62071 100644 --- a/test.py +++ b/test.py @@ -18,140 +18,150 @@ torch.cuda.set_device(0) torch.backends.cudnn.benchmark = True -exp_name = '../SHHB_results' +exp_name = "../SHHB_results" if not os.path.exists(exp_name): 
os.mkdir(exp_name) -if not os.path.exists(exp_name+'/pred'): - os.mkdir(exp_name+'/pred') - -if not os.path.exists(exp_name+'/gt'): - os.mkdir(exp_name+'/gt') - -mean_std = ([0.452016860247, 0.447249650955, 0.431981861591],[0.23242045939, 0.224925786257, 0.221840232611]) -img_transform = standard_transforms.Compose([ - standard_transforms.ToTensor(), - standard_transforms.Normalize(*mean_std) - ]) -restore = standard_transforms.Compose([ - own_transforms.DeNormalize(*mean_std), - standard_transforms.ToPILImage() - ]) +if not os.path.exists(exp_name + "/pred"): + os.mkdir(exp_name + "/pred") + +if not os.path.exists(exp_name + "/gt"): + os.mkdir(exp_name + "/gt") + +mean_std = ( + [0.452016860247, 0.447249650955, 0.431981861591], + [0.23242045939, 0.224925786257, 0.221840232611], +) +img_transform = standard_transforms.Compose( + [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] +) +restore = standard_transforms.Compose( + [own_transforms.DeNormalize(*mean_std), standard_transforms.ToPILImage()] +) pil_to_tensor = standard_transforms.ToTensor() -dataRoot = '../ProcessedData/shanghaitech_part_B/test' +dataRoot = "../ProcessedData/shanghaitech_part_B/test" + +model_path = "../08-SANet_all_ep_57_mae_42.4_mse_85.4.pth" -model_path = 'xxx.pth' def main(): - - file_list = [filename for root,dirs,filename in os.walk(dataRoot+'/img/')] + + file_list = [filename for root, dirs, filename in os.walk(dataRoot + "/img/")] test(file_list[0], model_path) - + def test(file_list, model_path): - net = CrowdCounter(cfg.GPU_ID,cfg.NET) - net.load_state_dict(torch.load(model_path)) + net = CrowdCounter(cfg.GPU_ID, cfg.NET) + + state_dict = torch.load(model_path) + state_dict = {k.replace("module.", ""):v for k,v in state_dict.items()} + net.load_state_dict(state_dict) net.cuda() net.eval() - f1 = plt.figure(1) gts = [] preds = [] for filename in file_list: - print( filename ) - imgname = dataRoot + '/img/' + filename - filename_no_ext = filename.split('.')[0] + 
print(filename) + imgname = dataRoot + "/img/" + filename + filename_no_ext = filename.split(".")[0] - denname = dataRoot + '/den/' + filename_no_ext + '.csv' + denname = dataRoot + "/den/" + filename_no_ext + ".csv" - den = pd.read_csv(denname, sep=',',header=None).values + den = pd.read_csv(denname, sep=",", header=None).values den = den.astype(np.float32, copy=False) img = Image.open(imgname) - if img.mode == 'L': - img = img.convert('RGB') - + if img.mode == "L": + img = img.convert("RGB") img = img_transform(img) gt = np.sum(den) with torch.no_grad(): - img = Variable(img[None,:,:,:]).cuda() + img = Variable(img[None, :, :, :]).cuda() pred_map = net.test_forward(img) - sio.savemat(exp_name+'/pred/'+filename_no_ext+'.mat',{'data':pred_map.squeeze().cpu().numpy()/100.}) - sio.savemat(exp_name+'/gt/'+filename_no_ext+'.mat',{'data':den}) + sio.savemat( + exp_name + "/pred/" + filename_no_ext + ".mat", + {"data": pred_map.squeeze().cpu().numpy() / 100.0}, + ) + sio.savemat(exp_name + "/gt/" + filename_no_ext + ".mat", {"data": den}) - pred_map = pred_map.cpu().data.numpy()[0,0,:,:] + pred_map = pred_map.cpu().data.numpy()[0, 0, :, :] + pred = np.sum(pred_map) / 100.0 + pred_map = pred_map / np.max(pred_map + 1e-20) - pred = np.sum(pred_map)/100.0 - pred_map = pred_map/np.max(pred_map+1e-20) - - den = den/np.max(den+1e-20) + den = den / np.max(den + 1e-20) - den_frame = plt.gca() - plt.imshow(den, 'jet') + plt.imshow(den, "jet") den_frame.axes.get_yaxis().set_visible(False) den_frame.axes.get_xaxis().set_visible(False) - den_frame.spines['top'].set_visible(False) - den_frame.spines['bottom'].set_visible(False) - den_frame.spines['left'].set_visible(False) - den_frame.spines['right'].set_visible(False) - plt.savefig(exp_name+'/'+filename_no_ext+'_gt_'+str(int(gt))+'.png',\ - bbox_inches='tight',pad_inches=0,dpi=150) + den_frame.spines["top"].set_visible(False) + den_frame.spines["bottom"].set_visible(False) + den_frame.spines["left"].set_visible(False) + 
den_frame.spines["right"].set_visible(False) + plt.savefig( + exp_name + "/" + filename_no_ext + "_gt_" + str(int(gt)) + ".png", + bbox_inches="tight", + pad_inches=0, + dpi=150, + ) plt.close() - + # sio.savemat(exp_name+'/'+filename_no_ext+'_gt_'+str(int(gt))+'.mat',{'data':den}) pred_frame = plt.gca() - plt.imshow(pred_map, 'jet') + plt.imshow(pred_map, "jet") pred_frame.axes.get_yaxis().set_visible(False) pred_frame.axes.get_xaxis().set_visible(False) - pred_frame.spines['top'].set_visible(False) - pred_frame.spines['bottom'].set_visible(False) - pred_frame.spines['left'].set_visible(False) - pred_frame.spines['right'].set_visible(False) - plt.savefig(exp_name+'/'+filename_no_ext+'_pred_'+str(float(pred))+'.png',\ - bbox_inches='tight',pad_inches=0,dpi=150) + pred_frame.spines["top"].set_visible(False) + pred_frame.spines["bottom"].set_visible(False) + pred_frame.spines["left"].set_visible(False) + pred_frame.spines["right"].set_visible(False) + plt.savefig( + exp_name + "/" + filename_no_ext + "_pred_" + str(float(pred)) + ".png", + bbox_inches="tight", + pad_inches=0, + dpi=150, + ) plt.close() # sio.savemat(exp_name+'/'+filename_no_ext+'_pred_'+str(float(pred))+'.mat',{'data':pred_map}) - diff = den-pred_map + diff = den - pred_map diff_frame = plt.gca() - plt.imshow(diff, 'jet') + plt.imshow(diff, "jet") plt.colorbar() diff_frame.axes.get_yaxis().set_visible(False) diff_frame.axes.get_xaxis().set_visible(False) - diff_frame.spines['top'].set_visible(False) - diff_frame.spines['bottom'].set_visible(False) - diff_frame.spines['left'].set_visible(False) - diff_frame.spines['right'].set_visible(False) - plt.savefig(exp_name+'/'+filename_no_ext+'_diff.png',\ - bbox_inches='tight',pad_inches=0,dpi=150) + diff_frame.spines["top"].set_visible(False) + diff_frame.spines["bottom"].set_visible(False) + diff_frame.spines["left"].set_visible(False) + diff_frame.spines["right"].set_visible(False) + plt.savefig( + exp_name + "/" + filename_no_ext + "_diff.png", + 
bbox_inches="tight", + pad_inches=0, + dpi=150, + ) plt.close() # sio.savemat(exp_name+'/'+filename_no_ext+'_diff.mat',{'data':diff}) - - -if __name__ == '__main__': +if __name__ == "__main__": main() - - - - diff --git a/test_bay.py b/test_bay.py new file mode 100644 index 0000000..1fb5c12 --- /dev/null +++ b/test_bay.py @@ -0,0 +1,48 @@ +import torch +import os +import numpy as np +from datasets.crowd_sh import Crowd +from models.vgg import vgg19 +import argparse + +args = None + + +def parse_args(): + parser = argparse.ArgumentParser(description='Test ') + parser.add_argument('--data-dir', default='/home/teddy/UCF-Train-Val-Test', + help='training data directory') + parser.add_argument('--save-dir', default='/home/teddy/vgg', + help='model directory') + parser.add_argument('--device', default='0', help='assign device') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + os.environ['CUDA_VISIBLE_DEVICES'] = args.device.strip() # set vis gpu + + datasets = Crowd(os.path.join(args.data_dir, 'test'), 512, 8, is_gray=False, method='val') + dataloader = torch.utils.data.DataLoader(datasets, 1, shuffle=False, + num_workers=8, pin_memory=False) + model = vgg19() + device = torch.device('cuda') + model.to(device) + model.load_state_dict(torch.load(os.path.join(args.save_dir, 'best_model.pth'), device)) + epoch_minus = [] + + for inputs, count, name in dataloader: + inputs = inputs.to(device) + assert inputs.size(0) == 1, 'the batch size should equal to 1' + with torch.set_grad_enabled(False): + outputs = model(inputs) + temp_minu = count[0].item() - torch.sum(outputs).item() + print(name, temp_minu, count[0].item(), torch.sum(outputs).item()) + epoch_minus.append(temp_minu) + + epoch_minus = np.array(epoch_minus) + mse = np.sqrt(np.mean(np.square(epoch_minus))) + mae = np.mean(np.abs(epoch_minus)) + log_str = 'Final Test: mae {}, mse {}'.format(mae, mse) + print(log_str) diff --git a/train.py b/train.py index 
9462495..50def36 100644 --- a/train.py +++ b/train.py @@ -1,10 +1,11 @@ import os import numpy as np import torch +import imp from config import cfg -#------------prepare enviroment------------ +# ------------prepare enviroment------------ seed = cfg.SEED if seed is not None: np.random.seed(seed) @@ -12,52 +13,42 @@ torch.cuda.manual_seed(seed) gpus = cfg.GPU_ID -if len(gpus)==1: +if len(gpus) == 1: torch.cuda.set_device(gpus[0]) torch.backends.cudnn.benchmark = True -#------------prepare data loader------------ +# ------------prepare data loader------------ data_mode = cfg.DATASET -if data_mode is 'SHHA': - from datasets.SHHA.loading_data import loading_data - from datasets.SHHA.setting import cfg_data -elif data_mode is 'SHHB': - from datasets.SHHB.loading_data import loading_data - from datasets.SHHB.setting import cfg_data -elif data_mode is 'QNRF': - from datasets.QNRF.loading_data import loading_data - from datasets.QNRF.setting import cfg_data -elif data_mode is 'UCF50': - from datasets.UCF50.loading_data import loading_data - from datasets.UCF50.setting import cfg_data -elif data_mode is 'WE': - from datasets.WE.loading_data import loading_data - from datasets.WE.setting import cfg_data -elif data_mode is 'GCC': - from datasets.GCC.loading_data import loading_data - from datasets.GCC.setting import cfg_data -elif data_mode is 'Mall': - from datasets.Mall.loading_data import loading_data - from datasets.Mall.setting import cfg_data -elif data_mode is 'UCSD': - from datasets.UCSD.loading_data import loading_data - from datasets.UCSD.setting import cfg_data - - -#------------Prepare Trainer------------ + +dataset_import_path = "datasets/" + data_mode +loading_data = getattr( + imp.load_source("loader", dataset_import_path + "/loading_data.py"), "loading_data" +) +cfg_data = getattr( + imp.load_source("settings", dataset_import_path + "/setting.py"), "cfg_data" +) + +# ------------Prepare Trainer------------ net = cfg.NET -if net in ['MCNN', 'AlexNet', 'VGG', 
'VGG_DECODER', 'Res50', 'Res101', 'CSRNet','Res101_SFCN']: +if net in [ + "MCNN", + "AlexNet", + "VGG", + "VGG_DECODER", + "Res50", + "Res101", + "CSRNet", + "Res101_SFCN", +]: from trainer import Trainer -elif net in ['SANet']: - from trainer_for_M2TCC import Trainer # double losses but signle output -elif net in ['CMTL']: - from trainer_for_CMTL import Trainer # double losses and double outputs -elif net in ['PCCNet']: - from trainer_for_M3T3OCC import Trainer - -#------------Start Training------------ +elif net in ["SANet"]: + from trainer_for_M2TCC import Trainer # double losses but single output +elif net in ["CMTL"]: + from trainer_for_CMTL import Trainer # double losses and double outputs + +# ------------Start Training------------ pwd = os.path.split(os.path.realpath(__file__))[0] -cc_trainer = Trainer(loading_data,cfg_data,pwd) +cc_trainer = Trainer(loading_data, cfg_data, pwd) cc_trainer.forward() diff --git a/train_bay.py b/train_bay.py new file mode 100644 index 0000000..90736de --- /dev/null +++ b/train_bay.py @@ -0,0 +1,59 @@ +from utils.regression_trainer import RegTrainer +import argparse +import os +import torch +args = None + +def parse_args(): + parser = argparse.ArgumentParser(description='Train ') + parser.add_argument('--data-dir', default='/home/teddy/UCF-Train-Val-Test', + help='training data directory') + parser.add_argument('--save-dir', default='/home/teddy/vgg', + help='directory to save models.') + + parser.add_argument('--lr', type=float, default=1e-5, + help='the initial learning rate') + parser.add_argument('--weight-decay', type=float, default=1e-4, + help='the weight decay') + parser.add_argument('--resume', default='', + help='the path of resume training model') + parser.add_argument('--max-model-num', type=int, default=1, + help='max models num to save ') + parser.add_argument('--max-epoch', type=int, default=1000, + help='max training epoch') + parser.add_argument('--val-epoch', type=int, default=5, + help='the num of steps to 
log training information') + parser.add_argument('--val-start', type=int, default=600, + help='the epoch start to val') + + parser.add_argument('--batch-size', type=int, default=1, + help='train batch size') + parser.add_argument('--device', default='0', help='assign device') + parser.add_argument('--num-workers', type=int, default=8, + help='the num of training process') + + parser.add_argument('--is-gray', type=bool, default=False, + help='whether the input image is gray') + parser.add_argument('--crop-size', type=int, default=512, + help='the crop size of the train image') + parser.add_argument('--downsample-ratio', type=int, default=8, + help='downsample ratio') + + parser.add_argument('--use-background', type=bool, default=True, + help='whether to use background modelling') + parser.add_argument('--sigma', type=float, default=8.0, + help='sigma for likelihood') + parser.add_argument('--background-ratio', type=float, default=1.0, + help='background ratio') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + print(args.batch_size) + torch.backends.cudnn.benchmark = True + os.environ['CUDA_VISIBLE_DEVICES'] = args.device.strip() # set vis gpu + trainer = RegTrainer(args) + trainer.setup() + trainer.train() diff --git a/trainer.py b/trainer.py index ef2d3e1..151a37a 100644 --- a/trainer.py +++ b/trainer.py @@ -6,85 +6,95 @@ from torch.optim.lr_scheduler import StepLR from models.CC import CrowdCounter -from config import cfg +from config import cfg as default_cfg from misc.utils import * import pdb -class Trainer(): - def __init__(self, dataloader, cfg_data, pwd): +class Trainer: + def __init__(self, dataloader, cfg_data, pwd, cfg=None): self.cfg_data = cfg_data - - self.data_mode = cfg.DATASET - self.exp_name = cfg.EXP_NAME - self.exp_path = cfg.EXP_PATH + if cfg is None: + self.cfg = default_cfg + else: + self.cfg = cfg + + self.data_mode = self.cfg.DATASET + self.exp_name = self.cfg.EXP_NAME + self.exp_path = 
self.cfg.EXP_PATH self.pwd = pwd - self.net_name = cfg.NET - self.net = CrowdCounter(cfg.GPU_ID,self.net_name).cuda() - self.optimizer = optim.Adam(self.net.CCN.parameters(), lr=cfg.LR, weight_decay=1e-4) - # self.optimizer = optim.SGD(self.net.parameters(), cfg.LR, momentum=0.95,weight_decay=5e-4) - self.scheduler = StepLR(self.optimizer, step_size=cfg.NUM_EPOCH_LR_DECAY, gamma=cfg.LR_DECAY) - - self.train_record = {'best_mae': 1e20, 'best_mse':1e20, 'best_model_name': ''} - self.timer = {'iter time' : Timer(),'train time' : Timer(),'val time' : Timer()} + self.net_name = self.cfg.NET + self.net = CrowdCounter(self.cfg.GPU_ID, self.net_name).cuda() + self.optimizer = optim.Adam( + self.net.CCN.parameters(), lr=self.cfg.LR, weight_decay=1e-4 + ) + # self.optimizer = optim.SGD(self.net.parameters(), self.cfg.LR, momentum=0.95,weight_decay=5e-4) + self.scheduler = StepLR( + self.optimizer, + step_size=self.cfg.NUM_EPOCH_LR_DECAY, + gamma=self.cfg.LR_DECAY, + ) + + self.train_record = {"best_mae": 1e20, "best_mse": 1e20, "best_model_name": ""} + self.timer = {"iter time": Timer(), "train time": Timer(), "val time": Timer()} self.epoch = 0 self.i_tb = 0 - - if cfg.PRE_GCC: - self.net.load_state_dict(torch.load(cfg.PRE_GCC_MODEL)) - self.train_loader, self.val_loader, self.restore_transform = dataloader() + if self.cfg.PRE_GCC: + self.net.load_state_dict(torch.load(self.cfg.PRE_GCC_MODEL)) - if cfg.RESUME: - latest_state = torch.load(cfg.RESUME_PATH) - self.net.load_state_dict(latest_state['net']) - self.optimizer.load_state_dict(latest_state['optimizer']) - self.scheduler.load_state_dict(latest_state['scheduler']) - self.epoch = latest_state['epoch'] + 1 - self.i_tb = latest_state['i_tb'] - self.train_record = latest_state['train_record'] - self.exp_path = latest_state['exp_path'] - self.exp_name = latest_state['exp_name'] + self.train_loader, self.val_loader, self.restore_transform = dataloader(cfg_data=cfg_data) - self.writer, self.log_txt = logger(self.exp_path, 
self.exp_name, self.pwd, 'exp', resume=cfg.RESUME) + if self.cfg.RESUME: + latest_state = torch.load(self.cfg.RESUME_PATH) + self.net.load_state_dict(latest_state["net"]) + self.optimizer.load_state_dict(latest_state["optimizer"]) + self.scheduler.load_state_dict(latest_state["scheduler"]) + self.epoch = latest_state["epoch"] + 1 + self.i_tb = latest_state["i_tb"] + self.train_record = latest_state["train_record"] + self.exp_path = latest_state["exp_path"] + self.exp_name = latest_state["exp_name"] + self.writer, self.log_txt = logger( + self.exp_path, self.exp_name, self.pwd, "exp", resume=self.cfg.RESUME + ) def forward(self): # self.validate_V3() - for epoch in range(self.epoch,cfg.MAX_EPOCH): + for epoch in range(self.epoch, self.cfg.MAX_EPOCH): self.epoch = epoch - if epoch > cfg.LR_DECAY_START: + if epoch > self.cfg.LR_DECAY_START: self.scheduler.step() - - # training - self.timer['train time'].tic() + + # training + self.timer["train time"].tic() self.train() - self.timer['train time'].toc(average=False) + self.timer["train time"].toc(average=False) - print( 'train time: {:.2f}s'.format(self.timer['train time'].diff) ) - print( '='*20 ) + print("train time: {:.2f}s".format(self.timer["train time"].diff)) + print("=" * 20) # validation - if epoch%cfg.VAL_FREQ==0 or epoch>cfg.VAL_DENSE_START: - self.timer['val time'].tic() - if self.data_mode in ['SHHA', 'SHHB', 'QNRF', 'UCF50']: + if epoch % self.cfg.VAL_FREQ == 0 or epoch > self.cfg.VAL_DENSE_START: + self.timer["val time"].tic() + if self.data_mode in ["SHHA", "SHHB", "QNRF", "UCF50"]: self.validate_V1() - elif self.data_mode is 'WE': + elif self.data_mode == "WE": self.validate_V2() - elif self.data_mode is 'GCC': + elif self.data_mode == "GCC": self.validate_V3() - self.timer['val time'].toc(average=False) - print( 'val time: {:.2f}s'.format(self.timer['val time'].diff) ) - + self.timer["val time"].toc(average=False) + print("val time: {:.2f}s".format(self.timer["val time"].diff)) - def train(self): # 
training for all datasets + def train(self): # training for all datasets self.net.train() for i, data in enumerate(self.train_loader, 0): - self.timer['iter time'].tic() + self.timer["iter time"].tic() img, gt_map = data img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() @@ -95,19 +105,32 @@ def train(self): # training for all datasets loss.backward() self.optimizer.step() - if (i + 1) % cfg.PRINT_FREQ == 0: + if (i + 1) % self.cfg.PRINT_FREQ == 0: self.i_tb += 1 - self.writer.add_scalar('train_loss', loss.item(), self.i_tb) - self.timer['iter time'].toc(average=False) - print( '[ep %d][it %d][loss %.4f][lr %.4f][%.2fs]' % \ - (self.epoch + 1, i + 1, loss.item(), self.optimizer.param_groups[0]['lr']*10000, self.timer['iter time'].diff) ) - print( ' [cnt: gt: %.1f pred: %.2f]' % (gt_map[0].sum().data/self.cfg_data.LOG_PARA, pred_map[0].sum().data/self.cfg_data.LOG_PARA) ) - - - def validate_V1(self):# validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 + self.writer.add_scalar("train_loss", loss.item(), self.i_tb) + self.timer["iter time"].toc(average=False) + print( + "[ep %d][it %d][loss %.4f][lr %.4f][%.2fs]" + % ( + self.epoch + 1, + i + 1, + loss.item(), + self.optimizer.param_groups[0]["lr"] * 10000, + self.timer["iter time"].diff, + ) + ) + print( + " [cnt: gt: %.1f pred: %.2f]" + % ( + gt_map[0].sum().data / self.cfg_data.LOG_PARA, + pred_map[0].sum().data / self.cfg_data.LOG_PARA, + ) + ) + + def validate_V1(self): # validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 self.net.eval() - + losses = AverageMeter() maes = AverageMeter() mses = AverageMeter() @@ -119,37 +142,53 @@ def validate_V1(self):# validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() - pred_map = self.net.forward(img,gt_map) + pred_map = self.net.forward(img, gt_map) pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() for i_img in range(pred_map.shape[0]): - - pred_cnt = np.sum(pred_map[i_img])/self.cfg_data.LOG_PARA - 
gt_count = np.sum(gt_map[i_img])/self.cfg_data.LOG_PARA - + pred_cnt = np.sum(pred_map[i_img]) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map[i_img]) / self.cfg_data.LOG_PARA + losses.update(self.net.loss.item()) - maes.update(abs(gt_count-pred_cnt)) - mses.update((gt_count-pred_cnt)*(gt_count-pred_cnt)) - if vi==0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) - + maes.update(abs(gt_count - pred_cnt)) + mses.update((gt_count - pred_cnt) * (gt_count - pred_cnt)) + if vi == 0: + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) + mae = maes.avg mse = np.sqrt(mses.avg) loss = losses.avg - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mse', mse, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, mse, loss],self.train_record,self.log_txt) - print_summary(self.exp_name,[mae, mse, loss],self.train_record) - - - def validate_V2(self):# validate_V2 for WE + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mse", mse, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, mse, loss], + self.train_record, + self.log_txt, + ) + print_summary(self.exp_name, [mae, mse, loss], self.train_record) + + def validate_V2(self): # validate_V2 for WE self.net.eval() @@ -157,13 +196,18 @@ def validate_V2(self):# validate_V2 for WE maes = AverageCategoryMeter(5) roi_mask = [] - from datasets.WE.setting import cfg_data + from datasets.WE.setting import cfg_data from scipy import io as sio + for val_folder in cfg_data.VAL_FOLDER: - 
roi_mask.append(sio.loadmat(os.path.join(cfg_data.DATA_PATH,'test',val_folder + '_roi.mat'))['BW']) - - for i_sub,i_loader in enumerate(self.val_loader,0): + roi_mask.append( + sio.loadmat( + os.path.join(cfg_data.DATA_PATH, "test", val_folder + "_roi.mat") + )["BW"] + ) + + for i_sub, i_loader in enumerate(self.val_loader, 0): mask = roi_mask[i_sub] for vi, data in enumerate(i_loader, 0): @@ -173,51 +217,74 @@ def validate_V2(self):# validate_V2 for WE img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() - pred_map = self.net.forward(img,gt_map) + pred_map = self.net.forward(img, gt_map) pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() for i_img in range(pred_map.shape[0]): - - pred_cnt = np.sum(pred_map[i_img])/self.cfg_data.LOG_PARA - gt_count = np.sum(gt_map[i_img])/self.cfg_data.LOG_PARA - - losses.update(self.net.loss.item(),i_sub) - maes.update(abs(gt_count-pred_cnt),i_sub) - if vi==0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) - - mae = np.average(maes.avg) - loss = np.average(losses.avg) - - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mae_s1', maes.avg[0], self.epoch + 1) - self.writer.add_scalar('mae_s2', maes.avg[1], self.epoch + 1) - self.writer.add_scalar('mae_s3', maes.avg[2], self.epoch + 1) - self.writer.add_scalar('mae_s4', maes.avg[3], self.epoch + 1) - self.writer.add_scalar('mae_s5', maes.avg[4], self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, 0, loss],self.train_record,self.log_txt) - print_WE_summary(self.log_txt,self.epoch,[mae, 0, loss],self.train_record,maes) - - + pred_cnt = np.sum(pred_map[i_img]) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map[i_img]) / self.cfg_data.LOG_PARA + + losses.update(self.net.loss.item(), i_sub) + maes.update(abs(gt_count - 
pred_cnt), i_sub) + if vi == 0: + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) + mae = np.average(maes.avg) + loss = np.average(losses.avg) - def validate_V3(self):# validate_V3 for GCC + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mae_s1", maes.avg[0], self.epoch + 1) + self.writer.add_scalar("mae_s2", maes.avg[1], self.epoch + 1) + self.writer.add_scalar("mae_s3", maes.avg[2], self.epoch + 1) + self.writer.add_scalar("mae_s4", maes.avg[3], self.epoch + 1) + self.writer.add_scalar("mae_s5", maes.avg[4], self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, 0, loss], + self.train_record, + self.log_txt, + ) + print_WE_summary( + self.log_txt, self.epoch, [mae, 0, loss], self.train_record, maes + ) + + def validate_V3(self): # validate_V3 for GCC self.net.eval() - + losses = AverageMeter() maes = AverageMeter() mses = AverageMeter() - c_maes = {'level':AverageCategoryMeter(9), 'time':AverageCategoryMeter(8),'weather':AverageCategoryMeter(7)} - c_mses = {'level':AverageCategoryMeter(9), 'time':AverageCategoryMeter(8),'weather':AverageCategoryMeter(7)} - + c_maes = { + "level": AverageCategoryMeter(9), + "time": AverageCategoryMeter(8), + "weather": AverageCategoryMeter(7), + } + c_mses = { + "level": AverageCategoryMeter(9), + "time": AverageCategoryMeter(8), + "weather": AverageCategoryMeter(7), + } for vi, data in enumerate(self.val_loader, 0): img, gt_map, attributes_pt = data @@ -226,46 +293,67 @@ def validate_V3(self):# validate_V3 for GCC img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() - - pred_map = self.net.forward(img,gt_map) + pred_map = self.net.forward(img, gt_map) pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() for i_img in 
range(pred_map.shape[0]): - - pred_cnt = np.sum(pred_map[i_img])/self.cfg_data.LOG_PARA - gt_count = np.sum(gt_map[i_img])/self.cfg_data.LOG_PARA - s_mae = abs(gt_count-pred_cnt) - s_mse = (gt_count-pred_cnt)*(gt_count-pred_cnt) + pred_cnt = np.sum(pred_map[i_img]) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map[i_img]) / self.cfg_data.LOG_PARA + + s_mae = abs(gt_count - pred_cnt) + s_mse = (gt_count - pred_cnt) * (gt_count - pred_cnt) losses.update(self.net.loss.item()) maes.update(s_mae) - mses.update(s_mse) - attributes_pt = attributes_pt.squeeze() - c_maes['level'].update(s_mae,attributes_pt[i_img][0]) - c_mses['level'].update(s_mse,attributes_pt[i_img][0]) - c_maes['time'].update(s_mae,attributes_pt[i_img][1]/3) - c_mses['time'].update(s_mse,attributes_pt[i_img][1]/3) - c_maes['weather'].update(s_mae,attributes_pt[i_img][2]) - c_mses['weather'].update(s_mse,attributes_pt[i_img][2]) - - - if vi==0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) - + mses.update(s_mse) + attributes_pt = attributes_pt.squeeze() + c_maes["level"].update(s_mae, attributes_pt[i_img][0]) + c_mses["level"].update(s_mse, attributes_pt[i_img][0]) + c_maes["time"].update(s_mae, attributes_pt[i_img][1] / 3) + c_mses["time"].update(s_mse, attributes_pt[i_img][1] / 3) + c_maes["weather"].update(s_mae, attributes_pt[i_img][2]) + c_mses["weather"].update(s_mse, attributes_pt[i_img][2]) + + if vi == 0: + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) + loss = losses.avg mae = maes.avg mse = np.sqrt(mses.avg) - - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mse', mse, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, mse, loss],self.train_record,self.log_txt) - - - 
print_GCC_summary(self.log_txt,self.epoch,[mae, mse, loss],self.train_record,c_maes,c_mses) + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mse", mse, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, mse, loss], + self.train_record, + self.log_txt, + ) + + print_GCC_summary( + self.log_txt, + self.epoch, + [mae, mse, loss], + self.train_record, + c_maes, + c_mses, + ) diff --git a/trainer_for_CMTL.py b/trainer_for_CMTL.py index fb9ada4..7917363 100644 --- a/trainer_for_CMTL.py +++ b/trainer_for_CMTL.py @@ -6,72 +6,81 @@ from torch.optim.lr_scheduler import StepLR from models.M2T2OCC import CrowdCounter -from config import cfg +from config import cfg as default_cfg from misc.utils import * import pdb - - -class Trainer(): - def __init__(self, dataloader, cfg_data, pwd): +class Trainer: + def __init__(self, dataloader, cfg_data, pwd, cfg=None): self.cfg_data = cfg_data - - self.data_mode = cfg.DATASET - self.exp_name = cfg.EXP_NAME - self.exp_path = cfg.EXP_PATH + if cfg is None: + self.cfg = default_cfg + else: + self.cfg = cfg + + self.data_mode = self.cfg.DATASET + self.exp_name = self.cfg.EXP_NAME + self.exp_path = self.cfg.EXP_PATH self.pwd = pwd - self.net_name = cfg.NET + self.net_name = self.cfg.NET - self.train_loader, self.val_loader, self.restore_transform = dataloader() + self.train_loader, self.val_loader, self.restore_transform = dataloader(cfg_data=cfg_data) - - if self.net_name in ['CMTL']: + if self.net_name in ["CMTL"]: # use for gt's class labeling - self.max_gt_count = 0. - self.min_gt_count = 0x7f7f7f + self.max_gt_count = 0.0 + self.min_gt_count = 0x7F7F7F self.num_classes = 10 - self.bin_val = 0. 
+ self.bin_val = 0.0 self.pre_max_min_bin_val() ce_weights = torch.from_numpy(self.pre_weights()).float() loss_1_fn = nn.MSELoss() - + loss_2_fn = nn.BCELoss(weight=ce_weights) - self.net = CrowdCounter(cfg.GPU_ID, self.net_name,loss_1_fn,loss_2_fn).cuda() - self.optimizer = optim.Adam(self.net.CCN.parameters(), lr=cfg.LR, weight_decay=1e-4) + self.net = CrowdCounter( + self.cfg.GPU_ID, self.net_name, loss_1_fn, loss_2_fn + ).cuda() + self.optimizer = optim.Adam( + self.net.CCN.parameters(), lr=self.cfg.LR, weight_decay=1e-4 + ) # self.optimizer = optim.SGD(self.net.parameters(), cfg.LR, momentum=0.95,weight_decay=5e-4) - self.scheduler = StepLR(self.optimizer, step_size=cfg.NUM_EPOCH_LR_DECAY, gamma=cfg.LR_DECAY) + self.scheduler = StepLR( + self.optimizer, + step_size=self.cfg.NUM_EPOCH_LR_DECAY, + gamma=self.cfg.LR_DECAY, + ) - self.train_record = {'best_mae': 1e20, 'best_mse': 1e20, 'best_model_name': ''} - self.timer = {'iter time': Timer(), 'train time': Timer(), 'val time': Timer()} + self.train_record = {"best_mae": 1e20, "best_mse": 1e20, "best_model_name": ""} + self.timer = {"iter time": Timer(), "train time": Timer(), "val time": Timer()} self.i_tb = 0 self.epoch = 0 - if cfg.PRE_GCC: - self.net.load_state_dict(torch.load(cfg.PRE_GCC_MODEL)) - - if cfg.RESUME: - latest_state = torch.load(cfg.RESUME_PATH) - self.net.load_state_dict(latest_state['net']) - self.optimizer.load_state_dict(latest_state['optimizer']) - self.scheduler.load_state_dict(latest_state['scheduler']) - self.epoch = latest_state['epoch'] + 1 - self.i_tb = latest_state['i_tb'] - self.train_record = latest_state['train_record'] - self.exp_path = latest_state['exp_path'] - self.exp_name = latest_state['exp_name'] - - self.writer, self.log_txt = logger(self.exp_path, self.exp_name, self.pwd, 'exp', resume=cfg.RESUME) + if self.cfg.PRE_GCC: + self.net.load_state_dict(torch.load(self.cfg.PRE_GCC_MODEL)) + if self.cfg.RESUME: + latest_state = torch.load(self.cfg.RESUME_PATH) + 
self.net.load_state_dict(latest_state["net"]) + self.optimizer.load_state_dict(latest_state["optimizer"]) + self.scheduler.load_state_dict(latest_state["scheduler"]) + self.epoch = latest_state["epoch"] + 1 + self.i_tb = latest_state["i_tb"] + self.train_record = latest_state["train_record"] + self.exp_path = latest_state["exp_path"] + self.exp_name = latest_state["exp_name"] + self.writer, self.log_txt = logger( + self.exp_path, self.exp_name, self.pwd, "exp", resume=self.cfg.RESUME + ) def pre_max_min_bin_val(self): for i, data in enumerate(self.train_loader, 0): @@ -86,8 +95,8 @@ def pre_max_min_bin_val(self): elif temp_count < self.min_gt_count: self.min_gt_count = temp_count - print( '[max_gt: %.2f min_gt: %.2f]' % (self.max_gt_count, self.min_gt_count) ) - self.bin_val = (self.max_gt_count - self.min_gt_count)/float(self.num_classes) + print("[max_gt: %.2f min_gt: %.2f]" % (self.max_gt_count, self.min_gt_count)) + self.bin_val = (self.max_gt_count - self.min_gt_count) / float(self.num_classes) def pre_weights(self): count_class_hist = np.zeros(self.num_classes) @@ -96,18 +105,19 @@ def pre_weights(self): _, gt_map = data for j in range(0, gt_map.size()[0]): temp_count = gt_map[j].sum() / self.cfg_data.LOG_PARA - class_idx = min(int(temp_count/self.bin_val), self.num_classes-1) + class_idx = min( + int(temp_count / self.bin_val), self.num_classes - 1 + ) count_class_hist[class_idx] += 1 wts = count_class_hist - wts = 1-wts/(sum(wts)); - wts = wts/sum(wts); - print( 'pre_wts:' ) - print( wts ) + wts = 1 - wts / (sum(wts)) + wts = wts / sum(wts) + print("pre_wts:") + print(wts) return wts - def online_assign_gt_class_labels(self, gt_map_batch): batch = gt_map_batch.size()[0] # pdb.set_trace() @@ -116,78 +126,91 @@ def online_assign_gt_class_labels(self, gt_map_batch): for i in range(0, batch): # pdb.set_trace() - gt_count = (gt_map_batch[i].sum().item() / self.cfg_data.LOG_PARA) + gt_count = gt_map_batch[i].sum().item() / self.cfg_data.LOG_PARA # generate gt's 
label same as implement of CMTL by Viswa gt_class_label = np.zeros(self.num_classes, dtype=np.int) # bin_val = ((self.max_gt_count - self.min_gt_count)/float(self.num_classes)) - class_idx = min(int(gt_count/self.bin_val), self.num_classes-1) + class_idx = min(int(gt_count / self.bin_val), self.num_classes - 1) gt_class_label[class_idx] = 1 # pdb.set_trace() label[i] = gt_class_label.reshape(1, self.num_classes) - return torch.from_numpy(label).float() def forward(self): # self.validate_V1() - for epoch in range(self.epoch, cfg.MAX_EPOCH): + for epoch in range(self.epoch, self.cfg.MAX_EPOCH): self.epoch = epoch - if epoch > cfg.LR_DECAY_START: + if epoch > self.cfg.LR_DECAY_START: self.scheduler.step() # training - self.timer['train time'].tic() + self.timer["train time"].tic() self.train() - self.timer['train time'].toc(average=False) + self.timer["train time"].toc(average=False) - print( 'train time: {:.2f}s'.format(self.timer['train time'].diff) ) - print( '=' * 20 ) + print("train time: {:.2f}s".format(self.timer["train time"].diff)) + print("=" * 20) # validation - if epoch % cfg.VAL_FREQ == 0 or epoch > cfg.VAL_DENSE_START: - self.timer['val time'].tic() - if self.data_mode in ['SHHA', 'SHHB', 'QNRF', 'UCF50']: + if epoch % self.cfg.VAL_FREQ == 0 or epoch > self.cfg.VAL_DENSE_START: + self.timer["val time"].tic() + if self.data_mode in ["SHHA", "SHHB", "QNRF", "UCF50"]: self.validate_V1() - elif self.data_mode is 'WE': + elif self.data_mode == "WE": self.validate_V2() - elif self.data_mode is 'GCC': + elif self.data_mode == "GCC": self.validate_V3() - self.timer['val time'].toc(average=False) - print( 'val time: {:.2f}s'.format(self.timer['val time'].diff) ) + self.timer["val time"].toc(average=False) + print("val time: {:.2f}s".format(self.timer["val time"].diff)) def train(self): # training for all datasets self.net.train() for i, data in enumerate(self.train_loader, 0): # train net - self.timer['iter time'].tic() + self.timer["iter time"].tic() img, gt_map 
= data img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() gt_label = self.online_assign_gt_class_labels(gt_map) gt_label = Variable(gt_label).cuda() - self.optimizer.zero_grad() pred_map = self.net(img, gt_map, gt_label) - loss1,loss2 = self.net.loss - loss = loss1+loss2 + loss1, loss2 = self.net.loss + loss = loss1 + loss2 # loss = loss1 loss.backward() self.optimizer.step() - if (i + 1) % cfg.PRINT_FREQ == 0: + if (i + 1) % self.cfg.PRINT_FREQ == 0: self.i_tb += 1 - self.writer.add_scalar('train_loss', loss.item(), self.i_tb) - self.writer.add_scalar('train_loss1', loss1.item(), self.i_tb) - self.writer.add_scalar('train_loss2', loss2.item(), self.i_tb) - self.timer['iter time'].toc(average=False) - print( '[ep %d][it %d][loss %.8f, %.8f, %.8f][lr %.4f][%.2fs]' % \ - (self.epoch + 1, i + 1, loss.item(),loss1.item(),loss2.item(), self.optimizer.param_groups[0]['lr'] * 10000, - self.timer['iter time'].diff) ) - print( ' [cnt: gt: %.1f pred: %.2f]' % (gt_map[0].sum().data/self.cfg_data.LOG_PARA, pred_map[0].sum().data/self.cfg_data.LOG_PARA) ) + self.writer.add_scalar("train_loss", loss.item(), self.i_tb) + self.writer.add_scalar("train_loss1", loss1.item(), self.i_tb) + self.writer.add_scalar("train_loss2", loss2.item(), self.i_tb) + self.timer["iter time"].toc(average=False) + print( + "[ep %d][it %d][loss %.8f, %.8f, %.8f][lr %.4f][%.2fs]" + % ( + self.epoch + 1, + i + 1, + loss.item(), + loss1.item(), + loss2.item(), + self.optimizer.param_groups[0]["lr"] * 10000, + self.timer["iter time"].diff, + ) + ) + print( + " [cnt: gt: %.1f pred: %.2f]" + % ( + gt_map[0].sum().data / self.cfg_data.LOG_PARA, + pred_map[0].sum().data / self.cfg_data.LOG_PARA, + ) + ) def validate_V1(self): # validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 @@ -215,25 +238,43 @@ def validate_V1(self): # validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 pred_cnt = np.sum(pred_map) / self.cfg_data.LOG_PARA gt_count = np.sum(gt_map) / self.cfg_data.LOG_PARA - loss1,loss2 = self.net.loss + loss1, 
loss2 = self.net.loss # loss = loss1.item()+loss2.item() loss = loss1.item() losses.update(loss) maes.update(abs(gt_count - pred_cnt)) mses.update((gt_count - pred_cnt) * (gt_count - pred_cnt)) if vi == 0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) mae = maes.avg mse = np.sqrt(mses.avg) loss = losses.avg - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mse', mse, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, mse, loss],self.train_record,self.log_txt) + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mse", mse, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, mse, loss], + self.train_record, + self.log_txt, + ) print_summary(self.exp_name, [mae, mse, loss], self.train_record) @@ -258,26 +299,43 @@ def validate_V2(self): # validate_V2 for WE pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() - for i_img in range(pred_map.shape[0]): - - pred_cnt = np.sum(pred_map[i_img])/self.cfg_data.LOG_PARA - gt_count = np.sum(gt_map[i_img])/self.cfg_data.LOG_PARA - losses.update(self.net.loss.item(),i_sub) - maes.update(abs(gt_count-pred_cnt),i_sub) + pred_cnt = np.sum(pred_map[i_img]) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map[i_img]) / self.cfg_data.LOG_PARA + + losses.update(self.net.loss.item(), i_sub) + maes.update(abs(gt_count - pred_cnt), i_sub) if vi == 0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) + vis_results( 
+ self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) mae = np.average(maes.avg) loss = np.average(losses.avg) - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, 0, loss],self.train_record,self.log_txt) + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, 0, loss], + self.train_record, + self.log_txt, + ) print_summary(self.exp_name, [mae, 0, loss], self.train_record) def validate_V3(self): # validate_V3 for GCC @@ -288,8 +346,16 @@ def validate_V3(self): # validate_V3 for GCC maes = AverageMeter() mses = AverageMeter() - c_maes = {'level': AverageCategoryMeter(9), 'time': AverageCategoryMeter(8), 'weather': AverageCategoryMeter(7)} - c_mses = {'level': AverageCategoryMeter(9), 'time': AverageCategoryMeter(8), 'weather': AverageCategoryMeter(7)} + c_maes = { + "level": AverageCategoryMeter(9), + "time": AverageCategoryMeter(8), + "weather": AverageCategoryMeter(7), + } + c_mses = { + "level": AverageCategoryMeter(9), + "time": AverageCategoryMeter(8), + "weather": AverageCategoryMeter(7), + } for vi, data in enumerate(self.val_loader, 0): img, gt_map, attributes_pt = data @@ -313,32 +379,48 @@ def validate_V3(self): # validate_V3 for GCC losses.update(self.net.loss.item()) maes.update(s_mae) mses.update(s_mse) - c_maes['level'].update(s_mae, attributes_pt[i_img][0]) - c_mses['level'].update(s_mse, attributes_pt[i_img][0]) - c_maes['time'].update(s_mae, attributes_pt[i_img][1] / 3) - c_mses['time'].update(s_mse, attributes_pt[i_img][1] / 3) - c_maes['weather'].update(s_mae, attributes_pt[i_img][2]) - 
c_mses['weather'].update(s_mse, attributes_pt[i_img][2]) + c_maes["level"].update(s_mae, attributes_pt[i_img][0]) + c_mses["level"].update(s_mse, attributes_pt[i_img][0]) + c_maes["time"].update(s_mae, attributes_pt[i_img][1] / 3) + c_mses["time"].update(s_mse, attributes_pt[i_img][1] / 3) + c_maes["weather"].update(s_mae, attributes_pt[i_img][2]) + c_mses["weather"].update(s_mse, attributes_pt[i_img][2]) if vi == 0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) loss = losses.avg mae = maes.avg mse = np.sqrt(mses.avg) - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mse', mse, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, mse, loss],self.train_record,self.log_txt) - - c_mses['level'] = np.sqrt(c_mses['level'].avg) - c_mses['time'] = np.sqrt(c_mses['time'].avg) - c_mses['weather'] = np.sqrt(c_mses['weather'].avg) - print_GCC_summary(self.exp_name, [mae, mse, loss], self.train_record, c_maes, c_mses) - - - - + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mse", mse, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, mse, loss], + self.train_record, + self.log_txt, + ) + + c_mses["level"] = np.sqrt(c_mses["level"].avg) + c_mses["time"] = np.sqrt(c_mses["time"].avg) + c_mses["weather"] = np.sqrt(c_mses["weather"].avg) + print_GCC_summary( + self.exp_name, [mae, mse, loss], self.train_record, c_maes, c_mses + ) diff --git a/trainer_for_M2TCC.py b/trainer_for_M2TCC.py index 2fca3b7..6f8a5fa 100644 --- 
a/trainer_for_M2TCC.py +++ b/trainer_for_M2TCC.py @@ -6,121 +6,143 @@ from torch.optim.lr_scheduler import StepLR from models.M2TCC import CrowdCounter -from config import cfg +from config import cfg as default_cfg from misc.utils import * import pdb -class Trainer(): - def __init__(self, dataloader, cfg_data, pwd): +class Trainer: + def __init__(self, dataloader, cfg_data, pwd, cfg=None): self.cfg_data = cfg_data - - self.data_mode = cfg.DATASET - self.exp_name = cfg.EXP_NAME - self.exp_path = cfg.EXP_PATH + if cfg is None: + self.cfg = default_cfg + else: + self.cfg = cfg + + self.data_mode = self.cfg.DATASET + self.exp_name = self.cfg.EXP_NAME + self.exp_path = self.cfg.EXP_PATH self.pwd = pwd - self.net_name = cfg.NET + self.net_name = self.cfg.NET - if self.net_name in ['SANet']: + if self.net_name in ["SANet"]: loss_1_fn = nn.MSELoss() from misc import pytorch_ssim + loss_2_fn = pytorch_ssim.SSIM(window_size=11) - self.net = CrowdCounter(cfg.GPU_ID,self.net_name,loss_1_fn,loss_2_fn).cuda() - self.optimizer = optim.Adam(self.net.CCN.parameters(), lr=cfg.LR, weight_decay=1e-4) + self.net = CrowdCounter( + self.cfg.GPU_ID, self.net_name, loss_1_fn, loss_2_fn + ).cuda() + self.optimizer = optim.Adam( + self.net.CCN.parameters(), lr=self.cfg.LR, weight_decay=1e-4 + ) # self.optimizer = optim.SGD(self.net.parameters(), cfg.LR, momentum=0.95,weight_decay=5e-4) - self.scheduler = StepLR(self.optimizer, step_size=cfg.NUM_EPOCH_LR_DECAY, gamma=cfg.LR_DECAY) - - self.train_record = {'best_mae': 1e20, 'best_mse':1e20, 'best_model_name': ''} - self.timer = {'iter time' : Timer(),'train time' : Timer(),'val time' : Timer()} + self.scheduler = StepLR( + self.optimizer, + step_size=self.cfg.NUM_EPOCH_LR_DECAY, + gamma=self.cfg.LR_DECAY, + ) + self.train_record = {"best_mae": 1e20, "best_mse": 1e20, "best_model_name": ""} + self.timer = {"iter time": Timer(), "train time": Timer(), "val time": Timer()} self.epoch = 0 self.i_tb = 0 - - if cfg.PRE_GCC: - 
self.net.load_state_dict(torch.load(cfg.PRE_GCC_MODEL)) - self.train_loader, self.val_loader, self.restore_transform = dataloader() + if self.cfg.PRE_GCC: + self.net.load_state_dict(torch.load(self.cfg.PRE_GCC_MODEL)) - if cfg.RESUME: - latest_state = torch.load(cfg.RESUME_PATH) - self.net.load_state_dict(latest_state['net']) - self.optimizer.load_state_dict(latest_state['optimizer']) - self.scheduler.load_state_dict(latest_state['scheduler']) - self.epoch = latest_state['epoch'] + 1 - self.i_tb = latest_state['i_tb'] - self.train_record = latest_state['train_record'] - self.exp_path = latest_state['exp_path'] - self.exp_name = latest_state['exp_name'] + self.train_loader, self.val_loader, self.restore_transform = dataloader(cfg_data=cfg_data) - self.writer, self.log_txt = logger(self.exp_path, self.exp_name, self.pwd, 'exp', resume=cfg.RESUME) + if self.cfg.RESUME: + latest_state = torch.load(self.cfg.RESUME_PATH) + self.net.load_state_dict(latest_state["net"]) + self.optimizer.load_state_dict(latest_state["optimizer"]) + self.scheduler.load_state_dict(latest_state["scheduler"]) + self.epoch = latest_state["epoch"] + 1 + self.i_tb = latest_state["i_tb"] + self.train_record = latest_state["train_record"] + self.exp_path = latest_state["exp_path"] + self.exp_name = latest_state["exp_name"] + self.writer, self.log_txt = logger( + self.exp_path, self.exp_name, self.pwd, "exp", resume=self.cfg.RESUME + ) def forward(self): # self.validate_V3() - for epoch in range(self.epoch, cfg.MAX_EPOCH): + for epoch in range(self.epoch, self.cfg.MAX_EPOCH): self.epoch = epoch - if epoch > cfg.LR_DECAY_START: + if epoch > self.cfg.LR_DECAY_START: self.scheduler.step() - - # training - self.timer['train time'].tic() + + # training + self.timer["train time"].tic() self.train() - self.timer['train time'].toc(average=False) + self.timer["train time"].toc(average=False) - print( 'train time: {:.2f}s'.format(self.timer['train time'].diff) ) - print( '='*20 ) + print("train time: 
{:.2f}s".format(self.timer["train time"].diff)) + print("=" * 20) # validation - if epoch%cfg.VAL_FREQ==0 or epoch>cfg.VAL_DENSE_START: - self.timer['val time'].tic() - if self.data_mode in ['SHHA', 'SHHB', 'QNRF', 'UCF50']: + if epoch % self.cfg.VAL_FREQ == 0 or epoch > self.cfg.VAL_DENSE_START: + self.timer["val time"].tic() + if self.data_mode in ["SHHA", "SHHB", "QNRF", "UCF50"]: self.validate_V1() - elif self.data_mode is 'WE': + elif self.data_mode == "WE": self.validate_V2() - elif self.data_mode is 'GCC': + elif self.data_mode == "GCC": self.validate_V3() - self.timer['val time'].toc(average=False) - print( 'val time: {:.2f}s'.format(self.timer['val time'].diff) ) + self.timer["val time"].toc(average=False) + print("val time: {:.2f}s".format(self.timer["val time"].diff)) - - def train(self): # training for all datasets + def train(self): # training for all datasets self.net.train() for i, data in enumerate(self.train_loader, 0): - self.timer['iter time'].tic() + self.timer["iter time"].tic() img, gt_map = data img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() self.optimizer.zero_grad() pred_map = self.net(img, gt_map) - loss1,loss2 = self.net.loss - loss = loss1+loss2 + loss1, loss2 = self.net.loss + loss = loss1 + loss2 loss.backward() self.optimizer.step() - if (i + 1) % cfg.PRINT_FREQ == 0: + if (i + 1) % self.cfg.PRINT_FREQ == 0: self.i_tb += 1 - self.writer.add_scalar('train_loss', loss.item(), self.i_tb) - self.writer.add_scalar('train_loss1', loss1.item(), self.i_tb) - self.writer.add_scalar('train_loss2', loss2.item(), self.i_tb) - self.timer['iter time'].toc(average=False) - print( '[ep %d][it %d][loss %.4f][lr %.4f][%.2fs]' % \ - (self.epoch + 1, i + 1, loss.item(), self.optimizer.param_groups[0]['lr']*10000, self.timer['iter time'].diff) ) - print( ' [cnt: gt: %.1f pred: %.2f]' % (gt_map[0].sum().data/self.cfg_data.LOG_PARA, pred_map[0].sum().data/self.cfg_data.LOG_PARA) ) - - - - - - def validate_V1(self):# validate_V1 for SHHA, SHHB, 
UCF-QNRF, UCF50 + self.writer.add_scalar("train_loss", loss.item(), self.i_tb) + self.writer.add_scalar("train_loss1", loss1.item(), self.i_tb) + self.writer.add_scalar("train_loss2", loss2.item(), self.i_tb) + self.timer["iter time"].toc(average=False) + print( + "[ep %d][it %d][loss %.4f][lr %.4f][%.2fs]" + % ( + self.epoch + 1, + i + 1, + loss.item(), + self.optimizer.param_groups[0]["lr"] * 10000, + self.timer["iter time"].diff, + ) + ) + print( + " [cnt: gt: %.1f pred: %.2f]" + % ( + gt_map[0].sum().data / self.cfg_data.LOG_PARA, + pred_map[0].sum().data / self.cfg_data.LOG_PARA, + ) + ) + + def validate_V1(self): # validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 self.net.eval() - + losses = AverageMeter() maes = AverageMeter() mses = AverageMeter() @@ -128,50 +150,65 @@ def validate_V1(self):# validate_V1 for SHHA, SHHB, UCF-QNRF, UCF50 for vi, data in enumerate(self.val_loader, 0): img, gt_map = data - with torch.no_grad(): img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() - pred_map = self.net.forward(img,gt_map) + pred_map = self.net.forward(img, gt_map) pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() - + for i_img in range(pred_map.shape[0]): - pred_cnt = np.sum(pred_map[i_img])/self.cfg_data.LOG_PARA - gt_count = np.sum(gt_map[i_img])/self.cfg_data.LOG_PARA + pred_cnt = np.sum(pred_map[i_img]) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map[i_img]) / self.cfg_data.LOG_PARA - loss1,loss2 = self.net.loss - loss = loss1.item()+loss2.item() + loss1, loss2 = self.net.loss + loss = loss1.item() + loss2.item() losses.update(loss) - maes.update(abs(gt_count-pred_cnt)) - mses.update((gt_count-pred_cnt)*(gt_count-pred_cnt)) - if vi==0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) - + maes.update(abs(gt_count - pred_cnt)) + mses.update((gt_count - pred_cnt) * (gt_count - pred_cnt)) + if vi == 0: + vis_results( + self.exp_name, + self.epoch, + self.writer, + 
self.restore_transform, + img, + pred_map, + gt_map, + ) + mae = maes.avg mse = np.sqrt(mses.avg) loss = losses.avg - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mse', mse, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, mse, loss],self.train_record,self.log_txt) - print_summary(self.exp_name,[mae, mse, loss],self.train_record) - - - def validate_V2(self):# validate_V2 for WE + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mse", mse, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, mse, loss], + self.train_record, + self.log_txt, + ) + print_summary(self.exp_name, [mae, mse, loss], self.train_record) + + def validate_V2(self): # validate_V2 for WE self.net.eval() losses = AverageCategoryMeter(5) maes = AverageCategoryMeter(5) - - for i_sub,i_loader in enumerate(self.val_loader,0): + for i_sub, i_loader in enumerate(self.val_loader, 0): for vi, data in enumerate(i_loader, 0): img, gt_map = data @@ -180,43 +217,66 @@ def validate_V2(self):# validate_V2 for WE img = Variable(img).cuda() gt_map = Variable(gt_map).cuda() - pred_map = self.net.forward(img,gt_map) + pred_map = self.net.forward(img, gt_map) pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() - - + for i_img in range(pred_map.shape[0]): - pred_cnt = np.sum(pred_map[i_img])/self.cfg_data.LOG_PARA - gt_count = np.sum(gt_map[i_img])/self.cfg_data.LOG_PARA - - losses.update(self.net.loss.item(),i_sub) - maes.update(abs(gt_count-pred_cnt),i_sub) - if vi==0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) - + pred_cnt = 
np.sum(pred_map[i_img]) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map[i_img]) / self.cfg_data.LOG_PARA + + losses.update(self.net.loss.item(), i_sub) + maes.update(abs(gt_count - pred_cnt), i_sub) + if vi == 0: + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) + mae = np.average(maes.avg) loss = np.average(losses.avg) - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, 0, loss],self.train_record,self.log_txt) - print_summary(self.exp_name,[mae, 0, loss],self.train_record) - - - def validate_V3(self):# validate_V3 for GCC + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, 0, loss], + self.train_record, + self.log_txt, + ) + print_summary(self.exp_name, [mae, 0, loss], self.train_record) + + def validate_V3(self): # validate_V3 for GCC self.net.eval() - + losses = AverageMeter() maes = AverageMeter() mses = AverageMeter() - c_maes = {'level':AverageCategoryMeter(9), 'time':AverageCategoryMeter(8),'weather':AverageCategoryMeter(7)} - c_mses = {'level':AverageCategoryMeter(9), 'time':AverageCategoryMeter(8),'weather':AverageCategoryMeter(7)} - + c_maes = { + "level": AverageCategoryMeter(9), + "time": AverageCategoryMeter(8), + "weather": AverageCategoryMeter(7), + } + c_mses = { + "level": AverageCategoryMeter(9), + "time": AverageCategoryMeter(8), + "weather": AverageCategoryMeter(7), + } for vi, data in enumerate(self.val_loader, 0): img, gt_map, attributes_pt = data @@ -225,46 +285,68 @@ def validate_V3(self):# validate_V3 for GCC img = Variable(img).cuda() gt_map = 
Variable(gt_map).cuda() - - pred_map = self.net.forward(img,gt_map) + pred_map = self.net.forward(img, gt_map) pred_map = pred_map.data.cpu().numpy() gt_map = gt_map.data.cpu().numpy() - + for i_img in range(pred_map.shape[0]): - pred_cnt = np.sum(pred_map)/self.cfg_data.LOG_PARA - gt_count = np.sum(gt_map)/self.cfg_data.LOG_PARA + pred_cnt = np.sum(pred_map) / self.cfg_data.LOG_PARA + gt_count = np.sum(gt_map) / self.cfg_data.LOG_PARA - s_mae = abs(gt_count-pred_cnt) - s_mse = (gt_count-pred_cnt)*(gt_count-pred_cnt) + s_mae = abs(gt_count - pred_cnt) + s_mse = (gt_count - pred_cnt) * (gt_count - pred_cnt) - loss1,loss2 = self.net.loss - loss = loss1.item()+loss2.item() + loss1, loss2 = self.net.loss + loss = loss1.item() + loss2.item() losses.update(loss) maes.update(s_mae) - mses.update(s_mse) - attributes_pt = attributes_pt.squeeze() - c_maes['level'].update(s_mae,attributes_pt[0]) - c_mses['level'].update(s_mse,attributes_pt[0]) - c_maes['time'].update(s_mae,attributes_pt[1]/3) - c_mses['time'].update(s_mse,attributes_pt[1]/3) - c_maes['weather'].update(s_mae,attributes_pt[2]) - c_mses['weather'].update(s_mse,attributes_pt[2]) - - - if vi==0: - vis_results(self.exp_name, self.epoch, self.writer, self.restore_transform, img, pred_map, gt_map) - + mses.update(s_mse) + attributes_pt = attributes_pt.squeeze() + c_maes["level"].update(s_mae, attributes_pt[0]) + c_mses["level"].update(s_mse, attributes_pt[0]) + c_maes["time"].update(s_mae, attributes_pt[1] / 3) + c_mses["time"].update(s_mse, attributes_pt[1] / 3) + c_maes["weather"].update(s_mae, attributes_pt[2]) + c_mses["weather"].update(s_mse, attributes_pt[2]) + + if vi == 0: + vis_results( + self.exp_name, + self.epoch, + self.writer, + self.restore_transform, + img, + pred_map, + gt_map, + ) + loss = losses.avg mae = maes.avg mse = np.sqrt(mses.avg) - - self.writer.add_scalar('val_loss', loss, self.epoch + 1) - self.writer.add_scalar('mae', mae, self.epoch + 1) - self.writer.add_scalar('mse', mse, self.epoch + 
1) - - self.train_record = update_model(self.net,self.optimizer,self.scheduler,self.epoch,self.i_tb,self.exp_path,self.exp_name, \ - [mae, mse, loss],self.train_record,self.log_txt) - - print_GCC_summary(self.log_txt,self.epoch,[mae, mse, loss],self.train_record,c_maes,c_mses) \ No newline at end of file + self.writer.add_scalar("val_loss", loss, self.epoch + 1) + self.writer.add_scalar("mae", mae, self.epoch + 1) + self.writer.add_scalar("mse", mse, self.epoch + 1) + + self.train_record = update_model( + self.net, + self.optimizer, + self.scheduler, + self.epoch, + self.i_tb, + self.exp_path, + self.exp_name, + [mae, mse, loss], + self.train_record, + self.log_txt, + ) + + print_GCC_summary( + self.log_txt, + self.epoch, + [mae, mse, loss], + self.train_record, + c_maes, + c_mses, + ) diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1 @@ + diff --git a/utils/helper.py b/utils/helper.py new file mode 100644 index 0000000..c8726eb --- /dev/null +++ b/utils/helper.py @@ -0,0 +1,41 @@ +import os + +class Save_Handle(object): + """handle the number of """ + def __init__(self, max_num): + self.save_list = [] + self.max_num = max_num + + def append(self, save_path): + if len(self.save_list) < self.max_num: + self.save_list.append(save_path) + else: + remove_path = self.save_list[0] + del self.save_list[0] + self.save_list.append(save_path) + if os.path.exists(remove_path): + os.remove(remove_path) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = 1.0 * self.sum / self.count + + def get_avg(self): + return self.avg + + def get_count(self): + return self.count diff --git a/utils/logger.py b/utils/logger.py new file mode 
100644 index 0000000..9a0dc52 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,16 @@ +import logging + +def setlogger(path): + logger = logging.getLogger() + logger.setLevel(logging.INFO) + logFormatter = logging.Formatter("%(asctime)s %(message)s", + "%m-%d %H:%M:%S") + + fileHandler = logging.FileHandler(path) + fileHandler.setFormatter(logFormatter) + logger.addHandler(fileHandler) + + consoleHandler = logging.StreamHandler() + consoleHandler.setFormatter(logFormatter) + logger.addHandler(consoleHandler) + diff --git a/utils/regression_trainer.py b/utils/regression_trainer.py new file mode 100644 index 0000000..2937bcd --- /dev/null +++ b/utils/regression_trainer.py @@ -0,0 +1,165 @@ +from utils.trainer import Trainer +from utils.helper import Save_Handle, AverageMeter +import os +import sys +import time +import torch +from torch import optim +from torch.utils.data import DataLoader +from torch.utils.data.dataloader import default_collate +import logging +import numpy as np +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from models.vgg import vgg19 +from datasets.crowd_sh import Crowd +from losses.bay_loss import Bay_Loss +from losses.post_prob import Post_Prob + + +def train_collate(batch): + transposed_batch = list(zip(*batch)) + images = torch.stack(transposed_batch[0], 0) + points = transposed_batch[1] # the number of points is not fixed, keep it as a list of tensor + targets = transposed_batch[2] + st_sizes = torch.FloatTensor(transposed_batch[3]) + return images, points, targets, st_sizes + + +class RegTrainer(Trainer): + def setup(self): + """initial the datasets, model, loss and optimizer""" + args = self.args + if torch.cuda.is_available(): + self.device = torch.device("cuda") + self.device_count = torch.cuda.device_count() + # for code conciseness, we release the single gpu version + assert self.device_count == 1 + logging.info('using {} gpus'.format(self.device_count)) + else: + raise Exception("gpu is not available") + + 
self.downsample_ratio = args.downsample_ratio + self.datasets = {x: Crowd(os.path.join(args.data_dir, x), + args.crop_size, + args.downsample_ratio, + args.is_gray, x) for x in ['train', 'val']} + self.dataloaders = {x: DataLoader(self.datasets[x], + collate_fn=(train_collate + if x == 'train' else default_collate), + batch_size=(args.batch_size + if x == 'train' else 1), + shuffle=(True if x == 'train' else False), + num_workers=args.num_workers*self.device_count, + pin_memory=(True if x == 'train' else False)) + for x in ['train', 'val']} + self.model =vgg19() + self.model.to(self.device) + self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=args.weight_decay) + + self.start_epoch = 0 + if args.resume: + suf = args.resume.rsplit('.', 1)[-1] + if suf == 'tar': + checkpoint = torch.load(args.resume, self.device) + self.model.load_state_dict(checkpoint['model_state_dict']) + self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + self.start_epoch = checkpoint['epoch'] + 1 + elif suf == 'pth': + self.model.load_state_dict(torch.load(args.resume, self.device)) + + self.post_prob = Post_Prob(args.sigma, + args.crop_size, + args.downsample_ratio, + args.background_ratio, + args.use_background, + self.device) + self.criterion = Bay_Loss(args.use_background, self.device) + self.save_list = Save_Handle(max_num=args.max_model_num) + self.best_mae = np.inf + self.best_mse = np.inf + self.best_count = 0 + + def train(self): + """training process""" + args = self.args + for epoch in range(self.start_epoch, args.max_epoch): + logging.info('-'*5 + 'Epoch {}/{}'.format(epoch, args.max_epoch - 1) + '-'*5) + self.epoch = epoch + self.train_eopch() + if epoch % args.val_epoch == 0 and epoch >= args.val_start: + self.val_epoch() + + def train_eopch(self): + epoch_loss = AverageMeter() + epoch_mae = AverageMeter() + epoch_mse = AverageMeter() + epoch_start = time.time() + self.model.train() # Set model to training mode + + # Iterate over data. 
+ for step, (inputs, points, targets, st_sizes) in enumerate(self.dataloaders['train']): + inputs = inputs.to(self.device) + st_sizes = st_sizes.to(self.device) + gd_count = np.array([len(p) for p in points], dtype=np.float32) + points = [p.to(self.device) for p in points] + targets = [t.to(self.device) for t in targets] + + with torch.set_grad_enabled(True): + outputs = self.model(inputs) + prob_list = self.post_prob(points, st_sizes) + loss = self.criterion(prob_list, targets, outputs) + + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + N = inputs.size(0) + pre_count = torch.sum(outputs.view(N, -1), dim=1).detach().cpu().numpy() + res = pre_count - gd_count + epoch_loss.update(loss.item(), N) + epoch_mse.update(np.mean(res * res), N) + epoch_mae.update(np.mean(abs(res)), N) + + logging.info('Epoch {} Train, Loss: {:.2f}, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec' + .format(self.epoch, epoch_loss.get_avg(), np.sqrt(epoch_mse.get_avg()), epoch_mae.get_avg(), + time.time()-epoch_start)) + model_state_dic = self.model.state_dict() + save_path = os.path.join(self.save_dir, '{}_ckpt.tar'.format(self.epoch)) + torch.save({ + 'epoch': self.epoch, + 'optimizer_state_dict': self.optimizer.state_dict(), + 'model_state_dict': model_state_dic + }, save_path) + self.save_list.append(save_path) # control the number of saved models + + def val_epoch(self): + epoch_start = time.time() + self.model.eval() # Set model to evaluate mode + epoch_res = [] + # Iterate over data. 
+ for inputs, count, name in self.dataloaders['val']: + inputs = inputs.to(self.device) + # inputs are images with different sizes + assert inputs.size(0) == 1, 'the batch size should equal to 1 in validation mode' + with torch.set_grad_enabled(False): + outputs = self.model(inputs) + res = count[0].item() - torch.sum(outputs).item() + epoch_res.append(res) + + epoch_res = np.array(epoch_res) + mse = np.sqrt(np.mean(np.square(epoch_res))) + mae = np.mean(np.abs(epoch_res)) + logging.info('Epoch {} Val, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec' + .format(self.epoch, mse, mae, time.time()-epoch_start)) + + model_state_dic = self.model.state_dict() + if (2.0 * mse + mae) < (2.0 * self.best_mse + self.best_mae): + self.best_mse = mse + self.best_mae = mae + logging.info("save best mse {:.2f} mae {:.2f} model epoch {}".format(self.best_mse, + self.best_mae, + self.epoch)) + torch.save(model_state_dic, os.path.join(self.save_dir, 'best_model.pth')) + + + diff --git a/utils/trainer.py b/utils/trainer.py new file mode 100644 index 0000000..dbd6ab1 --- /dev/null +++ b/utils/trainer.py @@ -0,0 +1,24 @@ +import os +import logging +from datetime import datetime +from utils.logger import setlogger + + +class Trainer(object): + def __init__(self, args): + sub_dir = datetime.strftime(datetime.now(), '%m%d-%H%M%S') # prepare saving path + self.save_dir = os.path.join(args.save_dir, sub_dir) + if not os.path.exists(self.save_dir): + os.makedirs(self.save_dir) + setlogger(os.path.join(self.save_dir, 'train.log')) # set logger + for k, v in args.__dict__.items(): # save args + logging.info("{}: {}".format(k, v)) + self.args = args + + def setup(self): + """initial the datasets, model, loss and optimizer""" + pass + + def train(self): + """training one epoch""" + pass