|
@@ -11,6 +11,7 @@
 import joblib
 import resource
 import json
+import pickle
 from os.path import isfile, splitext, exists
 from pathlib import Path
 import warnings
|
@@ -43,6 +44,8 @@
 from uncoverml.scripts import superlearn_cli
 from uncoverml.log_progress import write_progress_to_file
 
+import fiona
+from osgeo import gdal
 
 log = logging.getLogger(__name__)
 # warnings.showwarning = warn_with_traceback
@@ -541,6 +544,62 @@ def upload(config_file, job_type): |
     uncoverml.interface_utils.read_presigned_urls_and_upload(config, job_type)
     write_progress_to_file(job_type, 'Upload to AWS complete', config)
 
+@cli.command()
+@click.argument('config_file')
+@click.argument('job_type')
+def clip(config_file, job_type):
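+    """Clip every covariate tif to the training shapefile's bounding box,
+    padded outward by 1%, then rewrite covariates.txt and covariate_list.pkl
+    to point at the clipped copies."""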
+    config = ls.config.Config(config_file)
+    write_progress_to_file(job_type, 'Clipping covariates to selected train/predict area.', config)
+
+    train_shp_file_path = config.target_file
+    train_shp_parent = str(Path(train_shp_file_path).parent)
+
+    # Read the training shapefile's bounding box: (min_x, min_y, max_x, max_y).
+    with fiona.open(train_shp_file_path) as dataset:
+        min_x, min_y, max_x, max_y = dataset.bounds
+
+    # Pad each edge of the bounding box outward by 1%.
+    min_x -= abs(min_x) / 100
+    min_y -= abs(min_y) / 100
+    max_x += abs(max_x) / 100
+    max_y += abs(max_y) / 100
+
+    clip_file_paths = []
+    for feature_set in config.feature_sets:
+        for file in feature_set.files:
+            file_name = Path(file).name
+            ds = gdal.Open(file)
+            write_progress_to_file(job_type, f'Clipping covariate file: {file}', config)
+            covariate_file_path_clip = f'{train_shp_parent}/{file_name}'
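+            # Assumes source covariates live outside the shapefile's directory;
+            # a tif already in train_shp_parent would be overwritten in place.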
+            # projWin is [ulx, uly, lrx, lry]: upper-left, then lower-right corner.
+            ds = gdal.Translate(covariate_file_path_clip, ds, projWin=[min_x, max_y, max_x, min_y])
+            ds = None  # Dereference so GDAL flushes and closes the output file.
+            clip_file_paths.append(covariate_file_path_clip)
+
+    # Update covariates.txt, replacing the original tifs with the clipped tifs.
+    write_progress_to_file(job_type, 'Clipping covariates: updating covariates.txt with new file paths', config)
+    covariates_file_path = f'{train_shp_parent}/covariates.txt'
+    with open(covariates_file_path, 'w') as covariates_file:
+        for clip_file_path in clip_file_paths:
+            covariates_file.write(f'{clip_file_path}\n')
+
+    # Update covariate_list.pkl, replacing tif file paths with the clipped tifs.
+    write_progress_to_file(job_type, 'Clipping covariates: updating covariate_list.pkl with new file paths', config)
+    covariates_file_path_pickle = f'{train_shp_parent}/covariate_list.pkl'
+    with open(covariates_file_path_pickle, 'rb') as in_file:
+        covariate_list = pickle.load(in_file)
+    # Point each record's nci_path at the clipped copy beside the shapefile.
+    for covariate in covariate_list:
+        covariate_name = Path(covariate['nci_path']).name
+        covariate['nci_path'] = f'{train_shp_parent}/{covariate_name}'
+
+    with open(covariates_file_path_pickle, 'wb') as out_file:
+        pickle.dump(covariate_list, out_file)
+
+    write_progress_to_file(job_type, 'Clipping covariates completed', config)
 
 def __validate_pca_config(config):
     # assert no other transforms other than whiten
|