Commit df59389

Author: RuslanBergenov

Merge branch 'feature/get-dataset-without-create'

# Conflicts:
#   .github/workflows/python-package.yml

2 parents 842923d + a7b90b4

File tree

3 files changed: +94 -9 lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ env:
 
 on:
   push:
-    branches: [development, master, feature/incremental_sync_and_schema_logging]
+    branches: [development, master]
   pull_request:
     branches: [ master ]

target_bigquery/utils.py

Lines changed: 13 additions & 8 deletions
@@ -6,6 +6,8 @@
 from google.api_core import exceptions
 from google.cloud import bigquery
 from google.cloud.bigquery import Dataset
+from google.cloud.exceptions import NotFound
+from google.cloud.bigquery import DatasetReference
 
 logger = singer.get_logger()
 
@@ -30,7 +32,7 @@ def emit_state(state):
 
 def ensure_dataset(project_id, dataset_id, location):
     """
-    Given a project id, dataset id and location, creates BigQuery dataset
+    Given a project id, dataset id and location, creates BigQuery dataset if it does not exist
 
     https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html
 
@@ -39,17 +41,20 @@ def ensure_dataset(project_id, dataset_id, location):
     :param location, str: location for the dataset (US). Passed to bigquery.Client().
     :return: client (BigQuery Client Object) and Dataset (BigQuery dataset)
     """
-    from google.cloud.bigquery import DatasetReference
+
     client = bigquery.Client(project=project_id, location=location)
 
     dataset_ref = DatasetReference(project_id, dataset_id)
+
     try:
-        client.create_dataset(dataset_ref)
-    except exceptions.GoogleAPICallError as e:
-        if e.response.status_code == 409:  # dataset exists
-            pass
-        else:
+        dataset = client.get_dataset(dataset_ref)
+        return client, dataset
+
+    except NotFound:
+        try:
+            client.create_dataset(dataset_ref)
+        except exceptions.GoogleAPICallError as e:
             logger.critical(f"unable to create dataset {dataset_id} in project {project_id}; Exception {e}")
             return 2  # sys.exit(2)
 
-    return client, Dataset(dataset_ref)
+        return client, Dataset(dataset_ref)
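
For reference, here is a sketch of ensure_dataset as it reads after this merge, assembled from the hunks above. The :param lines for project_id and dataset_id fall between the hunks and are not visible in this diff, so they are elided; the import singer line is likewise assumed from the unshown top of the module.

    # Sketch assembled from the diff hunks above; elided docstring lines and
    # the singer import are assumptions about the unshown parts of the file.
    import singer
    from google.api_core import exceptions
    from google.cloud import bigquery
    from google.cloud.bigquery import Dataset
    from google.cloud.bigquery import DatasetReference
    from google.cloud.exceptions import NotFound

    logger = singer.get_logger()


    def ensure_dataset(project_id, dataset_id, location):
        """
        Given a project id, dataset id and location, creates BigQuery dataset if it does not exist

        :param location, str: location for the dataset (US). Passed to bigquery.Client().
        :return: client (BigQuery Client Object) and Dataset (BigQuery dataset)
        """
        client = bigquery.Client(project=project_id, location=location)
        dataset_ref = DatasetReference(project_id, dataset_id)

        try:
            # Fast path: the dataset already exists; fetch and return its metadata.
            dataset = client.get_dataset(dataset_ref)
            return client, dataset
        except NotFound:
            # The dataset is missing: create it, then return a handle built from the reference.
            try:
                client.create_dataset(dataset_ref)
            except exceptions.GoogleAPICallError as e:
                logger.critical(f"unable to create dataset {dataset_id} in project {project_id}; Exception {e}")
                return 2  # sys.exit(2)
            return client, Dataset(dataset_ref)

The behavioral shift: the old code always called create_dataset and swallowed the 409 "already exists" error, while the new code asks for the dataset first and only creates it on NotFound, so the caller receives the dataset's real server-side metadata whenever it already exists.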
Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+import os
+import json
+from google.cloud.bigquery import Dataset
+from google.cloud import bigquery
+from google.cloud.exceptions import NotFound
+import pytest
+import logging
+
+from tests import unittestcore
+from target_bigquery.utils import ensure_dataset
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+
+class TestTargetBigQueryUtils(unittestcore.BaseUnitTest):
+
+    def setUp(self):
+
+        self.config_file = os.path.join(
+            os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'sandbox'),
+            'target-config.json')
+
+        config = json.load(open(self.config_file))
+
+        self.project_id = config["project_id"]
+
+        self.dataset_id = "target_bigquery_unit_test_ensure_dataset_function"  # config["dataset_id"]
+
+        self.location = config.get("location", "US")
+
+        self.client = bigquery.Client(project=self.project_id)
+
+    def test_ensure_dataset(self):
+        """
+        The purpose of this test is to show that the dataset is obtained if it already exists;
+        if it doesn't exist, it is created and then obtained.
+        """
+
+        # make sure dataset doesn't exist yet
+        logger.info("Dataset doesn't exist yet")
+        self.delete_dataset()
+
+        # assert that dataset doesn't exist yet
+        with pytest.raises(NotFound):
+            self.client.get_dataset(self.dataset_id)  # Make an API request.
+
+        # PART 1
+        # create dataset and get dataset
+        client_1, dataset_newly_created = ensure_dataset(project_id=self.project_id,
+                                                         dataset_id=self.dataset_id,
+                                                         location=self.location)
+
+        # PART 2 (identical code to part 1, but now the dataset already exists)
+        # get dataset if dataset already exists
+        client_2, dataset_already_exists = ensure_dataset(project_id=self.project_id,
+                                                          dataset_id=self.dataset_id,
+                                                          location=self.location)
+        # PART 3: checks
+        dataset_list = [dataset_newly_created, dataset_already_exists]
+
+        for next_dataset in dataset_list:
+            dataset_dict = next_dataset.__dict__
+
+            assert type(next_dataset) == Dataset
+            assert dataset_dict["_properties"]["datasetReference"]["projectId"] == self.project_id
+            assert dataset_dict["_properties"]["datasetReference"]["datasetId"] == self.dataset_id
+
+    def tearDown(self):
+        self.delete_dataset()
+
+    def delete_dataset(self):
+        try:
+            self.client.delete_dataset(
+                dataset=self.dataset_id,
+                delete_contents=True
+            )
+        except Exception as e:
+            print(e)
+            pass
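
For completeness, a minimal usage sketch of the function under test, assuming valid Google Cloud credentials are available in the environment; the project and dataset identifiers below are hypothetical placeholders:

    from target_bigquery.utils import ensure_dataset

    # Hypothetical identifiers; substitute a real project and dataset.
    client, dataset = ensure_dataset(project_id="my-gcp-project",
                                     dataset_id="my_dataset",
                                     location="US")

    print(dataset.dataset_id)  # the dataset exists now, whether or not it did before

Note that on a failed create the function returns 2 instead of raising, so a defensive caller may want to capture the result in a single variable and check it before unpacking.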
