1111# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
1212# License for the specific language governing permissions and limitations under
1313# the License.
14-
14+ import os
1515import pytest
16+ from datalake_api .v0 import reset_archive_querier
17+ from datalake_api import settings
1618from datalake .common import DatalakeRecord
1719from datalake .tests import generate_random_metadata
1820import simplejson as json
@@ -123,11 +125,36 @@ def query_latest(self, what, where):
123125 return HttpRecord (** record )
124126
125127
@pytest.fixture(
    params=[
        ('archive', 'use_latest'),
        ('archive', 'use_default'),
        ('http', 'use_latest'),
        ('http', 'use_default'),
    ],
    ids=[
        'archive-latest',
        'archive-default',
        'http-latest',
        'http-default',
    ],
)
def querier(request, dynamodb):
    """Yield a querier for each (querier type, table usage) combination.

    The fixture is parametrized over the querier implementation
    ('archive' -> ArchiveQuerier, 'http' -> HttpQuerier) and over whether
    ``settings.DATALAKE_USE_LATEST_TABLE`` is switched on ('use_latest') or
    off ('use_default'). Both queriers are built against the 'test' and
    'test_latest' table names with the supplied ``dynamodb`` handle.

    The setting is restored on teardown so the value chosen for one
    parametrization does not leak into tests that run afterwards.
    """
    # Called before the setting is changed, as in the original ordering.
    reset_archive_querier()

    querier_type, table_usage = request.param
    use_latest = table_usage == 'use_latest'

    # NOTE(review): falls back to False if the settings module does not
    # define the flag yet -- confirm it is always defined.
    previous = getattr(settings, 'DATALAKE_USE_LATEST_TABLE', False)
    settings.DATALAKE_USE_LATEST_TABLE = use_latest
    try:
        if querier_type == 'http':
            yield HttpQuerier('test',
                              'test_latest',
                              dynamodb=dynamodb)
        else:
            yield ArchiveQuerier('test',
                                 'test_latest',
                                 use_latest_table=use_latest,
                                 dynamodb=dynamodb)
    finally:
        # Undo the global mutation regardless of how the test finished.
        settings.DATALAKE_USE_LATEST_TABLE = previous
131158
132159def in_url (result , part ):
133160 url = result ['url' ]
@@ -407,6 +434,10 @@ def test_no_end(table_maker, querier, s3_file_from_metadata):
407434 url = 's3://datalake-test/' + m ['id' ]
408435 s3_file_from_metadata (url , m )
409436 records = DatalakeRecord .list_from_metadata (url , m )
437+ for record in records :
438+ what = record .get ('what' )
439+ where = record .get ('where' )
440+ record ['what_where_key' ] = f'{ what } :{ where } '
410441 table_maker (records )
411442 results = querier .query_by_time (m ['start' ], m ['start' ] + 1 , m ['what' ])
412443 assert len (results ) == 1
@@ -419,7 +450,12 @@ def test_no_end_exclusion(table_maker, querier, s3_file_from_metadata):
419450 url = 's3://datalake-test/' + m ['id' ]
420451 s3_file_from_metadata (url , m )
421452 records = DatalakeRecord .list_from_metadata (url , m )
453+ for record in records :
454+ what = record .get ('what' )
455+ where = record .get ('where' )
456+ record ['what_where_key' ] = f'{ what } :{ where } '
422457 table_maker (records )
458+
423459 results = querier .query_by_time (m ['start' ] + 1 , m ['start' ] + 2 , m ['what' ])
424460 assert len (results ) == 0
425461
@@ -478,8 +514,7 @@ def test_latest_creation_time_breaks_tie(table_maker, querier,
478514 start = bucket * DatalakeRecord .TIME_BUCKET_SIZE_IN_MS
479515 interval = DatalakeRecord .TIME_BUCKET_SIZE_IN_MS / 150
480516 end = start + interval
481- table = table_maker ([])
482-
517+ table = table_maker ([])[0 ]
483518 for i in range (3 ):
484519 record = record_maker (start = start ,
485520 end = end ,
@@ -528,3 +563,38 @@ def test_2x_max_results_in_one_bucket(table_maker, querier, record_maker):
528563 pages = get_all_pages (querier .query_by_time , [start , end , 'boo' ])
529564 results = consolidate_pages (pages )
530565 assert len (results ) == MAX_RESULTS * 2
566+
567+
def test_latest_table_query(table_maker, querier, record_maker):
    """Check query_latest for 'boo' at 'hoo0' among MAX_RESULTS records.

    One batch of records is made per index in range(MAX_RESULTS); each
    batch starts and ends at the beginning of the current time bucket and
    uses its own ``where`` value ('hoo0', 'hoo1', ...).
    """
    current_ms = int(time.time() * 1000)
    bucket_size = DatalakeRecord.TIME_BUCKET_SIZE_IN_MS
    bucket_start = int(current_ms / bucket_size) * bucket_size

    all_records = []
    for index in range(MAX_RESULTS):
        all_records.extend(record_maker(start=bucket_start,
                                        end=bucket_start,
                                        what='boo',
                                        where='hoo{}'.format(index)))
    table_maker(all_records)

    latest = querier.query_latest('boo', 'hoo0')
    _validate_latest_result(latest, what='boo', where='hoo0')
582+
583+
def test_query_latest_just_latest_table(table_maker, querier, record_maker):
    """Check query_latest when records exist only in the second table.

    Three 'meow'/'tree' records with different paths are written directly
    to the second table returned by ``table_maker([])``. With
    ``settings.DATALAKE_USE_LATEST_TABLE`` enabled the query is expected to
    return a valid result; otherwise it is expected to return None.
    """
    # Captured up front; this is the value in effect for the current run.
    use_latest_table = settings.DATALAKE_USE_LATEST_TABLE

    # Index 1 is treated as the latest table here.
    # NOTE(review): confirm against the table_maker fixture's return order.
    latest_table = table_maker([])[1]

    for i in range(3):
        record = record_maker(what='meow',
                              where='tree',
                              path='/{}'.format(i))

        # only inserting into latest table
        latest_table.put_item(Item=record[0])
        # Space the inserts just over a second apart.
        # NOTE(review): presumably so each write is distinguishable by
        # time -- confirm.
        time.sleep(1.01)

    result = querier.query_latest('meow', 'tree')
    if use_latest_table:
        _validate_latest_result(result, what='meow', where='tree')
    else:
        assert result is None
0 commit comments