Skip to content

Commit d4b497f

Browse files
authored
Merge pull request #141 from codeforjapan/feature/138
Add search endpoint logic.
2 parents 222de8c + 1d8ff13 commit d4b497f

File tree

6 files changed

+752
-154
lines changed

6 files changed

+752
-154
lines changed

api/birdxplorer_api/openapi_doc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ class FastAPIEndpointDocs(Generic[_KEY]):
527527
},
528528
)
529529

530-
v1_data_post_favorite_count = FastAPIEndpointParamDocs(
530+
v1_data_post_like_count = FastAPIEndpointParamDocs(
531531
description="Postのお気に入り数。",
532532
openapi_examples={
533533
"single": {
@@ -583,7 +583,7 @@ class FastAPIEndpointDocs(Generic[_KEY]):
583583
"x_user_name": v1_data_x_user_name,
584584
"x_user_followers_count_from": v1_data_x_user_follower_count,
585585
"x_user_follow_count_from": v1_data_x_user_follow_count,
586-
"post_favorite_count_from": v1_data_post_favorite_count,
586+
"post_like_count_from": v1_data_post_like_count,
587587
"post_repost_count_from": v1_data_post_repost_count,
588588
"post_impression_count_from": v1_data_post_impression_count,
589589
"post_includes_media": v1_data_post_includes_media,

api/birdxplorer_api/routers/data.py

Lines changed: 103 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import timezone
22
from typing import List, TypeAlias, Union
3+
from urllib.parse import urlencode
34

45
from dateutil.parser import parse as dateutil_parse
56
from fastapi import APIRouter, HTTPException, Path, Query, Request
@@ -275,7 +276,11 @@ def str_to_twitter_timestamp(s: str) -> TwitterTimestamp:
275276

276277

277278
def ensure_twitter_timestamp(t: Union[str, TwitterTimestamp]) -> TwitterTimestamp:
278-
return str_to_twitter_timestamp(t) if isinstance(t, str) else t
279+
try:
280+
timestamp = str_to_twitter_timestamp(t) if isinstance(t, str) else t
281+
return timestamp
282+
except OverflowError:
283+
raise OverflowError("Timestamp out of range")
279284

280285

281286
def gen_router(storage: Storage) -> APIRouter:
@@ -314,10 +319,13 @@ def get_notes(
314319
language: Union[LanguageIdentifier, None] = Query(default=None, **V1DataNotesDocs.params["language"]),
315320
search_text: Union[None, str] = Query(default=None, **V1DataNotesDocs.params["search_text"]),
316321
) -> NoteListResponse:
317-
if created_at_from is not None and isinstance(created_at_from, str):
318-
created_at_from = ensure_twitter_timestamp(created_at_from)
319-
if created_at_to is not None and isinstance(created_at_to, str):
320-
created_at_to = ensure_twitter_timestamp(created_at_to)
322+
try:
323+
if created_at_from is not None and isinstance(created_at_from, str):
324+
created_at_from = ensure_twitter_timestamp(created_at_from)
325+
if created_at_to is not None and isinstance(created_at_to, str):
326+
created_at_to = ensure_twitter_timestamp(created_at_to)
327+
except OverflowError as e:
328+
raise HTTPException(status_code=422, detail=str(e))
321329

322330
notes = list(
323331
storage.get_notes(
@@ -374,10 +382,14 @@ def get_posts(
374382
search_url: Union[None, HttpUrl] = Query(default=None, **V1DataPostsDocs.params["search_url"]),
375383
media: bool = Query(default=True, **V1DataPostsDocs.params["media"]),
376384
) -> PostListResponse:
377-
if created_at_from is not None and isinstance(created_at_from, str):
378-
created_at_from = ensure_twitter_timestamp(created_at_from)
379-
if created_at_to is not None and isinstance(created_at_to, str):
380-
created_at_to = ensure_twitter_timestamp(created_at_to)
385+
try:
386+
if created_at_from is not None and isinstance(created_at_from, str):
387+
created_at_from = ensure_twitter_timestamp(created_at_from)
388+
if created_at_to is not None and isinstance(created_at_to, str):
389+
created_at_to = ensure_twitter_timestamp(created_at_to)
390+
except OverflowError as e:
391+
raise HTTPException(status_code=422, detail=str(e))
392+
381393
posts = list(
382394
storage.get_posts(
383395
post_ids=post_ids,
@@ -417,6 +429,7 @@ def get_posts(
417429

418430
@router.get("/search", description=V1DataSearchDocs.description, response_model=SearchResponse)
419431
def search(
432+
request: Request,
420433
note_includes_text: Union[None, str] = Query(default=None, **V1DataSearchDocs.params["note_includes_text"]),
421434
note_excludes_text: Union[None, str] = Query(default=None, **V1DataSearchDocs.params["note_excludes_text"]),
422435
post_includes_text: Union[None, str] = Query(default=None, **V1DataSearchDocs.params["post_includes_text"]),
@@ -437,9 +450,7 @@ def search(
437450
x_user_follow_count_from: Union[None, int] = Query(
438451
default=None, **V1DataSearchDocs.params["x_user_follow_count_from"]
439452
),
440-
post_favorite_count_from: Union[None, int] = Query(
441-
default=None, **V1DataSearchDocs.params["post_favorite_count_from"]
442-
),
453+
post_like_count_from: Union[None, int] = Query(default=None, **V1DataSearchDocs.params["post_like_count_from"]),
443454
post_repost_count_from: Union[None, int] = Query(
444455
default=None, **V1DataSearchDocs.params["post_repost_count_from"]
445456
),
@@ -450,59 +461,88 @@ def search(
450461
offset: int = Query(default=0, ge=0, **V1DataSearchDocs.params["offset"]),
451462
limit: int = Query(default=100, gt=0, le=1000, **V1DataSearchDocs.params["limit"]),
452463
) -> SearchResponse:
453-
return SearchResponse(
454-
data=[
464+
# Convert timestamp strings to TwitterTimestamp objects
465+
try:
466+
if note_created_at_from is not None and isinstance(note_created_at_from, str):
467+
note_created_at_from = ensure_twitter_timestamp(note_created_at_from)
468+
if note_created_at_to is not None and isinstance(note_created_at_to, str):
469+
note_created_at_to = ensure_twitter_timestamp(note_created_at_to)
470+
except OverflowError as e:
471+
raise HTTPException(status_code=422, detail=str(e))
472+
473+
# Get search results using the optimized storage method
474+
results = []
475+
for note, post in storage.search_notes_with_posts(
476+
note_includes_text=note_includes_text,
477+
note_excludes_text=note_excludes_text,
478+
post_includes_text=post_includes_text,
479+
post_excludes_text=post_excludes_text,
480+
language=language,
481+
topic_ids=topic_ids,
482+
note_status=note_status,
483+
note_created_at_from=note_created_at_from,
484+
note_created_at_to=note_created_at_to,
485+
x_user_names=x_user_names,
486+
x_user_followers_count_from=x_user_followers_count_from,
487+
x_user_follow_count_from=x_user_follow_count_from,
488+
post_like_count_from=post_like_count_from,
489+
post_repost_count_from=post_repost_count_from,
490+
post_impression_count_from=post_impression_count_from,
491+
post_includes_media=post_includes_media,
492+
offset=offset,
493+
limit=limit,
494+
):
495+
results.append(
455496
SearchedNote(
456-
noteId="1845672983001710655",
457-
language="ja",
458-
topics=[
459-
{
460-
"topicId": 26,
461-
"label": {"ja": "セキュリティ上の脅威", "en": "security threat"},
462-
"referenceCount": 0,
463-
},
464-
{"topicId": 47, "label": {"ja": "検閲", "en": "Censorship"}, "referenceCount": 0},
465-
{"topicId": 51, "label": {"ja": "テクノロジー", "en": "technology"}, "referenceCount": 0},
466-
],
467-
postId="1846718284369912064",
468-
summary="Content Security Policyは情報の持ち出しを防止する仕組みではありません。コンテンツインジェクションの脆弱性のリスクを軽減する仕組みです。適切なContent Security Policyがレスポンスヘッダーに設定されている場合でも、外部への通信をブロックできない点に注意が必要です。 Content Security Policy Level 3 https://w3c.github.io/webappsec-csp/", # noqa: E501
469-
current_status="NEEDS_MORE_RATINGS",
470-
created_at=1728877704750,
471-
post={
472-
"postId": "1846718284369912064",
473-
"xUserId": "90954365",
474-
"xUser": {
475-
"userId": "90954365",
476-
"name": "earthquakejapan",
477-
"profileImage": "https://pbs.twimg.com/profile_images/1638600342/japan_rel96_normal.jpg",
478-
"followersCount": 162934,
479-
"followingCount": 6,
480-
},
481-
"text": "今後48時間以内に日本ではマグニチュード6.0の地震が発生する可能性があります。地図をご覧ください。",
482-
"mediaDetails": [
483-
{
484-
"mediaKey": "3_1846718279236177920-1846718284369912064",
485-
"type": "photo",
486-
"url": "https://pbs.twimg.com/media/GaDcfZoX0AAko2-.jpg",
487-
"width": 900,
488-
"height": 738,
489-
}
490-
],
491-
"createdAt": 1729094524000,
492-
"likeCount": 451,
493-
"repostCount": 104,
494-
"impressionCount": 82378,
495-
"links": [
496-
{
497-
"linkId": "9c139b99-8111-e4f0-ad41-fc9e40d08722",
498-
"url": "https://www.quakeprediction.com/Earthquake%20Forecast%20Japan.html",
499-
}
500-
],
501-
"link": "https://x.com/earthquakejapan/status/1846718284369912064",
502-
},
497+
noteId=note.note_id,
498+
language=note.language,
499+
topics=note.topics,
500+
postId=note.post_id,
501+
summary=note.summary,
502+
current_status=note.current_status,
503+
created_at=note.created_at,
504+
post=post,
503505
)
504-
],
505-
meta=PaginationMeta(next=None, prev=None),
506+
)
507+
508+
# Get total count for pagination
509+
total_count = storage.count_search_results(
510+
note_includes_text=note_includes_text,
511+
note_excludes_text=note_excludes_text,
512+
post_includes_text=post_includes_text,
513+
post_excludes_text=post_excludes_text,
514+
language=language,
515+
topic_ids=topic_ids,
516+
note_status=note_status,
517+
note_created_at_from=note_created_at_from,
518+
note_created_at_to=note_created_at_to,
519+
x_user_names=x_user_names,
520+
x_user_followers_count_from=x_user_followers_count_from,
521+
x_user_follow_count_from=x_user_follow_count_from,
522+
post_like_count_from=post_like_count_from,
523+
post_repost_count_from=post_repost_count_from,
524+
post_impression_count_from=post_impression_count_from,
525+
post_includes_media=post_includes_media,
506526
)
507527

528+
# Generate pagination URLs
529+
base_url = str(request.url).split("?")[0]
530+
query_params = dict(request.query_params)
531+
next_offset = offset + limit
532+
prev_offset = max(offset - limit, 0)
533+
534+
next_url = None
535+
if next_offset < total_count:
536+
query_params["offset"] = str(next_offset)
537+
query_params["limit"] = str(limit)
538+
next_url = f"{base_url}?{urlencode(query_params)}"
539+
540+
prev_url = None
541+
if offset > 0:
542+
query_params["offset"] = str(prev_offset)
543+
query_params["limit"] = str(limit)
544+
prev_url = f"{base_url}?{urlencode(query_params)}"
545+
546+
return SearchResponse(data=results, meta=PaginationMeta(next=next_url, prev=prev_url))
547+
508548
return router

0 commit comments

Comments (0)