23 | 23 |
24 | 24 | from groq import Groq, AsyncGroq, APIResponseValidationError
25 | 25 | from groq._types import Omit
26 |    | -from groq._utils import maybe_transform
27 | 26 | from groq._models import BaseModel, FinalRequestOptions
28 |    | -from groq._constants import RAW_RESPONSE_HEADER
29 | 27 | from groq._exceptions import GroqError, APIStatusError, APITimeoutError, APIResponseValidationError
30 | 28 | from groq._base_client import (
31 | 29 |     DEFAULT_TIMEOUT,
35 | 33 |     DefaultAsyncHttpxClient,
36 | 34 |     make_request_options,
37 | 35 | )
38 |    | -from groq.types.chat.completion_create_params import CompletionCreateParams
39 | 36 |
40 | 37 | from .utils import update_env
41 | 38 |
@@ -707,68 +704,37 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str
707 | 704 |
708 | 705 |     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
709 | 706 |     @pytest.mark.respx(base_url=base_url)
710 |     | -    def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
    | 707 | +    def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: Groq) -> None:
711 | 708 |         respx_mock.post("/openai/v1/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
712 | 709 |
713 | 710 |         with pytest.raises(APITimeoutError):
714 |     | -            self.client.post(
715 |     | -                "/openai/v1/chat/completions",
716 |     | -                body=cast(
717 |     | -                    object,
718 |     | -                    maybe_transform(
719 |     | -                        dict(
720 |     | -                            messages=[
721 |     | -                                {
722 |     | -                                    "role": "system",
723 |     | -                                    "content": "You are a helpful assistant.",
724 |     | -                                },
725 |     | -                                {
726 |     | -                                    "role": "user",
727 |     | -                                    "content": "Explain the importance of low latency LLMs",
728 |     | -                                },
729 |     | -                            ],
730 |     | -                            model="llama3-8b-8192",
731 |     | -                        ),
732 |     | -                        CompletionCreateParams,
733 |     | -                    ),
734 |     | -                ),
735 |     | -                cast_to=httpx.Response,
736 |     | -                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
737 |     | -            )
    | 711 | +            client.chat.completions.with_streaming_response.create(
    | 712 | +                messages=[
    | 713 | +                    {
    | 714 | +                        "content": "content",
    | 715 | +                        "role": "system",
    | 716 | +                    }
    | 717 | +                ],
    | 718 | +                model="meta-llama/llama-4-scout-17b-16e-instruct",
    | 719 | +            ).__enter__()
738 | 720 |
739 | 721 |         assert _get_open_connections(self.client) == 0
740 | 722 |
741 | 723 |     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
742 | 724 |     @pytest.mark.respx(base_url=base_url)
743 |     | -    def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
    | 725 | +    def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: Groq) -> None:
744 | 726 |         respx_mock.post("/openai/v1/chat/completions").mock(return_value=httpx.Response(500))
745 | 727 |
746 | 728 |         with pytest.raises(APIStatusError):
747 |     | -            self.client.post(
748 |     | -                "/openai/v1/chat/completions",
749 |     | -                body=cast(
750 |     | -                    object,
751 |     | -                    maybe_transform(
752 |     | -                        dict(
753 |     | -                            messages=[
754 |     | -                                {
755 |     | -                                    "role": "system",
756 |     | -                                    "content": "You are a helpful assistant.",
757 |     | -                                },
758 |     | -                                {
759 |     | -                                    "role": "user",
760 |     | -                                    "content": "Explain the importance of low latency LLMs",
761 |     | -                                },
762 |     | -                            ],
763 |     | -                            model="llama3-8b-8192",
764 |     | -                        ),
765 |     | -                        CompletionCreateParams,
766 |     | -                    ),
767 |     | -                ),
768 |     | -                cast_to=httpx.Response,
769 |     | -                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
770 |     | -            )
771 |     | -
    | 729 | +            client.chat.completions.with_streaming_response.create(
    | 730 | +                messages=[
    | 731 | +                    {
    | 732 | +                        "content": "content",
    | 733 | +                        "role": "system",
    | 734 | +                    }
    | 735 | +                ],
    | 736 | +                model="meta-llama/llama-4-scout-17b-16e-instruct",
    | 737 | +            ).__enter__()
772 | 738 |         assert _get_open_connections(self.client) == 0
773 | 739 |
774 | 740 |     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
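Note: the updated sync tests receive client: Groq as a pytest fixture instead of going through self.client. The fixture definition is not part of this diff; below is a minimal sketch of what such a conftest fixture could look like (fixture names, base URL, and constructor arguments are assumptions for illustration only).

# conftest.py -- hypothetical sketch, not taken from this diff
import pytest

from groq import Groq, AsyncGroq

API_KEY = "My API Key"  # dummy credential; requests in these tests are intercepted by respx
BASE_URL = "http://127.0.0.1:4010"  # assumed local test base URL


@pytest.fixture
def client() -> Groq:
    # Synchronous client injected into tests such as test_retrying_timeout_errors_doesnt_leak.
    return Groq(base_url=BASE_URL, api_key=API_KEY, max_retries=2)


@pytest.fixture
def async_client() -> AsyncGroq:
    # Asynchronous counterpart used by the async test class in the next hunk.
    return AsyncGroq(base_url=BASE_URL, api_key=API_KEY, max_retries=2)
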
@@ -1582,68 +1548,37 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte
1582 | 1548 |
1583 | 1549 |     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
1584 | 1550 |     @pytest.mark.respx(base_url=base_url)
1585 |      | -    async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
     | 1551 | +    async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, async_client: AsyncGroq) -> None:
1586 | 1552 |         respx_mock.post("/openai/v1/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
1587 | 1553 |
1588 | 1554 |         with pytest.raises(APITimeoutError):
1589 |      | -            await self.client.post(
1590 |      | -                "/openai/v1/chat/completions",
1591 |      | -                body=cast(
1592 |      | -                    object,
1593 |      | -                    maybe_transform(
1594 |      | -                        dict(
1595 |      | -                            messages=[
1596 |      | -                                {
1597 |      | -                                    "role": "system",
1598 |      | -                                    "content": "You are a helpful assistant.",
1599 |      | -                                },
1600 |      | -                                {
1601 |      | -                                    "role": "user",
1602 |      | -                                    "content": "Explain the importance of low latency LLMs",
1603 |      | -                                },
1604 |      | -                            ],
1605 |      | -                            model="llama3-8b-8192",
1606 |      | -                        ),
1607 |      | -                        CompletionCreateParams,
1608 |      | -                    ),
1609 |      | -                ),
1610 |      | -                cast_to=httpx.Response,
1611 |      | -                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
1612 |      | -            )
     | 1555 | +            await async_client.chat.completions.with_streaming_response.create(
     | 1556 | +                messages=[
     | 1557 | +                    {
     | 1558 | +                        "content": "content",
     | 1559 | +                        "role": "system",
     | 1560 | +                    }
     | 1561 | +                ],
     | 1562 | +                model="meta-llama/llama-4-scout-17b-16e-instruct",
     | 1563 | +            ).__aenter__()
1613 | 1564 |
1614 | 1565 |         assert _get_open_connections(self.client) == 0
1615 | 1566 |
1616 | 1567 |     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
1617 | 1568 |     @pytest.mark.respx(base_url=base_url)
1618 |      | -    async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
     | 1569 | +    async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, async_client: AsyncGroq) -> None:
1619 | 1570 |         respx_mock.post("/openai/v1/chat/completions").mock(return_value=httpx.Response(500))
1620 | 1571 |
1621 | 1572 |         with pytest.raises(APIStatusError):
1622 |      | -            await self.client.post(
1623 |      | -                "/openai/v1/chat/completions",
1624 |      | -                body=cast(
1625 |      | -                    object,
1626 |      | -                    maybe_transform(
1627 |      | -                        dict(
1628 |      | -                            messages=[
1629 |      | -                                {
1630 |      | -                                    "role": "system",
1631 |      | -                                    "content": "You are a helpful assistant.",
1632 |      | -                                },
1633 |      | -                                {
1634 |      | -                                    "role": "user",
1635 |      | -                                    "content": "Explain the importance of low latency LLMs",
1636 |      | -                                },
1637 |      | -                            ],
1638 |      | -                            model="llama3-8b-8192",
1639 |      | -                        ),
1640 |      | -                        CompletionCreateParams,
1641 |      | -                    ),
1642 |      | -                ),
1643 |      | -                cast_to=httpx.Response,
1644 |      | -                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
1645 |      | -            )
1646 |      | -
     | 1573 | +            await async_client.chat.completions.with_streaming_response.create(
     | 1574 | +                messages=[
     | 1575 | +                    {
     | 1576 | +                        "content": "content",
     | 1577 | +                        "role": "system",
     | 1578 | +                    }
     | 1579 | +                ],
     | 1580 | +                model="meta-llama/llama-4-scout-17b-16e-instruct",
     | 1581 | +            ).__aenter__()
1647 | 1582 |         assert _get_open_connections(self.client) == 0
1648 | 1583 |
1649 | 1584 |     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
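Note: the new test bodies call __enter__() / __aenter__() directly so the request is issued inside pytest.raises(...) without the rest of a with block. Ordinary application code would use the context-manager form, roughly as sketched below (the prompt, parsing call, and environment setup are illustrative assumptions, not taken from this diff).

from groq import Groq

client = Groq()  # assumes GROQ_API_KEY is set in the environment

# Streaming-response variant of chat.completions.create: the underlying HTTP
# connection is released when the with block exits, which is the property the
# leak tests above check via _get_open_connections().
with client.chat.completions.with_streaming_response.create(
    messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
    model="meta-llama/llama-4-scout-17b-16e-instruct",
) as response:
    completion = response.parse()  # materialize the typed ChatCompletion
    print(completion.choices[0].message.content)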