|
| 1 | +from unittest.mock import Mock, patch |
| 2 | + |
| 3 | +import pytest |
| 4 | +from langflow.components.youtube.youtube_transcripts import YouTubeTranscriptsComponent |
| 5 | +from langflow.schema import Data, DataFrame, Message |
| 6 | +from youtube_transcript_api import NoTranscriptFound, TranscriptsDisabled |
| 7 | + |
| 8 | +from tests.base import ComponentTestBaseWithoutClient |
| 9 | + |
| 10 | + |
class TestYouTubeTranscriptsComponent(ComponentTestBaseWithoutClient):
    """Unit tests for ``YouTubeTranscriptsComponent``.

    Every test that asks the component for output patches ``YoutubeLoader``
    inside the component module, so the result (or exception) of the loader's
    ``load()`` call is scripted by the test itself.
    """

    @pytest.fixture
    def component_class(self):
        """Return the component class to test."""
        return YouTubeTranscriptsComponent

    @pytest.fixture
    def default_kwargs(self):
        """Return the default kwargs for the component."""
        return {
            "url": "https://www.youtube.com/watch?v=test123",
            "chunk_size_seconds": 60,
            "translation": "",
        }

    @pytest.fixture
    def file_names_mapping(self):
        """Return the file names mapping for different versions."""
        return []

    @pytest.fixture
    def mock_transcript_data(self):
        """Return two mock transcript chunks starting at 0 and 60 seconds."""
        return [
            Mock(page_content="First part of the transcript", metadata={"start_seconds": 0}),
            Mock(page_content="Second part of the transcript", metadata={"start_seconds": 60}),
        ]

    def test_basic_setup(self, component_class, default_kwargs):
        """Test basic component initialization."""
        component = component_class()
        component.set_attributes(default_kwargs)
        assert component.url == default_kwargs["url"]
        assert component.chunk_size_seconds == default_kwargs["chunk_size_seconds"]
        assert component.translation == default_kwargs["translation"]

    @patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
    def test_get_dataframe_output_success(self, mock_loader, component_class, default_kwargs, mock_transcript_data):
        """Test successful DataFrame output generation."""
        mock_loader.from_youtube_url.return_value.load.return_value = mock_transcript_data

        component = component_class()
        component.set_attributes(default_kwargs)
        result = component.get_dataframe_output()

        assert isinstance(result, DataFrame)
        # One row per mocked chunk, with start_seconds rendered as MM:SS.
        assert len(result) == 2
        assert list(result.columns) == ["timestamp", "text"]
        assert result.iloc[0]["timestamp"] == "00:00"
        assert result.iloc[1]["timestamp"] == "01:00"
        assert result.iloc[0]["text"] == "First part of the transcript"

    @patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
    def test_get_message_output_success(self, mock_loader, component_class, default_kwargs, mock_transcript_data):
        """Test successful Message output generation."""
        mock_loader.from_youtube_url.return_value.load.return_value = mock_transcript_data

        component = component_class()
        component.set_attributes(default_kwargs)
        result = component.get_message_output()

        assert isinstance(result, Message)
        assert result.text == "First part of the transcript"

    @patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
    def test_get_data_output_success(self, mock_loader, component_class, default_kwargs, mock_transcript_data):
        """Test successful Data output generation."""
        mock_loader.from_youtube_url.return_value.load.return_value = mock_transcript_data

        component = component_class()
        component.set_attributes(default_kwargs)
        result = component.get_data_output()

        assert isinstance(result, Data)
        assert result.data["video_url"] == default_kwargs["url"]
        assert result.data["transcript"] == "First part of the transcript Second part of the transcript"
        assert "error" not in result.data

    @patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
    def test_transcript_disabled_error(self, mock_loader, component_class, default_kwargs):
        """Test handling of TranscriptsDisabled error."""
        error_message = "Transcripts are disabled for this video"

        # An exception instance assigned to side_effect is raised by the mock on
        # every call, whatever arguments load() receives.
        mock_loader.from_youtube_url.return_value.load.side_effect = TranscriptsDisabled(error_message)

        component = component_class()
        component.set_attributes(default_kwargs)

        # Test DataFrame output
        df_result = component.get_dataframe_output()
        assert isinstance(df_result, DataFrame)
        assert len(df_result) == 1  # One row for error message
        assert "error" in df_result.columns
        assert "Failed to get YouTube transcripts" in df_result["error"][0]

        # Test Message output
        msg_result = component.get_message_output()
        assert isinstance(msg_result, Message)
        assert "Failed to get YouTube transcripts" in msg_result.text

        # Test Data output
        data_result = component.get_data_output()
        assert isinstance(data_result, Data)
        assert "error" in data_result.data
        assert data_result.data["transcript"] == ""

    @patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
    def test_no_transcript_found_error(self, mock_loader, component_class, default_kwargs):
        """Test handling of NoTranscriptFound error."""
        video_id = "test123"
        requested_langs = ["en"]
        transcript_data = {"en": {"translationLanguages": []}}

        # Make load() raise NoTranscriptFound via the mock's side_effect.
        mock_loader.from_youtube_url.return_value.load.side_effect = NoTranscriptFound(
            video_id, requested_langs, transcript_data
        )

        component = component_class()
        component.set_attributes(default_kwargs)

        data_result = component.get_data_output()
        assert isinstance(data_result, Data)
        assert "error" in data_result.data
        assert data_result.data["transcript"] == ""

    @pytest.mark.parametrize("lang", ["en", "es", "fr", ""])
    def test_translation_setting(self, component_class, lang):
        """Test that the translation attribute stores each given language code, including the empty string."""
        # Parametrized so each language is reported separately and starts from a fresh component.
        component = component_class()
        component.set_attributes({"url": "https://youtube.com/watch?v=test", "translation": lang})
        assert component.translation == lang

    @patch("langflow.components.youtube.youtube_transcripts.YoutubeLoader")
    def test_empty_transcript_handling(self, mock_loader, component_class, default_kwargs):
        """Test handling of empty transcript response."""
        mock_loader.from_youtube_url.return_value.load.return_value = []

        component = component_class()
        component.set_attributes(default_kwargs)

        # Test Data output with empty transcript
        data_result = component.get_data_output()
        assert data_result.data["error"] == "No transcripts found."
        assert data_result.data["transcript"] == ""

        # Test DataFrame output with empty transcript
        df_result = component.get_dataframe_output()
        assert len(df_result) == 0
0 commit comments