Skip to content

Commit

Permalink
Add utility to prevent double execution of certain cells
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Nov 14, 2024
1 parent f61c008 commit 0619a09
Showing 1 changed file with 153 additions and 8 deletions.
161 changes: 153 additions & 8 deletions ch06/01_main-chapter-code/ch06.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,28 @@
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/ch06_compressed/chapter-overview.webp\" width=500px>"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "946c3e56-b04b-4b0f-b35f-b485ce5b28df",
"metadata": {},
"outputs": [],
"source": [
"# Utility to prevent certain cells from being executed twice\n",
"\n",
"from IPython.core.magic import register_line_cell_magic\n",
"\n",
"executed_cells = set()\n",
"\n",
"@register_line_cell_magic\n",
"def run_once(line, cell):\n",
" if line not in executed_cells:\n",
" get_ipython().run_cell(cell)\n",
" executed_cells.add(line)\n",
" else:\n",
" print(f\"Cell '{line}' has already been executed.\")"
]
},
{
"cell_type": "markdown",
"id": "3a84cf35-b37f-4c15-8972-dfafc9fadc1c",
Expand Down Expand Up @@ -167,7 +189,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "def7c09b-af9c-4216-90ce-5e67aed1065c",
"metadata": {
"colab": {
Expand All @@ -181,7 +203,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n"
"sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n"
]
}
],
Expand Down Expand Up @@ -230,7 +252,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "da0ed4da-ac31-4e4d-8bdd-2153be4656a4",
"metadata": {
"colab": {
Expand Down Expand Up @@ -344,7 +366,7 @@
"[5572 rows x 2 columns]"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -368,7 +390,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "495a5280-9d7c-41d4-9719-64ab99056d4c",
"metadata": {
"colab": {
Expand Down Expand Up @@ -406,7 +428,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "7be4a0a2-9704-4a96-b38f-240339818688",
"metadata": {
"colab": {
Expand All @@ -428,6 +450,9 @@
}
],
"source": [
"%%run_once balance_df\n",
"\n",
"\n",
"def create_balanced_dataset(df):\n",
" \n",
" # Count the instances of \"spam\"\n",
Expand All @@ -441,6 +466,7 @@
"\n",
" return balanced_df\n",
"\n",
"\n",
"balanced_df = create_balanced_dataset(df)\n",
"print(balanced_df[\"Label\"].value_counts())"
]
Expand All @@ -457,14 +483,133 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd",
"metadata": {
"id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd"
},
"outputs": [],
"source": [
"balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})"
"%%run_once label_mapping\n",
"balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1}) "
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e6f7f062-ef4e-4020-8275-71990cab4414",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Label</th>\n",
" <th>Text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4307</th>\n",
" <td>0</td>\n",
" <td>Awww dat is sweet! We can think of something t...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4138</th>\n",
" <td>0</td>\n",
" <td>Just got to &amp;lt;#&amp;gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4831</th>\n",
" <td>0</td>\n",
" <td>The word \"Checkmate\" in chess comes from the P...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4461</th>\n",
" <td>0</td>\n",
" <td>This is wishing you a great day. Moji told me ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5440</th>\n",
" <td>0</td>\n",
" <td>Thank you. do you generally date the brothas?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5537</th>\n",
" <td>1</td>\n",
" <td>Want explicit SEX in 30 secs? Ring 02073162414...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5540</th>\n",
" <td>1</td>\n",
" <td>ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5547</th>\n",
" <td>1</td>\n",
" <td>Had your contract mobile 11 Mnths? Latest Moto...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5566</th>\n",
" <td>1</td>\n",
" <td>REMINDER FROM O2: To get 2.50 pounds free call...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5567</th>\n",
" <td>1</td>\n",
" <td>This is the 2nd time we have tried 2 contact u...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1494 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Label Text\n",
"4307 0 Awww dat is sweet! We can think of something t...\n",
"4138 0 Just got to &lt;#&gt;\n",
"4831 0 The word \"Checkmate\" in chess comes from the P...\n",
"4461 0 This is wishing you a great day. Moji told me ...\n",
"5440 0 Thank you. do you generally date the brothas?\n",
"... ... ...\n",
"5537 1 Want explicit SEX in 30 secs? Ring 02073162414...\n",
"5540 1 ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE ...\n",
"5547 1 Had your contract mobile 11 Mnths? Latest Moto...\n",
"5566 1 REMINDER FROM O2: To get 2.50 pounds free call...\n",
"5567 1 This is the 2nd time we have tried 2 contact u...\n",
"\n",
"[1494 rows x 2 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"balanced_df"
]
},
{
Expand Down

0 comments on commit 0619a09

Please sign in to comment.