|
8 | 8 | "outputs": [], |
9 | 9 | "source": [ |
10 | 10 | "import polars as pl\n", |
11 | | - "import polars_istr # noqa: F401" |
| 11 | + "import polars_istr as istr" |
12 | 12 | ] |
13 | 13 | }, |
14 | 14 | { |
|
35 | 35 | " white-space: pre-wrap;\n", |
36 | 36 | "}\n", |
37 | 37 | "</style>\n", |
38 | | - "<small>shape: (4, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>iban</th></tr><tr><td>str</td></tr></thead><tbody><tr><td>"AA110011123Z56…</td></tr><tr><td>"DE445001051754…</td></tr><tr><td>"AD120001203020…</td></tr><tr><td>"MR000002000101…</td></tr></tbody></table></div>" |
| 38 | + "<small>shape: (4, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>iban</th></tr><tr><td>str</td></tr></thead><tbody><tr><td>"AA110011123Z5678"</td></tr><tr><td>"DE44500105175407324931"</td></tr><tr><td>"AD1200012030200359100100"</td></tr><tr><td>"MR0000020001010000123456754"</td></tr></tbody></table></div>" |
39 | 39 | ], |
40 | 40 | "text/plain": [ |
41 | 41 | "shape: (4, 1)\n", |
|
81 | 81 | " white-space: pre-wrap;\n", |
82 | 82 | "}\n", |
83 | 83 | "</style>\n", |
84 | | - "<small>shape: (4, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>reason</th><th>is_valid</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>"Invalid countr…</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>"DE"</td><td>"ok"</td><td>true</td><td>"50010517540732…</td><td>"50010517"</td><td>null</td></tr><tr><td>"AD"</td><td>"ok"</td><td>true</td><td>"00012030200359…</td><td>"0001"</td><td>"2030"</td></tr><tr><td>null</td><td>"Invalid checks…</td><td>false</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>" |
| 84 | + "<small>shape: (4, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>reason</th><th>is_valid</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>"Invalid country code"</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>"DE"</td><td>"ok"</td><td>true</td><td>"500105175407324931"</td><td>"50010517"</td><td>null</td></tr><tr><td>"AD"</td><td>"ok"</td><td>true</td><td>"00012030200359100100"</td><td>"0001"</td><td>"2030"</td></tr><tr><td>null</td><td>"Invalid checksum"</td><td>false</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>" |
85 | 85 | ], |
86 | 86 | "text/plain": [ |
87 | 87 | "shape: (4, 6)\n", |
|
104 | 104 | ], |
105 | 105 | "source": [ |
106 | 106 | "df.select(\n", |
107 | | - " pl.col(\"iban\").iban.country_code().alias(\"country_code\"),\n", |
108 | | - " pl.col(\"iban\").iban.check().alias(\"reason\"),\n", |
109 | | - " pl.col(\"iban\").iban.is_valid().alias(\"is_valid\"),\n", |
110 | | - " pl.col(\"iban\").iban.bban().alias(\"bban\"),\n", |
111 | | - " pl.col(\"iban\").iban.bank_id().alias(\"bank_id\"),\n", |
112 | | - " pl.col(\"iban\").iban.branch_id().alias(\"branch_id\"),\n", |
| 107 | + " istr.iban_country_code(\"iban\").alias(\"country_code\"),\n", |
| 108 | + " istr.iban_check(\"iban\").alias(\"reason\"),\n", |
| 109 | + " istr.iban_is_valid(\"iban\").alias(\"is_valid\"),\n", |
| 110 | + " istr.iban_bban(\"iban\").alias(\"bban\"),\n", |
| 111 | + " istr.iban_bank_id(\"iban\").alias(\"bank_id\"),\n", |
| 112 | + " istr.iban_branch_id(\"iban\").alias(\"branch_id\"),\n", |
113 | 113 | ") " |
114 | 114 | ] |
115 | 115 | }, |
|
129 | 129 | " white-space: pre-wrap;\n", |
130 | 130 | "}\n", |
131 | 131 | "</style>\n", |
132 | | - "<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>check_digits</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>"DE"</td><td>"44"</td><td>"50010517540732…</td><td>"50010517"</td><td>null</td></tr><tr><td>"AD"</td><td>"12"</td><td>"00012030200359…</td><td>"0001"</td><td>"2030"</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>" |
| 132 | + "<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>check_digits</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>"DE"</td><td>"44"</td><td>"500105175407324931"</td><td>"50010517"</td><td>null</td></tr><tr><td>"AD"</td><td>"12"</td><td>"00012030200359100100"</td><td>"0001"</td><td>"2030"</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>" |
133 | 133 | ], |
134 | 134 | "text/plain": [ |
135 | 135 | "shape: (4, 5)\n", |
|
152 | 152 | ], |
153 | 153 | "source": [ |
154 | 154 | "df.select(\n", |
155 | | - " pl.col(\"iban\").iban.extract_all().alias(\"ib\")\n", |
| 155 | + " istr.iban_extract_all(\"iban\").alias(\"ib\")\n", |
156 | 156 | ").unnest(\"ib\")" |
157 | 157 | ] |
158 | 158 | }, |
|
259 | 259 | ], |
260 | 260 | "source": [ |
261 | 261 | "df.select(\n", |
262 | | - " pl.col(\"isin\").isin.country_code().alias(\"country_code\"),\n", |
263 | | - " pl.col(\"isin\").isin.check_digit().alias(\"check_digit\"),\n", |
264 | | - " pl.col(\"isin\").isin.security_id().alias(\"security_id\"),\n", |
265 | | - " pl.col(\"isin\").isin.is_valid().alias(\"is_valid\"),\n", |
| 262 | + " istr.isin_country_code(\"isin\").alias(\"country_code\"),\n", |
| 263 | + " istr.isin_check_digit(\"isin\").alias(\"check_digit\"),\n", |
| 264 | + " istr.isin_security_id(\"isin\").alias(\"security_id\"),\n", |
| 265 | + " istr.isin_is_valid(\"isin\").alias(\"is_valid\"),\n", |
266 | 266 | ")" |
267 | 267 | ] |
268 | 268 | }, |
|
314 | 314 | " white-space: pre-wrap;\n", |
315 | 315 | "}\n", |
316 | 316 | "</style>\n", |
317 | | - "<small>shape: (9, 8)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>host</th><th>domain</th><th>fragment</th><th>path</th><th>query</th><th>check</th><th>is_valid</th><th>is_special</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>"example.com"</td><td>"example.com"</td><td>"row=4"</td><td>"/data.csv"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"relative URL w…</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"relative URL w…</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"relative URL w…</td><td>false</td><td>null</td></tr><tr><td>"127.0.0.1"</td><td>null</td><td>null</td><td>"/"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>"test.com"</td><td>"test.com"</td><td>null</td><td>"/"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>"/tmp/foo"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>"example.com"</td><td>"example.com"</td><td>null</td><td>"/products"</td><td>"page=2&sort=de…</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>" |
| 317 | + "<small>shape: (9, 8)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>host</th><th>domain</th><th>fragment</th><th>path</th><th>query</th><th>check</th><th>is_valid</th><th>is_special</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>"example.com"</td><td>"example.com"</td><td>"row=4"</td><td>"/data.csv"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"relative URL without a base"</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"relative URL without a base"</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"relative URL without a base"</td><td>false</td><td>null</td></tr><tr><td>"127.0.0.1"</td><td>null</td><td>null</td><td>"/"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>"test.com"</td><td>"test.com"</td><td>null</td><td>"/"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>"/tmp/foo"</td><td>null</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>"example.com"</td><td>"example.com"</td><td>null</td><td>"/products"</td><td>"page=2&sort=desc"</td><td>"ok"</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>" |
318 | 318 | ], |
319 | 319 | "text/plain": [ |
320 | 320 | "shape: (9, 8)\n", |
|
353 | 353 | ], |
354 | 354 | "source": [ |
355 | 355 | "df.select(\n", |
356 | | - " pl.col(\"url\").url.host().alias(\"host\"),\n", |
357 | | - " pl.col(\"url\").url.domain().alias(\"domain\"),\n", |
358 | | - " pl.col(\"url\").url.fragment().alias(\"fragment\"),\n", |
359 | | - " pl.col(\"url\").url.path().alias(\"path\"),\n", |
360 | | - " pl.col(\"url\").url.query().alias(\"query\"),\n", |
361 | | - " pl.col(\"url\").url.check().alias(\"check\"),\n", |
362 | | - " pl.col(\"url\").url.is_valid().alias(\"is_valid\"),\n", |
363 | | - " pl.col(\"url\").url.is_special().alias(\"is_special\"),\n", |
| 356 | + " istr.url_host(\"url\").alias(\"host\"),\n", |
| 357 | + " istr.url_domain(\"url\").alias(\"domain\"),\n", |
| 358 | + " istr.url_fragment(\"url\").alias(\"fragment\"),\n", |
| 359 | + " istr.url_path(\"url\").alias(\"path\"),\n", |
| 360 | + " istr.url_query(\"url\").alias(\"query\"),\n", |
| 361 | + " istr.url_check(\"url\").alias(\"check\"),\n", |
| 362 | + " istr.url_is_valid(\"url\").alias(\"is_valid\"),\n", |
| 363 | + " istr.url_is_special(\"url\").alias(\"is_special\"),\n", |
364 | 364 | ")" |
365 | 365 | ] |
366 | 366 | }, |
|
429 | 429 | ], |
430 | 430 | "source": [ |
431 | 431 | "df.select(\n", |
432 | | - " pl.col(\"cusip\").cusip.issue_num().alias(\"issue_num\"),\n", |
433 | | - " pl.col(\"cusip\").cusip.issuer_num().alias(\"issuer_num\"),\n", |
434 | | - " pl.col(\"cusip\").cusip.check_digit().alias(\"check_digit\"),\n", |
435 | | - " pl.col(\"cusip\").cusip.country_code().alias(\"country_code\"),\n", |
436 | | - " pl.col(\"cusip\").cusip.payload().alias(\"payload\"),\n", |
437 | | - " pl.col(\"cusip\").cusip.is_private_issue().alias(\"is_private_issue\"),\n", |
438 | | - " pl.col(\"cusip\").cusip.has_private_issuer().alias(\"has_private_issuer\"),\n", |
439 | | - " pl.col(\"cusip\").cusip.is_private_use().alias(\"is_private_use\"),\n", |
440 | | - " pl.col(\"cusip\").cusip.is_cins().alias(\"is_cins\"),\n", |
441 | | - " pl.col(\"cusip\").cusip.is_cins_base().alias(\"is_cins_base\"),\n", |
442 | | - " pl.col(\"cusip\").cusip.is_cins_extended().alias(\"is_cins_extended\"),\n", |
| 432 | + " istr.cusip_issue_num(\"cusip\").alias(\"issue_num\"),\n", |
| 433 | + " istr.cusip_issuer_num(\"cusip\").alias(\"issuer_num\"),\n", |
| 434 | + " istr.cusip_check_digit(\"cusip\").alias(\"check_digit\"),\n", |
| 435 | + " istr.cusip_country_code(\"cusip\").alias(\"country_code\"),\n", |
| 436 | + " istr.cusip_payload(\"cusip\").alias(\"payload\"),\n", |
| 437 | + " istr.cusip_is_private_issue(\"cusip\").alias(\"is_private_issue\"),\n", |
| 438 | + " istr.cusip_has_private_issuer(\"cusip\").alias(\"has_private_issuer\"),\n", |
| 439 | + " istr.cusip_is_private_use(\"cusip\").alias(\"is_private_use\"),\n", |
| 440 | + " istr.cusip_is_cins(\"cusip\").alias(\"is_cins\"),\n", |
| 441 | + " istr.cusip_is_cins_base(\"cusip\").alias(\"is_cins_base\"),\n", |
| 442 | + " istr.cusip_is_cins_extended(\"cusip\").alias(\"is_cins_extended\"),\n", |
443 | 443 | " )" |
444 | 444 | ] |
445 | 445 | }, |
|
476 | 476 | "name": "python", |
477 | 477 | "nbconvert_exporter": "python", |
478 | 478 | "pygments_lexer": "ipython3", |
479 | | - "version": "3.11.8" |
| 479 | + "version": "3.12.3" |
480 | 480 | } |
481 | 481 | }, |
482 | 482 | "nbformat": 4, |
|
0 commit comments