Skip to content

Commit 1e1e87c

Browse files
authored
Merge pull request #13 from abstractqqq/better_api
updated syntax
2 parents 52b41c0 + 2260172 commit 1e1e87c

File tree

10 files changed

+502
-173
lines changed

10 files changed

+502
-173
lines changed

.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ jobs:
7272
path: dist
7373

7474
macos:
75-
runs-on: macos-latest
75+
runs-on: macos-13
7676
strategy:
7777
matrix:
7878
target: [x86_64, aarch64]

examples/basics.ipynb

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"outputs": [],
99
"source": [
1010
"import polars as pl\n",
11-
"import polars_istr # noqa: F401"
11+
"import polars_istr as istr"
1212
]
1313
},
1414
{
@@ -35,7 +35,7 @@
3535
" white-space: pre-wrap;\n",
3636
"}\n",
3737
"</style>\n",
38-
"<small>shape: (4, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>iban</th></tr><tr><td>str</td></tr></thead><tbody><tr><td>&quot;AA110011123Z56…</td></tr><tr><td>&quot;DE445001051754…</td></tr><tr><td>&quot;AD120001203020…</td></tr><tr><td>&quot;MR000002000101…</td></tr></tbody></table></div>"
38+
"<small>shape: (4, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>iban</th></tr><tr><td>str</td></tr></thead><tbody><tr><td>&quot;AA110011123Z5678&quot;</td></tr><tr><td>&quot;DE44500105175407324931&quot;</td></tr><tr><td>&quot;AD1200012030200359100100&quot;</td></tr><tr><td>&quot;MR0000020001010000123456754&quot;</td></tr></tbody></table></div>"
3939
],
4040
"text/plain": [
4141
"shape: (4, 1)\n",
@@ -81,7 +81,7 @@
8181
" white-space: pre-wrap;\n",
8282
"}\n",
8383
"</style>\n",
84-
"<small>shape: (4, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>reason</th><th>is_valid</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>&quot;Invalid countr…</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;50010517540732…</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;00012030200359…</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>&quot;Invalid checks…</td><td>false</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
84+
"<small>shape: (4, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>reason</th><th>is_valid</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>&quot;Invalid country code&quot;</td><td>false</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;500105175407324931&quot;</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>&quot;00012030200359100100&quot;</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>&quot;Invalid checksum&quot;</td><td>false</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
8585
],
8686
"text/plain": [
8787
"shape: (4, 6)\n",
@@ -104,12 +104,12 @@
104104
],
105105
"source": [
106106
"df.select(\n",
107-
" pl.col(\"iban\").iban.country_code().alias(\"country_code\"),\n",
108-
" pl.col(\"iban\").iban.check().alias(\"reason\"),\n",
109-
" pl.col(\"iban\").iban.is_valid().alias(\"is_valid\"),\n",
110-
" pl.col(\"iban\").iban.bban().alias(\"bban\"),\n",
111-
" pl.col(\"iban\").iban.bank_id().alias(\"bank_id\"),\n",
112-
" pl.col(\"iban\").iban.branch_id().alias(\"branch_id\"),\n",
107+
" istr.iban_country_code(\"iban\").alias(\"country_code\"),\n",
108+
" istr.iban_check(\"iban\").alias(\"reason\"),\n",
109+
" istr.iban_is_valid(\"iban\").alias(\"is_valid\"),\n",
110+
" istr.iban_bban(\"iban\").alias(\"bban\"),\n",
111+
" istr.iban_bank_id(\"iban\").alias(\"bank_id\"),\n",
112+
" istr.iban_branch_id(\"iban\").alias(\"branch_id\"),\n",
113113
") "
114114
]
115115
},
@@ -129,7 +129,7 @@
129129
" white-space: pre-wrap;\n",
130130
"}\n",
131131
"</style>\n",
132-
"<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>check_digits</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;44&quot;</td><td>&quot;50010517540732…</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;12&quot;</td><td>&quot;00012030200359…</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
132+
"<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>country_code</th><th>check_digits</th><th>bban</th><th>bank_id</th><th>branch_id</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>&quot;DE&quot;</td><td>&quot;44&quot;</td><td>&quot;500105175407324931&quot;</td><td>&quot;50010517&quot;</td><td>null</td></tr><tr><td>&quot;AD&quot;</td><td>&quot;12&quot;</td><td>&quot;00012030200359100100&quot;</td><td>&quot;0001&quot;</td><td>&quot;2030&quot;</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
133133
],
134134
"text/plain": [
135135
"shape: (4, 5)\n",
@@ -152,7 +152,7 @@
152152
],
153153
"source": [
154154
"df.select(\n",
155-
" pl.col(\"iban\").iban.extract_all().alias(\"ib\")\n",
155+
" istr.iban_extract_all(\"iban\").alias(\"ib\")\n",
156156
").unnest(\"ib\")"
157157
]
158158
},
@@ -259,10 +259,10 @@
259259
],
260260
"source": [
261261
"df.select(\n",
262-
" pl.col(\"isin\").isin.country_code().alias(\"country_code\"),\n",
263-
" pl.col(\"isin\").isin.check_digit().alias(\"check_digit\"),\n",
264-
" pl.col(\"isin\").isin.security_id().alias(\"security_id\"),\n",
265-
" pl.col(\"isin\").isin.is_valid().alias(\"is_valid\"),\n",
262+
" istr.isin_country_code(\"isin\").alias(\"country_code\"),\n",
263+
" istr.isin_check_digit(\"isin\").alias(\"check_digit\"),\n",
264+
" istr.isin_security_id(\"isin\").alias(\"security_id\"),\n",
265+
" istr.isin_is_valid(\"isin\").alias(\"is_valid\"),\n",
266266
")"
267267
]
268268
},
@@ -314,7 +314,7 @@
314314
" white-space: pre-wrap;\n",
315315
"}\n",
316316
"</style>\n",
317-
"<small>shape: (9, 8)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>host</th><th>domain</th><th>fragment</th><th>path</th><th>query</th><th>check</th><th>is_valid</th><th>is_special</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>&quot;row=4&quot;</td><td>&quot;/data.csv&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL w…</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL w…</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL w…</td><td>false</td><td>null</td></tr><tr><td>&quot;127.0.0.1&quot;</td><td>null</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;test.com&quot;</td><td>&quot;test.com&quot;</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>&quot;/tmp/foo&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>null</td><td>&quot;/products&quot;</td><td>&quot;page=2&amp;sort=de…</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
317+
"<small>shape: (9, 8)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>host</th><th>domain</th><th>fragment</th><th>path</th><th>query</th><th>check</th><th>is_valid</th><th>is_special</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>bool</td><td>bool</td></tr></thead><tbody><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>&quot;row=4&quot;</td><td>&quot;/data.csv&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL without a base&quot;</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL without a base&quot;</td><td>false</td><td>null</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;relative URL without a base&quot;</td><td>false</td><td>null</td></tr><tr><td>&quot;127.0.0.1&quot;</td><td>null</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;test.com&quot;</td><td>&quot;test.com&quot;</td><td>null</td><td>&quot;/&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>&quot;/tmp/foo&quot;</td><td>null</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>&quot;example.com&quot;</td><td>&quot;example.com&quot;</td><td>null</td><td>&quot;/products&quot;</td><td>&quot;page=2&amp;sort=desc&quot;</td><td>&quot;ok&quot;</td><td>true</td><td>true</td></tr><tr><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
318318
],
319319
"text/plain": [
320320
"shape: (9, 8)\n",
@@ -353,14 +353,14 @@
353353
],
354354
"source": [
355355
"df.select(\n",
356-
" pl.col(\"url\").url.host().alias(\"host\"),\n",
357-
" pl.col(\"url\").url.domain().alias(\"domain\"),\n",
358-
" pl.col(\"url\").url.fragment().alias(\"fragment\"),\n",
359-
" pl.col(\"url\").url.path().alias(\"path\"),\n",
360-
" pl.col(\"url\").url.query().alias(\"query\"),\n",
361-
" pl.col(\"url\").url.check().alias(\"check\"),\n",
362-
" pl.col(\"url\").url.is_valid().alias(\"is_valid\"),\n",
363-
" pl.col(\"url\").url.is_special().alias(\"is_special\"),\n",
356+
" istr.url_host(\"url\").alias(\"host\"),\n",
357+
" istr.url_domain(\"url\").alias(\"domain\"),\n",
358+
" istr.url_fragment(\"url\").alias(\"fragment\"),\n",
359+
" istr.url_path(\"url\").alias(\"path\"),\n",
360+
" istr.url_query(\"url\").alias(\"query\"),\n",
361+
" istr.url_check(\"url\").alias(\"check\"),\n",
362+
" istr.url_is_valid(\"url\").alias(\"is_valid\"),\n",
363+
" istr.url_is_special(\"url\").alias(\"is_special\"),\n",
364364
")"
365365
]
366366
},
@@ -429,17 +429,17 @@
429429
],
430430
"source": [
431431
"df.select(\n",
432-
" pl.col(\"cusip\").cusip.issue_num().alias(\"issue_num\"),\n",
433-
" pl.col(\"cusip\").cusip.issuer_num().alias(\"issuer_num\"),\n",
434-
" pl.col(\"cusip\").cusip.check_digit().alias(\"check_digit\"),\n",
435-
" pl.col(\"cusip\").cusip.country_code().alias(\"country_code\"),\n",
436-
" pl.col(\"cusip\").cusip.payload().alias(\"payload\"),\n",
437-
" pl.col(\"cusip\").cusip.is_private_issue().alias(\"is_private_issue\"),\n",
438-
" pl.col(\"cusip\").cusip.has_private_issuer().alias(\"has_private_issuer\"),\n",
439-
" pl.col(\"cusip\").cusip.is_private_use().alias(\"is_private_use\"),\n",
440-
" pl.col(\"cusip\").cusip.is_cins().alias(\"is_cins\"),\n",
441-
" pl.col(\"cusip\").cusip.is_cins_base().alias(\"is_cins_base\"),\n",
442-
" pl.col(\"cusip\").cusip.is_cins_extended().alias(\"is_cins_extended\"),\n",
432+
" istr.cusip_issue_num(\"cusip\").alias(\"issue_num\"),\n",
433+
" istr.cusip_issuer_num(\"cusip\").alias(\"issuer_num\"),\n",
434+
" istr.cusip_check_digit(\"cusip\").alias(\"check_digit\"),\n",
435+
" istr.cusip_country_code(\"cusip\").alias(\"country_code\"),\n",
436+
" istr.cusip_payload(\"cusip\").alias(\"payload\"),\n",
437+
" istr.cusip_is_private_issue(\"cusip\").alias(\"is_private_issue\"),\n",
438+
" istr.cusip_has_private_issuer(\"cusip\").alias(\"has_private_issuer\"),\n",
439+
" istr.cusip_is_private_use(\"cusip\").alias(\"is_private_use\"),\n",
440+
" istr.cusip_is_cins(\"cusip\").alias(\"is_cins\"),\n",
441+
" istr.cusip_is_cins_base(\"cusip\").alias(\"is_cins_base\"),\n",
442+
" istr.cusip_is_cins_extended(\"cusip\").alias(\"is_cins_extended\"),\n",
443443
" )"
444444
]
445445
},
@@ -476,7 +476,7 @@
476476
"name": "python",
477477
"nbconvert_exporter": "python",
478478
"pygments_lexer": "ipython3",
479-
"version": "3.11.8"
479+
"version": "3.12.3"
480480
}
481481
},
482482
"nbformat": 4,

python/polars_istr/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
from .iban import IbanExt # noqa: E402
2-
from .isin import IsinExt # noqa: E402
3-
from .cusip import CusipExt # noqa: E402
4-
from .url import UrlExt # noqa: E402
1+
from .iban import * # noqa: E402, F403
2+
from .isin import * # noqa: E402, F403
3+
from .cusip import * # noqa: E402, F403
4+
from .url import * # noqa: E402, F403
55

66
__version__ = "0.1.0"
7-
__all__ = ["IbanExt", "IsinExt", "CusipExt", "UrlExt"]

python/polars_istr/_utils.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import polars as pl
2+
from typing import Any, Optional, List, Dict
3+
from .type_alias import StrOrExpr
4+
5+
6+
def str_to_expr(x: StrOrExpr) -> pl.Expr:
    """Normalize a column reference into a Polars expression.

    Parameters
    ----------
    x
        Either a column name (``str``) or an existing ``pl.Expr``.

    Returns
    -------
    pl.Expr
        ``pl.col(x)`` when ``x`` is a string, otherwise ``x`` itself.

    Raises
    ------
    ValueError
        If ``x`` is neither a string nor a Polars expression.
    """
    # Expressions pass through untouched; only bare names need wrapping.
    if isinstance(x, pl.Expr):
        return x
    if isinstance(x, str):
        return pl.col(x)
    raise ValueError("Can only parse str (column name) or Polars expressions.")
13+
14+
15+
def _uses_legacy_plugin_api(version: str) -> bool:
    """Return True when ``version`` is older than Polars 0.20.16.

    Only the leading ``major.minor.patch`` digits are considered, so
    pre-release / build suffixes (e.g. ``"1.0.0-beta.1"``, ``"1.0.0rc1"``)
    do not break the comparison. A version string that cannot be parsed at
    all is treated as modern, since the legacy API is the one being phased
    out.
    """
    import re  # local import keeps this block independent of the file header

    match = re.match(r"(\d+)\.(\d+)\.(\d+)", version)
    if match is None:
        return False
    return tuple(int(part) for part in match.groups()) < (0, 20, 16)


def pl_plugin(
    *,
    lib: str,
    symbol: str,
    args: List[StrOrExpr],
    kwargs: Optional[Dict[str, Any]] = None,
    is_elementwise: bool = False,
    returns_scalar: bool = False,
    changes_length: bool = False,
    cast_to_supertype: bool = False,
) -> pl.Expr:
    """Register a plugin function call, bridging old and new Polars plugin APIs.

    Parameters
    ----------
    lib
        Path of the plugin library, forwarded as ``lib`` / ``plugin_path``.
    symbol
        Name of the plugin function, forwarded as ``symbol`` / ``function_name``.
    args
        Inputs to the plugin; each item is a column name or a ``pl.Expr`` and
        is converted with ``str_to_expr``.
    kwargs
        Optional keyword arguments forwarded to the plugin.
    is_elementwise, returns_scalar, changes_length, cast_to_supertype
        Flags forwarded unchanged to the underlying Polars registration call.

    Returns
    -------
    pl.Expr
        The expression produced by the Polars plugin registration call.

    Raises
    ------
    ValueError
        If any item of ``args`` is neither a ``str`` nor a ``pl.Expr``.
    """
    if _uses_legacy_plugin_api(pl.__version__):
        # Legacy path: the plugin is registered as a method on the first
        # input expression, with the remaining inputs passed as ``args``.
        first = str_to_expr(args[0])
        return first.register_plugin(
            lib=lib,
            symbol=symbol,
            args=[str_to_expr(x) for x in args[1:]],
            kwargs=kwargs,
            is_elementwise=is_elementwise,
            returns_scalar=returns_scalar,
            changes_length=changes_length,
            # NOTE: the legacy method spells this keyword in the plural
            # (``cast_to_supertypes``); the newer function uses the singular.
            cast_to_supertypes=cast_to_supertype,
        )

    # Imported lazily because ``polars.plugins`` is only relied upon on the
    # non-legacy path.
    from polars.plugins import register_plugin_function

    return register_plugin_function(
        plugin_path=lib,
        args=[str_to_expr(x) for x in args],
        function_name=symbol,
        kwargs=kwargs,
        is_elementwise=is_elementwise,
        returns_scalar=returns_scalar,
        changes_length=changes_length,
        cast_to_supertype=cast_to_supertype,
    )

0 commit comments

Comments
 (0)