-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Nf floats #1011
base: develop
Are you sure you want to change the base?
Nf floats #1011
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
"""Configuration for the package.""" | ||
from enum import Enum | ||
from typing import Any, Dict, List, Optional | ||
|
||
import warnings | ||
from pydantic import BaseModel, BaseSettings, Field | ||
|
||
def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accept any arguments, do nothing."""


def _warnings_suppressed() -> bool:
    """Return True when the user explicitly opted in to silencing warnings.

    The opt-in is the ``PANDAS_PROFILING_DISABLE_WARNINGS`` environment
    variable set to ``1``, ``true``, ``yes`` or ``on`` (case-insensitive).
    Any other value, or no value at all, leaves warnings enabled.
    """
    import os  # local import: the module header does not import ``os``

    flag = os.environ.get("PANDAS_PROFILING_DISABLE_WARNINGS", "")
    return flag.strip().lower() in {"1", "true", "yes", "on"}


# Warnings are shown by default. Replacing ``warnings.warn`` silences every
# caller in the process (not only this package), so it is done only on
# explicit request instead of by commenting code in or out.
if _warnings_suppressed():
    warnings.warn = warn
|
||
def _merge_dictionaries(dict1: dict, dict2: dict) -> dict: | ||
""" | ||
|
@@ -185,6 +189,22 @@ class Html(BaseModel): | |
|
||
full_width: bool = False | ||
|
||
class JsonNonFiniteEncoding(Enum):
    """Choice of how non-finite values are encoded in JSON output.

    Members are public on purpose: names with two leading underscores are
    name-mangled inside a class body, and since Python 3.11 ``Enum`` no longer
    converts such private names into members, so ``.value`` access on them
    fails.
    """

    # Use the default Python behaviour, which violates the official JSON
    # standard. NOTE(review): ``json.dumps`` defaults to ``allow_nan=True``
    # (emitting NaN/Infinity tokens); the previous comment said
    # ``allow_nan = False`` - confirm which setting is intended.
    default = 0
    # Encode non-finite numbers as null values.
    num_null = 1
    # Encode non-finite floats as null values.
    float_null = 2

    def fetch_python(self):
        """Return the ``default`` member, regardless of which member is ``self``."""
        return JsonNonFiniteEncoding.default

    def fetch_null_values(self):
        """Return the ``num_null`` member, regardless of which member is ``self``."""
        return JsonNonFiniteEncoding.num_null

    def fetch_float_values(self):
        """Return the ``float_null`` member, regardless of which member is ``self``."""
        return JsonNonFiniteEncoding.float_null


# Backward-compatible aliases: existing code reaches the members through their
# old name-mangled spelling (e.g. ``_JsonNonFiniteEncoding__num_null``), so
# keep those attribute names pointing at the real members.
for _member in JsonNonFiniteEncoding:
    setattr(JsonNonFiniteEncoding, "_JsonNonFiniteEncoding__" + _member.name, _member)
del _member
Comment on lines
+192
to
+207
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe Lukas' version is a bit clearer in this part. Can you help me understand why we would want private fields instead of Lukas' version? |
||
|
||
class Duplicates(BaseModel): | ||
head: int = 10 | ||
|
@@ -299,6 +319,11 @@ class Config: | |
n_freq_table_max: int = 10 | ||
n_extreme_obs: int = 10 | ||
|
||
# JSON encoding for non-finite values | ||
|
||
Jsnf_instance = JsonNonFiniteEncoding | ||
json_non_finite_encoding: Jsnf_instance = Jsnf_instance._JsonNonFiniteEncoding__num_null.value | ||
|
||
# Report rendering | ||
report: Report = Report() | ||
html: Html = Html() | ||
|
@@ -308,6 +333,43 @@ def update(self, updates: dict) -> "Settings": | |
update = _merge_dictionaries(self.dict(), updates) | ||
return self.parse_obj(self.copy(update=update)) | ||
|
||
class PandasSettings(Settings):
    # Inherits everything from Settings; declares no fields or overrides.
    ...
|
||
class SparkSettings(Settings): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe this would make more sense in the spark-branch. |
||
# TODO: write description | ||
vars: Univariate = Univariate() | ||
|
||
vars.num.low_categorical_threshold = 0 | ||
|
||
infer_dtypes = False | ||
|
||
correlations: Dict[str, Correlation] = { | ||
"spearman": Correlation(key="spearman"), | ||
"pearson": Correlation(key="pearson"), | ||
"kendall": Correlation(key="kendall"), | ||
"cramers": Correlation(key="cramers"), | ||
"phi_k": Correlation(key="phi_k"), | ||
} | ||
correlations["pearson"].calculate = True | ||
correlations["spearman"].calculate = True | ||
correlations["kendall"].calculate = False | ||
correlations["cramers"].calculate = False | ||
correlations["phi_k"].calculate = False | ||
|
||
interactions: Interactions = Interactions() | ||
interactions.continuous = False | ||
|
||
missing_diagrams: Dict[str, bool] = { | ||
"bar": False, | ||
"matrix": False, | ||
"dendrogram": False, | ||
"heatmap": False, | ||
} | ||
|
||
samples: Samples = Samples() | ||
samples.tail = 0 | ||
samples.random = 0 | ||
|
||
class Config: | ||
arg_groups: Dict[str, Any] = { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

# Single column "a" holding two ones and one NaN.
df = pd.DataFrame({"a": [1, 1, np.nan]})

profile = ProfileReport(df, minimal=True, title="Pandas Profiling Report")

# Serialise the profile to JSON and write it to stdout.
report_json = profile.to_json()
print(report_json)
Comment on lines
+4
to
+8
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should validate the expected behavior for each encoding. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A feature to disable warnings is desirable, but it should not be enabled or disabled by commenting out code; also, warnings should be enabled by default.