I am getting a KeyError when using ydata_profiling. I have tried versions 4.10, 4.9, and 4.8.3, and all of them fail with the same error:
KeyError: ‘50%’
try:
from ydata_profiling import ProfileReport
except ModuleNotFoundError:
!pip install ydata-profiling
from ydata_profiling import ProfileReport
design_report = ProfileReport(df)
design_report.to_notebook_iframe()
There are 99 variables in the dataset. It appears to always stop at variable 50. Is there a hard limit?
When I switch to sampling, I get a different error:
sample = pandas_df.sample(1000)
report = ProfileReport(sample)
report.to_notebook_iframe()
DispatchError: Function <code object pandas_get_series_descriptions at 0x68b9c90, file “/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py”, line 60>
Traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File ~/cluster-env/trident_env/lib/python3.11/site-packages/multimethod/__init__.py:375, in multimethod.__call__(self, *args, **kwargs)
374 try:
–> 375 return func(*args, **kwargs)
376 except TypeError as ex:File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/summary_algorithms.py:73,
in series_hashable..inner(config, series, summary)
72 return config, series, summary
—> 73 return fn(config, series, summary)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/summary_algorithms.py:90,
in series_handle_nulls..inner(config, series, summary)
88 series = series.dropna()
—> 90 return fn(config, series, summary)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:44,
in pandas_describe_date_1d(config, series, summary)
41 else:
42 summary.update(
43 {
—> 44 “min”: pd.Timestamp.to_pydatetime(series.min()),
45 “max”: pd.Timestamp.to_pydatetime(series.max()),
46 }
47 )
49 summary["range"] = summary["max"] - summary["min"]
TypeError: descriptor 'to_pydatetime' for 'pandas._libs.tslibs.timestamps._Timestamp' objects doesn't apply to a 'datetime.date' object
The above exception was the direct cause of the following exception:
DispatchError Traceback (most recent call
last) File
~/cluster-env/trident_env/lib/python3.11/site-packages/multimethod/init.py:375,
in multimethod.call(self, *args, **kwargs)
374 try:
–> 375 return func(*args, **kwargs)
376 except TypeError as ex:File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py:57,
in pandas_describe_1d(config, series, summarizer, typeset)
56 typeset.type_schema[series.name] = vtype
—> 57 return summarizer.summarize(config, series, dtype=vtype)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/summarizer.py:42,
in BaseSummarizer.summarize(self, config, series, dtype)
37 “””
38
39 Returns:
40 object:
41 “””
—> 42 _, _, summary = self.handle(str(dtype), config, series, {“type”: str(dtype)})
43 return summaryFile
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/handler.py:62, in Handler.handle(self, dtype, *args, **kwargs)
61 op = compose(funcs)
—> 62 return op(*args)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/handler.py:21, in compose..func..func2(*x)
20 else:
—> 21 return f(*res)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/handler.py:21, in compose..func..func2(*x)
20 else:
—> 21 return f(*res)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/handler.py:21, in compose..func..func2(*x)
20 else:
—> 21 return f(*res)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/handler.py:17, in compose..func..func2(*x)
16 def func2(*x) -> Any:
—> 17 res = g(*x)
18 if type(res) == bool:File
~/cluster-env/trident_env/lib/python3.11/site-packages/multimethod/init.py:377,
in multimethod.call(self, *args, **kwargs)
376 except TypeError as ex:
--> 377 raise DispatchError(f"Function {func.__code__}") from ex
DispatchError: Function <code object inner at 0x7c3fb52871e0, file "/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/summary_algorithms.py", line 67>
The above exception was the direct cause of the following exception:
DispatchError Traceback (most recent call
last) File
~/cluster-env/trident_env/lib/python3.11/site-packages/multimethod/init.py:375,
in multimethod.call(self, *args, **kwargs)
374 try:
–> 375 return func(*args, **kwargs)
376 except TypeError as ex:File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py:99,
in pandas_get_series_descriptions(config, df, summarizer, typeset,
pbar)
98 with multiprocessing.pool.ThreadPool(pool_size) as executor:
—> 99 for i, (column, description) in enumerate(
100 executor.imap_unordered(multiprocess_1d, args)
101 ):
102 pbar.set_postfix_str(f”Describe variable:{column}”)File
~/cluster-env/trident_env/lib/python3.11/multiprocessing/pool.py:873,
in IMapIterator.next(self, timeout)
872 return value
–> 873 raise valueFile
~/cluster-env/trident_env/lib/python3.11/multiprocessing/pool.py:125,
in worker(inqueue, outqueue, initializer, initargs, maxtasks,
wrap_exception)
124 try:
–> 125 result = (True, func(*args, **kwds))
126 except Exception as e:File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py:79,
in pandas_get_series_descriptions..multiprocess_1d(args)
78 column, series = args
—> 79 return column, describe_1d(config, series, summarizer, typeset)File
~/cluster-env/trident_env/lib/python3.11/site-packages/multimethod/init.py:377,
in multimethod.call(self, *args, **kwargs)
376 except TypeError as ex:
–> 377 raise DispatchError(f”Function {func.code}”) from exDispatchError: Function <code object pandas_describe_1d at 0x5a709b0,
file
“/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py”,
line 19>The above exception was the direct cause of the following exception:
DispatchError Traceback (most recent call
last) Cell In[14], line 6
3 sample = pandas_df.sample(1000)
5 report = ProfileReport(sample)
—-> 6 report.to_notebook_iframe()File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/profile_report.py:526,
in ProfileReport.to_notebook_iframe(self)
524 with warnings.catch_warnings():
525 warnings.simplefilter(“ignore”)
–> 526 display(get_notebook_iframe(self.config, self))File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/report/presentation/flavours/widget/notebook.py:75,
in get_notebook_iframe(config, profile)
73 output = get_notebook_iframe_src(config, profile)
74 elif attribute == IframeAttribute.srcdoc:
—> 75 output = get_notebook_iframe_srcdoc(config, profile)
76 else:
77 raise ValueError(
78 f’Iframe Attribute can be “src” or “srcdoc” (current: {attribute}).’
79 )File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/report/presentation/flavours/widget/notebook.py:29,
in get_notebook_iframe_srcdoc(config, profile)
27 width = config.notebook.iframe.width
28 height = config.notebook.iframe.height
—> 29 src = html.escape(profile.to_html())
31 iframe = f”
33 return HTML(iframe)File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/profile_report.py:496,
in ProfileReport.to_html(self)
488 def to_html(self) -> str:
489 “””Generate and return complete template as lengthy string
490 for using with frameworks.
491 (…)
494
495 “””
–> 496 return self.htmlFile
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/profile_report.py:292,
in ProfileReport.html(self)
289 @property
290 def html(self) -> str:
291 if self._html is None:
–> 292 self._html = self._render_html()
293 return self._htmlFile
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/profile_report.py:409,
in ProfileReport._render_html(self)
406 def _render_html(self) -> str:
407 from ydata_profiling.report.presentation.flavours import HTMLReport
–> 409 report = self.report
411 with tqdm(
412 total=1, desc=”Render HTML”, disable=not self.config.progress_bar
413 ) as pbar:
414 html = HTMLReport(copy.deepcopy(report)).render(
415 nav=self.config.html.navbar_show,
416 offline=self.config.html.use_local_assets, (…)
424 version=self.description_set.package[“ydata_profiling_version”],
425 )File
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/profile_report.py:286,
in ProfileReport.report(self)
283 @property
284 def report(self) -> Root:
285 if self._report is None:
–> 286 self._report = get_report_structure(self.config, self.description_set)
287 return self._reportFile
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/profile_report.py:268,
in ProfileReport.description_set(self)
265 @property
266 def description_set(self) -> BaseDescription:
267 if self._description_set is None:
–> 268 self._description_set = describe_df(
269 self.config,
270 self.df,
271 self.summarizer,
272 self.typeset,
273 self._sample,
274 )
275 return self._description_setFile
~/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/describe.py:74,
in describe(config, df, summarizer, typeset, sample)
72 # Variable-specific
73 pbar.total += len(df.columns)
—> 74 series_description = get_series_descriptions(
75 config, df, summarizer, typeset, pbar
76 )
78 pbar.set_postfix_str(“Get variable types”)
79 pbar.total += 1File
~/cluster-env/trident_env/lib/python3.11/site-packages/multimethod/init.py:377,
in multimethod.call(self, *args, **kwargs)
375 return func(*args, **kwargs)
376 except TypeError as ex:
–> 377 raise DispatchError(f”Function {func.code}”) from exDispatchError: Function <code object pandas_get_series_descriptions at
0x5a81ae0, file
“/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py”,
line 60>
5