I am using a Colab Notebook and doing a linear regression in Pyspark. I completed the whole regression with no problems. However, we were asked to add a regression plot, and that's where I am stuck.
I had multiple features in my dataset which I clubbed into one column using the feature assembler. Here is what my predicted dataset looks like:
I now want to create something like this:
And I am using the following code:
import chart_studio.plotly as py
import plotly.graph_objects as go
x = mdata.toPandas()['Independent_Features'].values.tolist()
y = mdata.toPandas()['MonthlyCharges'].values.tolist()
y_pred=mdata.toPandas()['prediction'].values.tolist()
fig = go.Figure()
fig.add_trace(
go.Scatter(
x=x,
y=y,
mode='markers',
name='Original_Data',
))
fig.add_trace(
go.Scatter(
x=x,
y=y_pred,
name='Predicted'
))
fig.update_layout(
title="Linear Regression",
xaxis_title="Independent Features",
yaxis_title="Monthly Charges",
font=dict(
family="Courier New, monospace",
size=18,
color="#7f7f7f"
)
)
fig.show()
But I am getting this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-45-cb270db7e5d4> in <module>()
30
31
---> 32 fig.show()
11 frames
/usr/local/lib/python3.7/dist-packages/plotly/basedatatypes.py in show(self, *args, **kwargs)
3396 import plotly.io as pio
3397
-> 3398 return pio.show(self, *args, **kwargs)
3399
3400 def to_json(self, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/plotly/io/_renderers.py in show(fig, renderer, validate, **kwargs)
387
388 # Mimetype renderers
--> 389 bundle = renderers._build_mime_bundle(fig_dict, renderers_string=renderer, **kwargs)
390 if bundle:
391 if not ipython_display:
/usr/local/lib/python3.7/dist-packages/plotly/io/_renderers.py in _build_mime_bundle(self, fig_dict, renderers_string, **kwargs)
295 setattr(renderer, k, v)
296
--> 297 bundle.update(renderer.to_mimebundle(fig_dict))
298
299 return bundle
/usr/local/lib/python3.7/dist-packages/plotly/io/_base_renderers.py in to_mimebundle(self, fig_dict)
389 default_width="100%",
390 default_height=525,
--> 391 validate=False,
392 )
393
/usr/local/lib/python3.7/dist-packages/plotly/io/_html.py in to_html(fig, config, auto_play, include_plotlyjs, include_mathjax, post_script, full_html, animation_opts, default_width, default_height, validate, div_id)
144
145 # ## Serialize figure ##
--> 146 jdata = to_json_plotly(fig_dict.get("data", []))
147 jlayout = to_json_plotly(fig_dict.get("layout", {}))
148
/usr/local/lib/python3.7/dist-packages/plotly/io/_json.py in to_json_plotly(plotly_object, pretty, engine)
122 from _plotly_utils.utils import PlotlyJSONEncoder
123
--> 124 return json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts)
125 elif engine == "orjson":
126 JsonConfig.validate_orjson()
/usr/lib/python3.7/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)
239
240
/usr/local/lib/python3.7/dist-packages/_plotly_utils/utils.py in encode(self, o)
57 """
58 # this will raise errors in a normal-expected way
---> 59 encoded_o = super(PlotlyJSONEncoder, self).encode(o)
60 # Brute force guessing whether NaN or Infinity values are in the string
61 # We catch false positive cases (e.g. strings such as titles, labels etc.)
/usr/lib/python3.7/json/encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
/usr/lib/python3.7/json/encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
/usr/local/lib/python3.7/dist-packages/_plotly_utils/utils.py in default(self, obj)
134 except NotEncodable:
135 pass
--> 136 return _json.JSONEncoder.default(self, obj)
137
138 @staticmethod
/usr/lib/python3.7/json/encoder.py in default(self, o)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
181
TypeError: Object of type SparseVector is not JSON serializable
Here is a link to the notebook: https://colab.research.google.com/drive/1NLibPkZgOE_w7dVTerAF4nUQiPXW_o06?usp=sharing
I saw it mentioned SparseVector, so I tried to convert it to a DenseVector, but that just wouldn't work, tried multiple commands, but nothing helps.