Skip to content

Commit 73f94f2

Browse files
authored
Support opening spark data frames in the data viewer (#10304)
* Support opening spark data frames in the data viewer * Review feedback
1 parent 1f39fe2 commit 73f94f2

File tree

3 files changed

+23
-12
lines changed

3 files changed

+23
-12
lines changed

news/2 Fixes/9959.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Support opening Spark DataFrames in the data viewer.

pythonFiles/datascience/getJupyterVariableDataFrameInfo.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@
1010
# Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable
1111
_VSCODE_targetVariable = _VSCODE_json.loads("""_VSCode_JupyterTestValue""")
1212

13+
# Function to compute row count for a value
14+
def _VSCODE_getRowCount(var):
15+
if hasattr(var, "shape"):
16+
try:
17+
# Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it
18+
if isinstance(var.shape, tuple):
19+
return var.shape[0]
20+
except TypeError:
21+
return 0
22+
elif hasattr(var, "__len__"):
23+
try:
24+
return len(var)
25+
except TypeError:
26+
return 0
27+
28+
1329
# First check to see if we are a supported type, this prevents us from adding types that are not supported
1430
# and also keeps our types in sync with what the variable explorer says that we support
1531
if _VSCODE_targetVariable["type"] not in _VSCode_supportsDataExplorer:
@@ -21,18 +37,7 @@
2137
_VSCODE_evalResult = eval(_VSCODE_targetVariable["name"])
2238

2339
# Figure out shape if not already there. Use the shape to compute the row count
24-
if hasattr(_VSCODE_evalResult, "shape"):
25-
try:
26-
# Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it
27-
if isinstance(_VSCODE_evalResult.shape, tuple):
28-
_VSCODE_targetVariable["rowCount"] = _VSCODE_evalResult.shape[0]
29-
except TypeError:
30-
_VSCODE_targetVariable["rowCount"] = 0
31-
elif hasattr(_VSCODE_evalResult, "__len__"):
32-
try:
33-
_VSCODE_targetVariable["rowCount"] = len(_VSCODE_evalResult)
34-
except TypeError:
35-
_VSCODE_targetVariable["rowCount"] = 0
40+
_VSCODE_targetVariable["rowCount"] = _VSCODE_getRowCount(_VSCODE_evalResult)
3641

3742
# Turn the eval result into a df
3843
_VSCODE_df = _VSCODE_evalResult
@@ -45,6 +50,9 @@
4550
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
4651
elif _VSCODE_targetVariable["type"] == "ndarray":
4752
_VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
53+
elif hasattr(_VSCODE_df, "toPandas"):
54+
_VSCODE_df = _VSCODE_df.toPandas()
55+
_VSCODE_targetVariable["rowCount"] = _VSCODE_getRowCount(_VSCODE_df)
4856

4957
# If any rows, use pandas json to convert a single row to json. Extract
5058
# the column names and types from the json so we match what we'll fetch when

pythonFiles/datascience/getJupyterVariableDataFrameRows.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
2525
elif _VSCODE_targetVariable["type"] == "ndarray":
2626
_VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
27+
elif hasattr(_VSCODE_df, "toPandas"):
28+
_VSCODE_df = _VSCODE_df.toPandas()
2729
# If not a known type, then just let pandas handle it.
2830
elif not (hasattr(_VSCODE_df, "iloc")):
2931
_VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)

0 commit comments

Comments
 (0)