I have been trying to fix this error for the last few hours.
I want to create a GraphFrame in a Jupyter notebook. From conda, I start the notebook with: pyspark --packages graphframes:graphframes:0.8.2-spark2.4-s_2.11
Versions: spark.version is '3.4.1'; Scala code runner version 3.3.0; openjdk version "11.0.16.1" 2022-08-12 LTS.
My code is:
import os
import sys
import glob

# Raw strings keep the Windows backslashes literal; in a normal string,
# sequences such as "\s" or "\P" are invalid escapes (a warning today, and a
# silent bug for paths that contain e.g. "\t" or "\n").
os.environ['SPARK_HOME'] = r'C:\spark-3.4.1-bin-hadoop3'
os.environ['JAVA_HOME'] = r'C:\Program Files (x86)\Java\jre-1.8'
os.environ['HADOOP_HOME'] = r'C:\spark-3.4.1-bin-hadoop3'

# Locate the Python sources bundled with Spark, plus the py4j and graphframes
# archives, and make them importable in this interpreter.
# Indexing os.environ directly fails loudly with KeyError if SPARK_HOME is unset.
spark_python = os.path.join(os.environ['SPARK_HOME'], 'python')
py4j = glob.glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
graphf = glob.glob(os.path.join(spark_python, 'graphframes.zip'))[0]
sys.path[:0] = [spark_python, graphf, py4j]
os.environ['PYTHONPATH'] = os.pathsep.join([py4j, graphf])

from pyspark.sql import SparkSession

# The GraphFrames artifact must be built for the same Spark line and Scala
# binary version as the running Spark. The spark-3.4.1-bin-hadoop3 distribution
# is built with Scala 2.12, so a "...-spark2.4-s_2.11" jar fails inside
# createGraph with NoSuchMethodError on scala.Predef$.refArrayOps.
GRAPHFRAMES_PACKAGE = "graphframes:graphframes:0.8.3-spark3.4-s_2.12"

# NOTE: spark.jars.packages is only honoured when this call actually starts the
# JVM. If the notebook is launched through `pyspark --packages ...`, pass the
# same coordinate on that command line instead of the Scala 2.11 build.
spark = (
    SparkSession.builder
    .appName("Spark Examples")
    .config("spark.jars.packages", GRAPHFRAMES_PACKAGE)
    .enableHiveSupport()
    .getOrCreate()
)

# Import after the session exists so the Python module shipped with the
# GraphFrames package is on the path.
from graphframes import GraphFrame

# Vertex DataFrame: GraphFrame requires a column named "id".
vertices = spark.createDataFrame(
    [
        ("Alice", 45),
        ("Jacob", 43),
        ("Roy", 21),
        ("Ryan", 49),
        ("Emily", 24),
        ("Sheldon", 52),
    ],
    ["id", "age"],
)

# Edge DataFrame: GraphFrame requires "src" and "dst" columns that refer to
# vertex ids; "relation" is an extra edge attribute.
edges = spark.createDataFrame(
    [
        ("Sheldon", "Alice", "Sister"),
        ("Alice", "Jacob", "Husband"),
        ("Emily", "Jacob", "Father"),
        ("Ryan", "Alice", "Friend"),
        ("Alice", "Emily", "Daughter"),
        ("Alice", "Roy", "Son"),
        ("Jacob", "Roy", "Son"),
    ],
    ["src", "dst", "relation"],
)

family_tree = GraphFrame(vertices, edges)
The error is:
Py4JJavaError Traceback (most recent call last)
Cell In[17], line 1
----> 1 family_tree = GraphFrame(vertices, edges)
File ~\AppData\Local\Temp\spark-f62d597b-45f4-4439-bd18-a4472ada6af2\userFiles-dad65b78-f80a-4e50-94ba-6e6e21b3eb8a\graphframes_graphframes-0.8.2-spark2.4-s_2.11.jar\graphframes\graphframe.py:89, in GraphFrame.__init__(self, v, e)
84 if self.DST not in e.columns:
85 raise ValueError(
86 "Destination vertex ID column {} missing from edge DataFrame, which has columns: {}"
87 .format(self.DST, ",".join(e.columns)))
---> 89 self._jvm_graph = self._jvm_gf_api.createGraph(v._jdf, e._jdf)
File C:\spark-3.4.1-bin-hadoop3\python\lib\py4j-0.10.9.7-src.zip\py4j\java_gateway.py:1322, in JavaMember.__call__(self, *args)
1316 command = proto.CALL_COMMAND_NAME +\
1317 self.command_header +\
1318 args_command +\
1319 proto.END_COMMAND_PART
1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
1325 for temp_arg in temp_args:
1326 if hasattr(temp_arg, "_detach"):
File C:\spark-3.4.1-bin-hadoop3\python\pyspark\errors\exceptions\captured.py:169, in capture_sql_exception.<locals>.deco(*a, **kw)
167 def deco(*a: Any, **kw: Any) -> Any:
168 try:
--> 169 return f(*a, **kw)
170 except Py4JJavaError as e:
171 converted = convert_exception(e.java_exception)
File C:\spark-3.4.1-bin-hadoop3\python\lib\py4j-0.10.9.7-src.zip\py4j\protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling o124.createGraph.
: java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayOps scala.Predef$.refArrayOps(java.lang.Object[])'
at org.graphframes.GraphFrame$.apply(GraphFrame.scala:676)
at org.graphframes.GraphFramePythonAPI.createGraph(GraphFramePythonAPI.scala:10)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52)
at java.base/java.lang.reflect.Method.invoke(Method.java:578)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:1623)
I ran family_tree = GraphFrame(vertices, edges) and expected it to return a GraphFrame.