I have created pyspark image: spark 3.3.0 hadoop 3.3.4
I want to deploy it into k8s so I can customize the number of executors, their memory/cpu but I do not want to deploy a spark job. I want to deploy these pods as a purely pyspark image so I can kubectl exec into it and start spark-shell.
How can I achieve it?
- I know how to use spark-operator
- I know how to deploy spark/pyspark jobs with use of jars/.py+.zip files
but options 1 & 2 are not what I need here. Again, I purely want access to spark-shell.
So far I have been following this solution, but I am getting errors: https://gist.github.com/echang0929/9a9ccf7241f9221b7e59b9ec243e05f5#file-medium-spark-shell-on-k8s-sh
# --- Deployment parameters --------------------------------------------------
export NS_NAME=dev                   # target namespace
export SA_NAME=spark                 # service account the driver uses to create executor pods
export CLN_NAME=spark-client         # name of the long-running client pod
export POD_IMAG=raddeprodacr.azurecr.io/spark-py:s3.3.0
export SVC_NAME=$CLN_NAME-headless   # headless service so executors can dial the driver back
export SVC_PORT=19987                # spark.driver.port
# In-cluster API server endpoint. spark-shell runs INSIDE the pod (via
# kubectl exec), so 127.0.0.1:8001 — a kubectl proxy on the workstation —
# is not reachable from there. Use the kubernetes.default.svc address
# instead. Note the value already carries the "k8s://" scheme that
# --master requires; do not prepend it a second time.
export CLS_ENDP="k8s://https://kubernetes.default.svc:443"
export EXR_INST=3                    # number of executor pods
export EXR_MORY=7g                   # executor memory
export DRV_MORY=7g                   # driver memory
# Work in the target namespace from here on.
kubectl config set-context --current --namespace=$NS_NAME

# Idempotent create (client dry-run piped into apply) of the service account.
kubectl create sa $SA_NAME \
  --dry-run=client -o yaml | kubectl apply -f -

# Grant the SA the "edit" cluster role so the in-pod driver may create and
# delete executor pods. ClusterRoleBindings are cluster-scoped, so the
# original --namespace flag was ignored and has been dropped; the subject's
# namespace comes from the --serviceaccount=<ns>:<name> argument.
kubectl create clusterrolebinding ${SA_NAME}-${NS_NAME}-edit \
  --clusterrole=edit \
  --serviceaccount=$NS_NAME:$SA_NAME \
  --dry-run=client -o yaml | kubectl apply -f -
# CLN_NAME and POD_IMAG
# Launch an idle client pod we can `kubectl exec` into; `tail -f /dev/null`
# keeps it running forever without consuming CPU.
# NOTE: `kubectl run --serviceaccount` was deprecated and removed in
# kubectl v1.24, so the service account is set through --overrides instead.
kubectl run $CLN_NAME \
  --image=$POD_IMAG \
  --image-pull-policy=Always \
  --overrides='{"spec": {"serviceAccountName": "'$SA_NAME'", "nodeSelector": {"agentpool": "small"}}}' \
  --dry-run=client -o yaml \
  --command=true -- sh -c "exec tail -f /dev/null" | kubectl apply -f -
# SVC_NAME and SVC_PORT
# Headless ClusterIP service in front of the client pod: executors connect
# back to the driver via spark.driver.host/spark.driver.port, which resolve
# through this service.
kubectl expose pod "${CLN_NAME}" \
  --name="${SVC_NAME}" \
  --port="${SVC_PORT}" \
  --type=ClusterIP \
  --cluster-ip=None \
  --dry-run=client -o yaml | kubectl apply -f -
### Start spark-shell
# Open spark-shell inside the client pod. Client deploy mode: the shell
# itself is the driver; executors are created as pods through the API server.
# FIX: $CLS_ENDP already begins with "k8s://", so it must not be prefixed
# with another "k8s://" here — the original produced
# "k8s://k8s://http://…", an invalid master URL that Spark rejects.
kubectl exec -it $CLN_NAME -- sh -c '\
cd /opt/spark/; \
./bin/spark-shell \
--master "'$CLS_ENDP'" \
--deploy-mode client \
--conf spark.kubernetes.namespace="'$NS_NAME'" \
--conf spark.kubernetes.container.image="'$POD_IMAG'" \
--conf spark.kubernetes.container.image.pullPolicy=Always \
--conf spark.kubernetes.authenticate.serviceAccountName="'$SA_NAME'" \
--conf spark.kubernetes.driver.pod.name="'$CLN_NAME'" \
--conf spark.executor.instances="'$EXR_INST'" \
--conf spark.executor.memory="'$EXR_MORY'" \
--conf spark.driver.memory="'$DRV_MORY'" \
--conf spark.driver.host="'$SVC_NAME'" \
--conf spark.driver.port="'$SVC_PORT'" \
--conf spark.jars.ivy=/tmp/.ivy'