summaryrefslogtreecommitdiff
path: root/MPE/spark/bin/pyspark
diff options
context:
space:
mode:
Diffstat (limited to 'MPE/spark/bin/pyspark')
-rw-r--r--MPE/spark/bin/pyspark77
1 files changed, 77 insertions, 0 deletions
diff --git a/MPE/spark/bin/pyspark b/MPE/spark/bin/pyspark
new file mode 100644
index 0000000..95ab628
--- /dev/null
+++ b/MPE/spark/bin/pyspark
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+ source "$(dirname "$0")"/find-spark-home
+fi
+
+source "${SPARK_HOME}"/bin/load-spark-env.sh
+export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
+
+# In Spark 2.0, IPYTHON and IPYTHON_OPTS are removed and pyspark fails to launch if either option
+# is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython
+# to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
+# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
+# and executor Python executables.
+
+# Fail noisily if removed options are set
+if [[ -n "$IPYTHON" || -n "$IPYTHON_OPTS" ]]; then
+ echo "Error in pyspark startup:"
+ echo "IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead."
+ exit 1
+fi
+
+# Default to standard python interpreter unless told otherwise
+if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+ PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"python"}"
+fi
+
+WORKS_WITH_IPYTHON=$(python -c 'import sys; print(sys.version_info >= (2, 7, 0))')
+
+# Determine the Python executable to use for the executors:
+if [[ -z "$PYSPARK_PYTHON" ]]; then
+ if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && ! $WORKS_WITH_IPYTHON ]]; then
+ echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
+ exit 1
+ else
+ PYSPARK_PYTHON=python
+ fi
+fi
+export PYSPARK_PYTHON
+
+# Add the PySpark classes to the Python path:
+export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+
+# Load the PySpark shell.py script when ./pyspark is used interactively:
+export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
+export PYTHONSTARTUP="${SPARK_HOME}/python/pyspark/shell.py"
+
+# For pyspark tests
+if [[ -n "$SPARK_TESTING" ]]; then
+ unset YARN_CONF_DIR
+ unset HADOOP_CONF_DIR
+ export PYTHONHASHSEED=0
+ exec "$PYSPARK_DRIVER_PYTHON" -m "$1"
+ exit
+fi
+
+export PYSPARK_DRIVER_PYTHON
+export PYSPARK_DRIVER_PYTHON_OPTS
+exec "${SPARK_HOME}"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"