from pyspark import SparkContext
from pyspark import SparkConf
import sys
def SetLogger(sc):
    """Silence noisy JVM-side log output for this Spark application.

    Uses the SparkContext's JVM gateway to set the log4j loggers for
    "org" and "akka", plus the root logger, to ERROR level.
    """
    log4j = sc._jvm.org.apache.log4j
    for logger_name in ("org", "akka"):
        log4j.LogManager.getLogger(logger_name).setLevel(log4j.Level.ERROR)
    log4j.LogManager.getRootLogger().setLevel(log4j.Level.ERROR)
def CreateSparkContext():
    """Create and return a SparkContext for the "WordCounts" application.

    Console progress bars are disabled so Spark's progress output does
    not interleave with the program's own printed output.

    Returns:
        SparkContext: the initialized context (also passed through
        SetLogger to reduce log verbosity).
    """
    # Bug fix: the original ended the last .set(...) line with a stray
    # trailing backslash, so the following `sc = ...` statement was
    # absorbed into the builder expression — a SyntaxError. The chained
    # calls are now grouped with parentheses instead of continuations.
    sparkConf = (
        SparkConf()
        .setAppName("WordCounts")
        .set("spark.ui.showConsoleProgress", "false")
    )
    sc = SparkContext(conf=sparkConf)
    print("master=" + sc.master)
    SetLogger(sc)
    # NOTE(review): SetPath is not defined in this file — presumably
    # provided elsewhere in the original tutorial source; confirm it is
    # in scope before running this module standalone.
    SetPath(sc)
    return sc
1、本地提交代码命令:
spark-submit --driver-memory 2g --master local[3] wordcount.py
利用jupyter notebook 进行运行
cd ~/pythonwork/ipynotebook
1、本地运行命令:(小插曲:该命令直接复制过去不可以运行,手敲就可以)
PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook" pyspark
2、Hadoop yarn-client运行命令:(需提前启动Hadoop集群)
PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook" HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop MASTER=yarn-client pyspark
因篇幅问题不能全部显示,请点此查看更多更全内容
Copyright © 2019- kqyc.cn 版权所有 赣ICP备2024042808号-2
违法及侵权请联系:TEL:199 1889 7713 E-MAIL:2724546146@qq.com
本站由北京市万商天勤律师事务所王兴未律师提供法律服务