3.1 编写 WordCount 程序

步骤1:创建 maven 项目, 导入依赖

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.1.1</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <!-- 打包插件, 否则 scala 类不会编译并打包进去 -->
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>3.4.6</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

步骤2: 创建WordCount.scala

package day01

object WordCount {
    def main(args: Array[String]): Unit = {
        // 1. 创建 SparkConf对象, 并设置 App名字
        val conf: SparkConf = new SparkConf().setAppName("WordCount")
        // 2. 创建SparkContext对象
        val sc = new SparkContext(conf)
        // 3. 使用sc创建RDD并执行相应的transformation和action
        sc.textFile("/input")
            .flatMap(_.split(" "))
            .map((_, 1))
            .reduceByKey(_ + _)
            .saveAsTextFile("/result")
        // 4. 关闭连接
        sc.stop()
    }
}
Copyright © 尚硅谷大数据 2019 all right reserved,powered by Gitbook
该文件最后修订时间: 2019-08-09 00:21:43

results matching ""

    No results matching ""