0

`model.freqItemsets` from the FPGrowth algorithm in Spark 2.4 does not show any results for the complete 16 GB dataset, but the same code works for a 1 GB sample dataset that is a subset of the 16 GB dataset.

code snippet

import org.apache.spark._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext._
import org.apache.log4j.Level

import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.log4j._
import scala.io.Source
import java.nio.charset.CodingErrorAction
import scala.io.Codec
import org.apache.spark.mllib.recommendation._

import org.apache.spark.ml.fpm.FPGrowth
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.mllib.fpm.FPGrowthModel
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.explode



/**
 * Spark job that mines frequent itemsets with FP-Growth.
 *
 * Each input line is expected to hold at least two tab-separated fields; the
 * second field is a comma-separated list of items forming one transaction.
 *
 * Usage: `Full_Data_Association_4 <input-path> [minSupport] [minConfidence]`
 *
 * `minSupport` defaults to 0.1 and `minConfidence` to 0.6. An empty
 * `freqItemsets` result means no itemset reached `minSupport` on the given
 * data; pass a lower `minSupport` to get results.
 */
object Full_Data_Association_4 {

    private val DefaultMinSupport = 0.1
    private val DefaultMinConfidence = 0.6

    /**
     * Reads an optional fraction in [0, 1] from the command line.
     *
     * @param args    raw command-line arguments
     * @param index   position of the argument to read
     * @param name    argument name used in error messages
     * @param default value returned when the argument is absent
     * @return the parsed value, or `default` when `args` has no element at `index`
     * @throws IllegalArgumentException if the argument is not a number in [0, 1]
     */
    private def fractionArg(args: Array[String], index: Int, name: String, default: Double): Double =
        if (args.length <= index) default
        else {
            val parsed =
                try args(index).toDouble
                catch {
                    case _: NumberFormatException =>
                        throw new IllegalArgumentException(
                            s"$name must be a number in [0, 1], got '${args(index)}'")
                }
            require(parsed >= 0.0 && parsed <= 1.0, s"$name must be in [0, 1], got $parsed")
            parsed
        }

    /**
     * Entry point: fits an FP-Growth model on the input file and prints up to
     * 300 frequent itemsets.
     *
     * @param args `args(0)` is the input path; `args(1)` and `args(2)` are the
     *             optional minimum support and minimum confidence
     * @throws IllegalArgumentException if the input path is missing or a
     *                                  threshold is not a number in [0, 1]
     */
    def main(args: Array[String]): Unit = {
        require(
            args.nonEmpty,
            "usage: Full_Data_Association_4 <input-path> [minSupport] [minConfidence]")

        val minSupport = fractionArg(args, 1, "minSupport", DefaultMinSupport)
        val minConfidence = fractionArg(args, 2, "minConfidence", DefaultMinConfidence)

        Logger.getLogger("org").setLevel(Level.ERROR)

        val ss = SparkSession
                      .builder
                      .appName("Fpgrowth_1").getOrCreate()

        import ss.implicits._

        try {
            val transactions = ss.read.textFile(args(0))
                .map(_.split("\t"))
                // Skip lines without a second tab-separated field instead of
                // failing the whole job with ArrayIndexOutOfBoundsException.
                .filter(_.length > 1)
                // FP-Growth rejects transactions that contain the same item twice.
                .map(_(1).split(",").distinct)
                .toDF("items")

            // minConfidence only influences association-rule generation; the
            // frequent itemsets shown below depend on minSupport alone.
            val fpgrowth = new FPGrowth().setItemsCol("items")
                            .setMinSupport(minSupport).setMinConfidence(minConfidence)

            val model = fpgrowth.fit(transactions)

            model.freqItemsets.show(300)
        } finally {
            // Release cluster resources even when fitting fails.
            ss.stop()
        }
    }
}

And I get the following output:

+-----+----+
|items|freq|
+-----+----+
+-----+----+
barbsan
  • 3,418
  • 11
  • 21
  • 28
BalaKumar
  • 19
  • 3

1 Answer

0

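This means that, with a minimum support of 0.1, no itemset is frequent enough on the full dataset, so there is no result (the minimum confidence of 0.6 only affects association rules, not `freqItemsets`). Try a different value, say a minimum support of 0.001; you might get some results (depending on the dataset).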

user2805885
  • 1,683
  • 1
  • 14
  • 17
  • Thank you for the solution. I tried running with 0.01 as the minSupport and 0.1 as the minConfidence, and the Spark application has been running for more than 18 hours without showing any errors, but when I checked the YARN ResourceManager UI, I discovered the issue shown in the attached screenshot. – BalaKumar Dec 06 '18 at 05:07