Here is a sample for a binomial classification problem using the H2O GLM algorithm on the Credit Card data set, written in Scala.
The following sample is for a binomial classification problem. This sample was created using Spark 2.1.0 with Sparkling Water 2.1.4.
// Binomial classification with H2O GLM on the credit card data set.
// Intended to be run in a Spark shell with Sparkling Water on the classpath;
// `sc` (the SparkContext) must already be in scope.
import org.apache.spark.h2o._
import water.support.SparkContextSupport.addFiles
import org.apache.spark.SparkFiles
import java.io.File
import water.support.{H2OFrameSupport, SparkContextSupport, ModelMetricsSupport}
import water.Key
import _root_.hex.glm.GLMModel
import _root_.hex.ModelMetricsBinomial

// Obtain the H2O context for the existing SparkContext and bring its
// members and implicit conversions into scope.
val hc = H2OContext.getOrCreate(sc)
import hc._
import hc.implicits._

// Register the training CSV with Spark, then load it as an H2O frame.
addFiles(sc, "/Users/avkashchauhan/learn/deepwater/credit_card_clients.csv")
val creditCardData = new H2OFrame(new File(SparkFiles.get("credit_card_clients.csv")))

// Split the data with ratio 0.8 into the frames keyed "train.hex" and "valid.hex".
val ratios = Array[Double](0.8)
val keys = Array[String]("train.hex", "valid.hex")
val frs = H2OFrameSupport.split(creditCardData, keys, ratios)
val (train, valid) = (frs(0), frs(1))

/** Trains a binomial-family GLM and returns the resulting model.
  *
  * The model is stored under the key "glmModel.hex" and trained with
  * alpha = 0.5.
  *
  * @param train      frame used for training
  * @param valid      frame used for validation
  * @param response   name of the response column
  * @param h2oContext H2O context (not referenced directly in the body)
  * @return the trained GLM model
  */
def buildGLMModel(train: Frame, valid: Frame, response: String)(
    implicit h2oContext: H2OContext): GLMModel = {
  import _root_.hex.glm.GLMModel.GLMParameters.Family
  import _root_.hex.glm.GLM
  import _root_.hex.glm.GLMModel.GLMParameters

  val glmParams = new GLMParameters(Family.binomial)
  glmParams._train = train
  glmParams._valid = valid
  glmParams._response_column = response
  glmParams._alpha = Array[Double](0.5)

  val glm = new GLM(glmParams, Key.make("glmModel.hex"))
  // Start training and wait for the finished model.
  glm.trainModel().get()
}

// Pass the response column as a plain String rather than a Symbol literal,
// so the call does not depend on an implicit Symbol-to-String conversion.
val glmModel = buildGLMModel(train, valid, "default_payment_next_month")(hc)

// Collect model metrics and evaluate model quality
val trainMetrics = ModelMetricsSupport.modelMetrics[ModelMetricsBinomial](glmModel, train)
val validMetrics = ModelMetricsSupport.modelMetrics[ModelMetricsBinomial](glmModel, valid)
println(trainMetrics.rmse)
println(validMetrics.rmse)
println(trainMetrics.mse)
println(validMetrics.mse)
println(trainMetrics.r2)
println(validMetrics.r2)
println(trainMetrics.auc)
println(validMetrics.auc)

// Prediction
addFiles(sc, "/Users/avkashchauhan/learn/deepwater/credit_card_predict.csv")
val creditPredictData = new H2OFrame(new File(SparkFiles.get("credit_card_predict.csv")))
val predictionFrame = glmModel.score(creditPredictData)
// Collect the predictions to the driver; missing values become NaN.
// NOTE(review): confirm that DoubleHolder maps onto the intended column of the
// scored frame if the frame contains more than one column.
val predictonResults = asRDD[DoubleHolder](predictionFrame).collect.map(_.result.getOrElse(Double.NaN))
That's it, enjoy!!
Could you confirm where the dataset is located (so that I can download it)?
LikeLike