Here is a sample binomial classification problem solved with the H2O GLM algorithm on the Credit Card data set, written in Scala.
The following sample is for a binomial classification problem. This sample was created using Spark 2.1.0 with Sparkling Water 2.1.4.
// Binomial classification on the credit-card data set with an H2O GLM.
// `sc` is not defined in this script; presumably it is the SparkContext
// supplied by the Spark shell -- confirm before running elsewhere.
import org.apache.spark.h2o._
import water.support.SparkContextSupport.addFiles
import org.apache.spark.SparkFiles
import java.io.File
import water.support.{H2OFrameSupport, SparkContextSupport, ModelMetricsSupport}
import water.Key
import _root_.hex.glm.GLMModel
import _root_.hex.ModelMetricsBinomial

// Obtain the H2O context and bring its helpers and implicits into scope.
val hc = H2OContext.getOrCreate(sc)
import hc._
import hc.implicits._

// Register the training CSV with Spark and load it as an H2O frame.
addFiles(sc, "/Users/avkashchauhan/learn/deepwater/credit_card_clients.csv")
val creditCardData = new H2OFrame(new File(SparkFiles.get("credit_card_clients.csv")))

// Split the data into two frames keyed "train.hex" and "valid.hex",
// using a single split ratio of 0.8.
val ratios = Array[Double](0.8)
val keys = Array[String]("train.hex", "valid.hex")
val frs = H2OFrameSupport.split(creditCardData, keys, ratios)
val (train, valid) = (frs(0), frs(1))

/** Trains a binomial GLM and returns the trained model.
  *
  * @param train      frame used as the training set
  * @param valid      frame used as the validation set
  * @param response   name of the response column
  * @param h2oContext implicit H2O context (not referenced in the body)
  * @return the model produced by `trainModel().get()`, stored under the key "glmModel.hex"
  */
def buildGLMModel(train: Frame, valid: Frame, response: String)
                 (implicit h2oContext: H2OContext): GLMModel = {
  import _root_.hex.glm.GLMModel.GLMParameters.Family
  import _root_.hex.glm.GLM
  import _root_.hex.glm.GLMModel.GLMParameters

  val glmParams = new GLMParameters(Family.binomial)
  glmParams._train = train
  glmParams._valid = valid
  glmParams._response_column = response
  glmParams._alpha = Array[Double](0.5)

  val glm = new GLM(glmParams, Key.make("glmModel.hex"))
  // The last expression is the function's result: the trained model.
  glm.trainModel().get()
}

// The response column is passed as a plain String, matching the declared
// parameter type, instead of a Symbol literal that needs an implicit conversion.
val glmModel = buildGLMModel(train, valid, "default_payment_next_month")(hc)

// Collect model metrics and evaluate model quality
val trainMetrics = ModelMetricsSupport.modelMetrics[ModelMetricsBinomial](glmModel, train)
val validMetrics = ModelMetricsSupport.modelMetrics[ModelMetricsBinomial](glmModel, valid)
println(trainMetrics.rmse)
println(validMetrics.rmse)
println(trainMetrics.mse)
println(validMetrics.mse)
println(trainMetrics.r2)
println(validMetrics.r2)
println(trainMetrics.auc)
println(validMetrics.auc)

// Prediction: load the scoring CSV and score it with the trained model.
addFiles(sc, "/Users/avkashchauhan/learn/deepwater/credit_card_predict.csv")
val creditPredictData = new H2OFrame(new File(SparkFiles.get("credit_card_predict.csv")))
val predictionFrame = glmModel.score(creditPredictData)

// Pull the predictions back to the driver; missing values become NaN.
// NOTE(review): confirm DoubleHolder maps onto the prediction frame's columns as intended.
val predictonResults = asRDD[DoubleHolder](predictionFrame).collect.map(_.result.getOrElse(Double.NaN))
That's it, enjoy!
2 thoughts on “Binomial classification example in Scala and GBM with H2O”