package org.apache.spark.examples.h2o;

import hex.Model;
import hex.SupervisedModel;
import hex.deeplearning.DeepLearning;
import hex.deeplearning.DeepLearningModel;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkFiles$;
import org.apache.spark.h2o.H2OContext;
import org.apache.spark.mllib.feature.HashingTF;
import org.apache.spark.mllib.feature.IDF;
import org.apache.spark.mllib.feature.IDFModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.SQLContext;
import scala.Array$;
import scala.MatchError;
import scala.Predef$;
import scala.StringContext;
import scala.Symbol$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.ScalaRunTime$;
import water.Key;
import water.fvec.Frame;
import water.fvec.H2OFrame;

/* compiled from: HamOrSpamDemo.scala */
/* loaded from: input_file:org/apache/spark/examples/h2o/HamOrSpamDemo$.class */
public final class HamOrSpamDemo$ {
    /**
     * Singleton instance of this Scala object. The private constructor assigns it
     * ({@code MODULE$ = this}) when the static initializer below instantiates the class.
     * NOTE(review): a {@code final} field initialised to {@code null} here and assigned again in
     * the constructor is decompiler output; javac rejects that assignment - confirm before
     * trying to compile this file as Java.
     */
    public static final HamOrSpamDemo$ MODULE$ = null;
    /** Name of the data file; set in the constructor and exposed through {@link #DATAFILE()}. */
    private final String DATAFILE;
    /** Messages set in the constructor and exposed through {@link #TEST_MSGS()}. */
    private final Seq<String> TEST_MSGS;

    static {
        // Creating the instance runs the constructor, which stores the instance in MODULE$.
        new HamOrSpamDemo$();
    }

    /**
     * Accessor for the data file name stored by the constructor.
     *
     * @return the value of the {@code DATAFILE} field
     */
    public String DATAFILE() {
        return DATAFILE;
    }

    /**
     * Accessor for the message sequence stored by the constructor; {@code main} iterates over it
     * after the model has been built.
     *
     * @return the value of the {@code TEST_MSGS} field
     */
    public Seq<String> TEST_MSGS() {
        return TEST_MSGS;
    }

    /**
     * Runs the demo end to end: creates the Spark, H2O and SQL contexts, loads the data file,
     * builds the TF-IDF representation, converts it to an H2O frame, splits it 80/20 into
     * "train.hex" and "valid.hex", trains a deep learning model, prints the AUC on both
     * splits and finally applies a function to every entry of {@link #TEST_MSGS()}.
     *
     * <p>The Spark context is stopped in a {@code finally} block so that it is released even
     * when one of the steps throws.
     *
     * @param strArr command-line arguments; not read by this method
     */
    public void main(String[] strArr) {
        SparkContext sc = new SparkContext(DemoUtils$.MODULE$.configure("Sparkling Water Meetup: Ham or Spam (spam text messages detector)"));
        try {
            // Register the data file with Spark; load() later resolves it through SparkFiles.
            DemoUtils$.MODULE$.addFiles(sc, Predef$.MODULE$.wrapRefArray(new String[]{"examples/smalldata/" + DATAFILE()}));
            H2OContext h2oContext = new H2OContext(sc).start();
            SQLContext sqlContext = new SQLContext(sc);

            RDD<String[]> rows = load(sc, DATAFILE());
            // The two mapping functions are defined elsewhere; presumably the first extracts the
            // label and the second the message text of each row - verify against their sources.
            RDD labels = rows.map(new HamOrSpamDemo$$anonfun$1(), ClassTag$.MODULE$.apply(String.class));
            Tuple3<HashingTF, IDFModel, RDD<Vector>> idfResult = buildIDFModel(tokenize(rows.map(new HamOrSpamDemo$$anonfun$2(), ClassTag$.MODULE$.apply(String.class))), buildIDFModel$default$2(), buildIDFModel$default$3());
            if (idfResult == null) {
                throw new MatchError(idfResult);
            }
            // The hashing and IDF models are boxed in ObjectRef because the per-message function
            // created at the end of this method takes them in that form.
            ObjectRef hashingTFRef = new ObjectRef(idfResult._1());
            ObjectRef idfModelRef = new ObjectRef(idfResult._2());
            RDD<Vector> tfidf = idfResult._3();

            // Pair each label with its TF-IDF vector, turn the pairs into SMS records and publish
            // them as an H2O frame.
            H2OFrame table = h2oContext.createDataFrame(sqlContext.createSchemaRDD(labels.zip(tfidf, ClassTag$.MODULE$.apply(Vector.class)).map(new HamOrSpamDemo$$anonfun$3(), ClassTag$.MODULE$.apply(SMS.class)), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.examples.h2o.HamOrSpamDemo$$typecreator1$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("org.apache.spark.examples.h2o.SMS").asType().toTypeConstructor();
                }
            })));

            Frame[] parts = DemoUtils$.MODULE$.split(table, Predef$.MODULE$.wrapRefArray(new String[]{"train.hex", "valid.hex"}), Predef$.MODULE$.wrapDoubleArray(new double[]{0.8d}));
            Frame train = parts[0];
            Frame valid = parts[1];
            // The unsplit frame is no longer needed once both parts exist.
            table.delete();

            DeepLearningModel dlModel = buildDLModel(train, valid, buildDLModel$default$3(), buildDLModel$default$4(), buildDLModel$default$5(), buildDLModel$default$6(), h2oContext);

            double trainAuc = DemoUtils$.MODULE$.binomialMM(dlModel, train).auc()._auc;
            double validAuc = DemoUtils$.MODULE$.binomialMM(dlModel, valid).auc()._auc;
            Predef$.MODULE$.println(new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"\n         |AUC on train data = ", "\n         |AUC on valid data = ", "\n       "})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToDouble(trainAuc), BoxesRunTime.boxToDouble(validAuc)})))).stripMargin());

            TEST_MSGS().foreach(new HamOrSpamDemo$$anonfun$main$1(sc, h2oContext, sqlContext, hashingTFRef, idfModelRef, dlModel));
        } finally {
            // Always release the Spark context, including when a step above failed.
            sc.stop();
        }
    }

    /**
     * Reads a file that was registered with Spark and turns it into an RDD of string arrays.
     * The file name is resolved through {@code SparkFiles}, each line is mapped to a
     * {@code String[]} by {@code HamOrSpamDemo$$anonfun$load$1} and the result is filtered by
     * {@code HamOrSpamDemo$$anonfun$load$2} (both functions are defined elsewhere).
     *
     * @param sparkContext context used to read the text file
     * @param str          name of the file as known to {@code SparkFiles}
     * @return the mapped and filtered rows
     */
    public RDD<String[]> load(SparkContext sparkContext, String str) {
        String resolvedPath = SparkFiles$.MODULE$.get(str);
        RDD<String> lines = sparkContext.textFile(resolvedPath, sparkContext.textFile$default$2());
        return lines
                .map(new HamOrSpamDemo$$anonfun$load$1(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)))
                .filter(new HamOrSpamDemo$$anonfun$load$2());
    }

    /**
     * Maps every string of the input RDD to a sequence of strings using
     * {@code HamOrSpamDemo$$anonfun$4}, which is configured with a fixed list of words and a
     * fixed list of characters.
     * NOTE(review): the mapping function is defined elsewhere; presumably the words are ignored
     * and the characters are stripped or used as separators - confirm against its source.
     *
     * @param rdd the strings to tokenize
     * @return one sequence of strings per input element
     */
    public RDD<Seq<String>> tokenize(RDD<String> rdd) {
        Seq words = Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(
                new String[]{"the", "a", "", "in", "on", "at", "as", "not", "for"}));
        Seq characters = Seq$.MODULE$.apply(Predef$.MODULE$.wrapCharArray(
                new char[]{',', ':', ';', '/', '<', '>', '\"', '.', '(', ')', '?', '-', '\'', '!', '0', '1'}));
        return rdd.map(new HamOrSpamDemo$$anonfun$4(words, characters), ClassTag$.MODULE$.apply(Seq.class));
    }

    /**
     * Hashes the token sequences into term-frequency vectors, fits an IDF model on them and
     * applies that model to the same vectors.
     *
     * @param rdd token sequences, one per document
     * @param i   value passed to the {@code IDF} constructor
     * @param i2  value passed to the {@code HashingTF} constructor
     * @return the hashing transformer, the fitted IDF model and the transformed vectors
     */
    public Tuple3<HashingTF, IDFModel, RDD<Vector>> buildIDFModel(RDD<Seq<String>> rdd, int i, int i2) {
        HashingTF termHasher = new HashingTF(i2);
        RDD termFrequencies = termHasher.transform(rdd);
        IDF idfEstimator = new IDF(i);
        IDFModel idfModel = idfEstimator.fit(termFrequencies);
        return new Tuple3<>(termHasher, idfModel, idfModel.transform(termFrequencies));
    }

    /**
     * Default for the second parameter of {@code buildIDFModel}, which that method passes to
     * the {@code IDF} constructor.
     *
     * @return 4
     */
    public int buildIDFModel$default$2() {
        final int defaultValue = 4;
        return defaultValue;
    }

    /**
     * Default for the third parameter of {@code buildIDFModel}, which that method passes to
     * the {@code HashingTF} constructor.
     *
     * @return 1024
     */
    public int buildIDFModel$default$3() {
        final int defaultValue = 1024;
        return defaultValue;
    }

    /**
     * Trains a deep learning model with the response column "target" and scores it once on the
     * training frame and once on the validation frame, deleting both score results.
     *
     * <p>The L2 value {@code d2} is now written to the parameters; previously it was accepted
     * but never applied, so callers could not change the L2 regularisation.
     *
     * @param frame      training frame
     * @param frame2     validation frame
     * @param i          number of epochs
     * @param d          L1 regularisation value
     * @param d2         L2 regularisation value
     * @param iArr       hidden layer sizes
     * @param h2OContext context used to convert the frames to keys and the symbol to a string
     * @return the trained model
     */
    public DeepLearningModel buildDLModel(Frame frame, Frame frame2, int i, double d, double d2, int[] iArr, H2OContext h2OContext) {
        DeepLearningModel.DeepLearningParameters deepLearningParameters = new DeepLearningModel.DeepLearningParameters();
        ((Model.Parameters) deepLearningParameters)._model_id = Key.make("dlModel.hex");
        ((Model.Parameters) deepLearningParameters)._train = h2OContext.dataFrameToKey(frame);
        ((Model.Parameters) deepLearningParameters)._valid = h2OContext.dataFrameToKey(frame2);
        ((SupervisedModel.SupervisedParameters) deepLearningParameters)._response_column = h2OContext.symbolToString(Symbol$.MODULE$.apply("target"));
        deepLearningParameters._epochs = i;
        deepLearningParameters._l1 = d;
        // Apply the L2 argument as well; it used to be silently ignored.
        deepLearningParameters._l2 = d2;
        deepLearningParameters._hidden = iArr;

        // trainModel() starts the training job and get() waits for the resulting model.
        DeepLearningModel deepLearningModel = new DeepLearning(deepLearningParameters).trainModel().get();

        // Score both frames; the returned prediction frames are not needed and are deleted at once.
        deepLearningModel.score(frame).delete();
        deepLearningModel.score(frame2).delete();
        return deepLearningModel;
    }

    /**
     * Default for the third parameter of {@code buildDLModel}, which that method stores as the
     * number of epochs.
     *
     * @return 10
     */
    public int buildDLModel$default$3() {
        final int defaultValue = 10;
        return defaultValue;
    }

    /**
     * Default for the fourth parameter of {@code buildDLModel}, which that method stores in the
     * {@code _l1} parameter field.
     *
     * @return 0.001
     */
    public double buildDLModel$default$4() {
        final double defaultValue = 0.001d;
        return defaultValue;
    }

    /**
     * Default for the fifth parameter of {@code buildDLModel}.
     *
     * @return 0.0
     */
    public double buildDLModel$default$5() {
        final double defaultValue = 0.0d;
        return defaultValue;
    }

    /**
     * Default for the sixth parameter of {@code buildDLModel}, which that method stores as the
     * hidden layer sizes. A new array is created on every call.
     *
     * @return a fresh array containing 200 and 200
     */
    public int[] buildDLModel$default$6() {
        return new int[]{200, 200};
    }

    /**
     * Scores a single message with the trained model and compares the result with a threshold.
     * The message is tokenized, hashed, weighted with the IDF model, converted to a one-row
     * H2O frame and, after removing the frame's first column, passed to the model.
     *
     * <p>Both the temporary input frame and the prediction frame are deleted before returning,
     * so repeated calls no longer leave frames behind in H2O.
     *
     * @param str               message to classify
     * @param sparkContext      context used to parallelize the message
     * @param deepLearningModel trained model used for scoring
     * @param hashingTF         hashing transformer applied to the tokens
     * @param iDFModel          IDF model applied to the hashed vector
     * @param d                 threshold the predicted value is compared with
     * @param sQLContext        context used to create the schema RDD
     * @param h2OContext        context used to create the H2O frame
     * @return {@code true} when row 0 of the second prediction column is below {@code d}
     */
    public boolean isSpam(String str, SparkContext sparkContext, DeepLearningModel deepLearningModel, HashingTF hashingTF, IDFModel iDFModel, double d, SQLContext sQLContext, H2OContext h2OContext) {
        H2OFrame msgTable = h2OContext.createDataFrame(sQLContext.createSchemaRDD(iDFModel.transform(hashingTF.transform(tokenize(sparkContext.parallelize(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{str})), sparkContext.parallelize$default$2(), ClassTag$.MODULE$.apply(String.class))))).map(new HamOrSpamDemo$$anonfun$6(), ClassTag$.MODULE$.apply(SMS.class)), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.examples.h2o.HamOrSpamDemo$$typecreator2$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.examples.h2o.SMS").asType().toTypeConstructor();
            }
        })));
        try {
            // Drop the first column before scoring (presumably the label column - confirm
            // against the SMS record definition).
            msgTable.remove(0);
            Frame prediction = deepLearningModel.score(msgTable);
            try {
                // NOTE(review): column 1 presumably holds the probability of the first class
                // ("ham"); a value below the threshold is reported as spam - confirm.
                return prediction.vecs()[1].at(0L) < d;
            } finally {
                prediction.delete();
            }
        } finally {
            msgTable.delete();
        }
    }

    /**
     * Default for the sixth parameter of {@code isSpam}, the threshold the predicted value is
     * compared with.
     *
     * @return 0.5
     */
    public double isSpam$default$6() {
        final double defaultValue = 0.5d;
        return defaultValue;
    }

    private HamOrSpamDemo$() {
        MODULE$ = this;
        this.DATAFILE = "smsData.txt";
        this.TEST_MSGS = Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"Michal, beer tonight in MV?", "We tried to contact you re your reply to our offer of a Video Handset? 750 anytime any networks mins? UNLIMITED TEXT?"}));
    }
}
