Skip to content

Commit 43f78ae

Browse files
WeichenXu123 and yhuang-db
authored and committed
[SPARK-51974][CONNECT][ML] Limit model size and per-session model cache size
### What changes were proposed in this pull request? Limit model size and per-session model cache size. Configurations: ``` spark.connect.session.connectML.mlCache.memoryControl.enabled spark.connect.session.connectML.mlCache.memoryControl.maxInMemorySize spark.connect.session.connectML.mlCache.memoryControl.offloadingTimeout spark.connect.session.connectML.mlCache.memoryControl.maxModelSize spark.connect.session.connectML.mlCache.memoryControl.maxSize ``` Only when spark.connect.session.connectML.mlCache.memoryControl.enabled is true do the rest of the configurations take effect. ### Why are the changes needed? Motivation: This is for ML cache management, to avoid a huge ML cache affecting Spark driver availability. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UT. ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#50751 from WeichenXu123/tree-early-stop. Authored-by: Weichen Xu <[email protected]> Signed-off-by: Weichen Xu <[email protected]>
1 parent 344e6c0 commit 43f78ae

File tree

28 files changed

+502
-166
lines changed

28 files changed

+502
-166
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,21 @@
811811
"Cannot retrieve <objectName> from the ML cache. It is probably because the entry has been evicted."
812812
]
813813
},
814+
"ML_CACHE_SIZE_OVERFLOW_EXCEPTION" : {
815+
"message" : [
816+
"The model cache size in current session is about to exceed",
817+
"<mlCacheMaxSize> bytes.",
818+
"Please delete existing cached model by executing 'del model' in python client before fitting new model or loading new model,",
819+
"or increase Spark config 'spark.connect.session.connectML.mlCache.memoryControl.maxSize'."
820+
]
821+
},
822+
"MODEL_SIZE_OVERFLOW_EXCEPTION" : {
823+
"message" : [
824+
"The fitted or loaded model size is about <modelSize> bytes.",
825+
"Please fit or load a model smaller than <modelMaxSize> bytes,",
826+
"or increase Spark config 'spark.connect.session.connectML.mlCache.memoryControl.maxModelSize'."
827+
]
828+
},
814829
"UNSUPPORTED_EXCEPTION" : {
815830
"message" : [
816831
"<message>"

mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ class DecisionTreeClassificationModel private[ml] (
195195
// For ml connect only
196196
private[ml] def this() = this("", Node.dummyNode, -1, -1)
197197

198+
override def estimatedSize: Long = getEstimatedSize()
199+
198200
override def predict(features: Vector): Double = {
199201
rootNode.predictImpl(features).prediction
200202
}

mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -224,15 +224,18 @@ class FMClassifier @Since("3.0.0") (
224224
val model = copyValues(new FMClassificationModel(uid, intercept, linear, factors))
225225
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)
226226

227-
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
228-
val summary = new FMClassificationTrainingSummaryImpl(
229-
summaryModel.transform(dataset),
230-
probabilityColName,
231-
predictionColName,
232-
$(labelCol),
233-
weightColName,
234-
objectiveHistory)
235-
model.setSummary(Some(summary))
227+
if (SummaryUtils.enableTrainingSummary) {
228+
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
229+
val summary = new FMClassificationTrainingSummaryImpl(
230+
summaryModel.transform(dataset),
231+
probabilityColName,
232+
predictionColName,
233+
$(labelCol),
234+
weightColName,
235+
objectiveHistory)
236+
model.setSummary(Some(summary))
237+
}
238+
model
236239
}
237240

238241
@Since("3.0.0")

mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ class GBTClassificationModel private[ml](
276276
private[ml] def this() = this("",
277277
Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1, -1)
278278

279+
override def estimatedSize: Long = getEstimatedSize()
280+
279281
@Since("1.4.0")
280282
override def trees: Array[DecisionTreeRegressionModel] = _trees
281283

mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -277,15 +277,18 @@ class LinearSVC @Since("2.2.0") (
277277
val model = copyValues(new LinearSVCModel(uid, coefficients, intercept))
278278
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)
279279

280-
val (summaryModel, rawPredictionColName, predictionColName) = model.findSummaryModel()
281-
val summary = new LinearSVCTrainingSummaryImpl(
282-
summaryModel.transform(dataset),
283-
rawPredictionColName,
284-
predictionColName,
285-
$(labelCol),
286-
weightColName,
287-
objectiveHistory)
288-
model.setSummary(Some(summary))
280+
if (SummaryUtils.enableTrainingSummary) {
281+
val (summaryModel, rawPredictionColName, predictionColName) = model.findSummaryModel()
282+
val summary = new LinearSVCTrainingSummaryImpl(
283+
summaryModel.transform(dataset),
284+
rawPredictionColName,
285+
predictionColName,
286+
$(labelCol),
287+
weightColName,
288+
objectiveHistory)
289+
model.setSummary(Some(summary))
290+
}
291+
model
289292
}
290293

291294
private def trainImpl(

mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -711,27 +711,30 @@ class LogisticRegression @Since("1.2.0") (
711711
numClasses, checkMultinomial(numClasses)))
712712
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)
713713

714-
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
715-
val logRegSummary = if (numClasses <= 2) {
716-
new BinaryLogisticRegressionTrainingSummaryImpl(
717-
summaryModel.transform(dataset),
718-
probabilityColName,
719-
predictionColName,
720-
$(labelCol),
721-
$(featuresCol),
722-
weightColName,
723-
objectiveHistory)
724-
} else {
725-
new LogisticRegressionTrainingSummaryImpl(
726-
summaryModel.transform(dataset),
727-
probabilityColName,
728-
predictionColName,
729-
$(labelCol),
730-
$(featuresCol),
731-
weightColName,
732-
objectiveHistory)
714+
if (SummaryUtils.enableTrainingSummary) {
715+
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
716+
val logRegSummary = if (numClasses <= 2) {
717+
new BinaryLogisticRegressionTrainingSummaryImpl(
718+
summaryModel.transform(dataset),
719+
probabilityColName,
720+
predictionColName,
721+
$(labelCol),
722+
$(featuresCol),
723+
weightColName,
724+
objectiveHistory)
725+
} else {
726+
new LogisticRegressionTrainingSummaryImpl(
727+
summaryModel.transform(dataset),
728+
probabilityColName,
729+
predictionColName,
730+
$(labelCol),
731+
$(featuresCol),
732+
weightColName,
733+
objectiveHistory)
734+
}
735+
model.setSummary(Some(logRegSummary))
733736
}
734-
model.setSummary(Some(logRegSummary))
737+
model
735738
}
736739

737740
private def createBounds(

mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,14 +249,17 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
249249
objectiveHistory: Array[Double]): MultilayerPerceptronClassificationModel = {
250250
val model = copyValues(new MultilayerPerceptronClassificationModel(uid, weights))
251251

252-
val (summaryModel, _, predictionColName) = model.findSummaryModel()
253-
val summary = new MultilayerPerceptronClassificationTrainingSummaryImpl(
254-
summaryModel.transform(dataset),
255-
predictionColName,
256-
$(labelCol),
257-
"",
258-
objectiveHistory)
259-
model.setSummary(Some(summary))
252+
if (SummaryUtils.enableTrainingSummary) {
253+
val (summaryModel, _, predictionColName) = model.findSummaryModel()
254+
val summary = new MultilayerPerceptronClassificationTrainingSummaryImpl(
255+
summaryModel.transform(dataset),
256+
predictionColName,
257+
$(labelCol),
258+
"",
259+
objectiveHistory)
260+
model.setSummary(Some(summary))
261+
}
262+
model
260263
}
261264
}
262265

mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -185,23 +185,26 @@ class RandomForestClassifier @Since("1.4.0") (
185185
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)
186186

187187
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
188-
val rfSummary = if (numClasses <= 2) {
189-
new BinaryRandomForestClassificationTrainingSummaryImpl(
190-
summaryModel.transform(dataset),
191-
probabilityColName,
192-
predictionColName,
193-
$(labelCol),
194-
weightColName,
195-
Array(0.0))
196-
} else {
197-
new RandomForestClassificationTrainingSummaryImpl(
198-
summaryModel.transform(dataset),
199-
predictionColName,
200-
$(labelCol),
201-
weightColName,
202-
Array(0.0))
188+
if (SummaryUtils.enableTrainingSummary) {
189+
val rfSummary = if (numClasses <= 2) {
190+
new BinaryRandomForestClassificationTrainingSummaryImpl(
191+
summaryModel.transform(dataset),
192+
probabilityColName,
193+
predictionColName,
194+
$(labelCol),
195+
weightColName,
196+
Array(0.0))
197+
} else {
198+
new RandomForestClassificationTrainingSummaryImpl(
199+
summaryModel.transform(dataset),
200+
predictionColName,
201+
$(labelCol),
202+
weightColName,
203+
Array(0.0))
204+
}
205+
model.setSummary(Some(rfSummary))
203206
}
204-
model.setSummary(Some(rfSummary))
207+
model
205208
}
206209

207210
@Since("1.4.1")
@@ -258,6 +261,8 @@ class RandomForestClassificationModel private[ml] (
258261
// For ml connect only
259262
private[ml] def this() = this("", Array(new DecisionTreeClassificationModel), -1, -1)
260263

264+
override def estimatedSize: Long = getEstimatedSize()
265+
261266
@Since("1.4.0")
262267
override def trees: Array[DecisionTreeClassificationModel] = _trees
263268

mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -303,16 +303,19 @@ class BisectingKMeans @Since("2.0.0") (
303303
val parentModel = bkm.runWithWeight(instances, handlePersistence, Some(instr))
304304
val model = copyValues(new BisectingKMeansModel(uid, parentModel).setParent(this))
305305

306-
val summary = new BisectingKMeansSummary(
307-
model.transform(dataset),
308-
$(predictionCol),
309-
$(featuresCol),
310-
$(k),
311-
$(maxIter),
312-
parentModel.trainingCost)
313-
instr.logNamedValue("clusterSizes", summary.clusterSizes)
314-
instr.logNumFeatures(model.clusterCenters.head.size)
315-
model.setSummary(Some(summary))
306+
if (SummaryUtils.enableTrainingSummary) {
307+
val summary = new BisectingKMeansSummary(
308+
model.transform(dataset),
309+
$(predictionCol),
310+
$(featuresCol),
311+
$(k),
312+
$(maxIter),
313+
parentModel.trainingCost)
314+
instr.logNamedValue("clusterSizes", summary.clusterSizes)
315+
instr.logNumFeatures(model.clusterCenters.head.size)
316+
model.setSummary(Some(summary))
317+
}
318+
model
316319
}
317320

318321
@Since("2.0.0")

mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -430,11 +430,14 @@ class GaussianMixture @Since("2.0.0") (
430430

431431
val model = copyValues(new GaussianMixtureModel(uid, weights, gaussianDists))
432432
.setParent(this)
433-
val summary = new GaussianMixtureSummary(model.transform(dataset),
434-
$(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood, iteration)
435-
instr.logNamedValue("logLikelihood", logLikelihood)
436-
instr.logNamedValue("clusterSizes", summary.clusterSizes)
437-
model.setSummary(Some(summary))
433+
if (SummaryUtils.enableTrainingSummary) {
434+
val summary = new GaussianMixtureSummary(model.transform(dataset),
435+
$(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood, iteration)
436+
instr.logNamedValue("logLikelihood", logLikelihood)
437+
instr.logNamedValue("clusterSizes", summary.clusterSizes)
438+
model.setSummary(Some(summary))
439+
}
440+
model
438441
}
439442

440443
private def trainImpl(

0 commit comments

Comments
 (0)