Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,21 @@
"Cannot retrieve <objectName> from the ML cache. It is probably because the entry has been evicted."
]
},
"ML_CACHE_SIZE_OVERFLOW_EXCEPTION" : {
"message" : [
"The model cache size in current session is about to exceed",
"<mlCacheMaxSize> bytes.",
"Please delete existing cached model by executing 'del model' in python client before fitting new model or loading new model,",
"or increase Spark config 'spark.connect.session.connectML.mlCache.memoryControl.maxSize'."
]
},
"MODEL_SIZE_OVERFLOW_EXCEPTION" : {
"message" : [
"The fitted or loaded model size is about <modelSize> bytes.",
"Please fit or load a model smaller than <modelMaxSize> bytes,",
"or increase Spark config 'spark.connect.session.connectML.mlCache.memoryControl.maxModelSize'."
]
},
"UNSUPPORTED_EXCEPTION" : {
"message" : [
"<message>"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ class DecisionTreeClassificationModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Node.dummyNode, -1, -1)

override def estimatedSize: Long = getEstimatedSize()

override def predict(features: Vector): Double = {
rootNode.predictImpl(features).prediction
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,15 +224,18 @@ class FMClassifier @Since("3.0.0") (
val model = copyValues(new FMClassificationModel(uid, intercept, linear, factors))
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)

val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
val summary = new FMClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
weightColName,
objectiveHistory)
model.setSummary(Some(summary))
if (SummaryUtils.enableTrainingSummary) {
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
val summary = new FMClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
weightColName,
objectiveHistory)
model.setSummary(Some(summary))
}
model
}

@Since("3.0.0")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ class GBTClassificationModel private[ml](
private[ml] def this() = this("",
Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1, -1)

override def estimatedSize: Long = getEstimatedSize()

@Since("1.4.0")
override def trees: Array[DecisionTreeRegressionModel] = _trees

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,18 @@ class LinearSVC @Since("2.2.0") (
val model = copyValues(new LinearSVCModel(uid, coefficients, intercept))
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)

val (summaryModel, rawPredictionColName, predictionColName) = model.findSummaryModel()
val summary = new LinearSVCTrainingSummaryImpl(
summaryModel.transform(dataset),
rawPredictionColName,
predictionColName,
$(labelCol),
weightColName,
objectiveHistory)
model.setSummary(Some(summary))
if (SummaryUtils.enableTrainingSummary) {
val (summaryModel, rawPredictionColName, predictionColName) = model.findSummaryModel()
val summary = new LinearSVCTrainingSummaryImpl(
summaryModel.transform(dataset),
rawPredictionColName,
predictionColName,
$(labelCol),
weightColName,
objectiveHistory)
model.setSummary(Some(summary))
}
model
}

private def trainImpl(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -711,27 +711,30 @@ class LogisticRegression @Since("1.2.0") (
numClasses, checkMultinomial(numClasses)))
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)

val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
val logRegSummary = if (numClasses <= 2) {
new BinaryLogisticRegressionTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
$(featuresCol),
weightColName,
objectiveHistory)
} else {
new LogisticRegressionTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
$(featuresCol),
weightColName,
objectiveHistory)
if (SummaryUtils.enableTrainingSummary) {
val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
val logRegSummary = if (numClasses <= 2) {
new BinaryLogisticRegressionTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
$(featuresCol),
weightColName,
objectiveHistory)
} else {
new LogisticRegressionTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
$(featuresCol),
weightColName,
objectiveHistory)
}
model.setSummary(Some(logRegSummary))
}
model.setSummary(Some(logRegSummary))
model
}

private def createBounds(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,17 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
objectiveHistory: Array[Double]): MultilayerPerceptronClassificationModel = {
val model = copyValues(new MultilayerPerceptronClassificationModel(uid, weights))

val (summaryModel, _, predictionColName) = model.findSummaryModel()
val summary = new MultilayerPerceptronClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
predictionColName,
$(labelCol),
"",
objectiveHistory)
model.setSummary(Some(summary))
if (SummaryUtils.enableTrainingSummary) {
val (summaryModel, _, predictionColName) = model.findSummaryModel()
val summary = new MultilayerPerceptronClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
predictionColName,
$(labelCol),
"",
objectiveHistory)
model.setSummary(Some(summary))
}
model
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,23 +185,26 @@ class RandomForestClassifier @Since("1.4.0") (
val weightColName = if (!isDefined(weightCol)) "weightCol" else $(weightCol)

val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel()
val rfSummary = if (numClasses <= 2) {
new BinaryRandomForestClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
weightColName,
Array(0.0))
} else {
new RandomForestClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
predictionColName,
$(labelCol),
weightColName,
Array(0.0))
if (SummaryUtils.enableTrainingSummary) {
val rfSummary = if (numClasses <= 2) {
new BinaryRandomForestClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
probabilityColName,
predictionColName,
$(labelCol),
weightColName,
Array(0.0))
} else {
new RandomForestClassificationTrainingSummaryImpl(
summaryModel.transform(dataset),
predictionColName,
$(labelCol),
weightColName,
Array(0.0))
}
model.setSummary(Some(rfSummary))
}
model.setSummary(Some(rfSummary))
model
}

@Since("1.4.1")
Expand Down Expand Up @@ -258,6 +261,8 @@ class RandomForestClassificationModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Array(new DecisionTreeClassificationModel), -1, -1)

override def estimatedSize: Long = getEstimatedSize()

@Since("1.4.0")
override def trees: Array[DecisionTreeClassificationModel] = _trees

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,16 +303,19 @@ class BisectingKMeans @Since("2.0.0") (
val parentModel = bkm.runWithWeight(instances, handlePersistence, Some(instr))
val model = copyValues(new BisectingKMeansModel(uid, parentModel).setParent(this))

val summary = new BisectingKMeansSummary(
model.transform(dataset),
$(predictionCol),
$(featuresCol),
$(k),
$(maxIter),
parentModel.trainingCost)
instr.logNamedValue("clusterSizes", summary.clusterSizes)
instr.logNumFeatures(model.clusterCenters.head.size)
model.setSummary(Some(summary))
if (SummaryUtils.enableTrainingSummary) {
val summary = new BisectingKMeansSummary(
model.transform(dataset),
$(predictionCol),
$(featuresCol),
$(k),
$(maxIter),
parentModel.trainingCost)
instr.logNamedValue("clusterSizes", summary.clusterSizes)
instr.logNumFeatures(model.clusterCenters.head.size)
model.setSummary(Some(summary))
}
model
}

@Since("2.0.0")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -430,11 +430,14 @@ class GaussianMixture @Since("2.0.0") (

val model = copyValues(new GaussianMixtureModel(uid, weights, gaussianDists))
.setParent(this)
val summary = new GaussianMixtureSummary(model.transform(dataset),
$(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood, iteration)
instr.logNamedValue("logLikelihood", logLikelihood)
instr.logNamedValue("clusterSizes", summary.clusterSizes)
model.setSummary(Some(summary))
if (SummaryUtils.enableTrainingSummary) {
val summary = new GaussianMixtureSummary(model.transform(dataset),
$(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood, iteration)
instr.logNamedValue("logLikelihood", logLikelihood)
instr.logNamedValue("clusterSizes", summary.clusterSizes)
model.setSummary(Some(summary))
}
model
}

private def trainImpl(
Expand Down
22 changes: 12 additions & 10 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
Original file line number Diff line number Diff line change
Expand Up @@ -391,16 +391,18 @@ class KMeans @Since("1.5.0") (
}

val model = copyValues(new KMeansModel(uid, oldModel).setParent(this))
val summary = new KMeansSummary(
model.transform(dataset),
$(predictionCol),
$(featuresCol),
$(k),
oldModel.numIter,
oldModel.trainingCost)

model.setSummary(Some(summary))
instr.logNamedValue("clusterSizes", summary.clusterSizes)
if (SummaryUtils.enableTrainingSummary) {
val summary = new KMeansSummary(
model.transform(dataset),
$(predictionCol),
$(featuresCol),
$(k),
oldModel.numIter,
oldModel.trainingCost)

model.setSummary(Some(summary))
instr.logNamedValue("clusterSizes", summary.clusterSizes)
}
model
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ class DecisionTreeRegressionModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Node.dummyNode, -1)

override def estimatedSize: Long = getEstimatedSize()

override def predict(features: Vector): Double = {
rootNode.predictImpl(features).prediction
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ class GBTRegressionModel private[ml](
// For ml connect only
private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1)

override def estimatedSize: Long = getEstimatedSize()

@Since("1.4.0")
override def trees: Array[DecisionTreeRegressionModel] = _trees

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,12 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
val model = copyValues(
new GeneralizedLinearRegressionModel(uid, wlsModel.coefficients, wlsModel.intercept)
.setParent(this))
val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model,
wlsModel.diagInvAtWA.toArray, 1, getSolver)
model.setSummary(Some(trainingSummary))
if (SummaryUtils.enableTrainingSummary) {
val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model,
wlsModel.diagInvAtWA.toArray, 1, getSolver)
model.setSummary(Some(trainingSummary))
}
model
} else {
val instances = validated.rdd.map {
case Row(label: Double, weight: Double, offset: Double, features: Vector) =>
Expand All @@ -435,9 +438,12 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
val model = copyValues(
new GeneralizedLinearRegressionModel(uid, irlsModel.coefficients, irlsModel.intercept)
.setParent(this))
val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model,
irlsModel.diagInvAtWA.toArray, irlsModel.numIterations, getSolver)
model.setSummary(Some(trainingSummary))
if (SummaryUtils.enableTrainingSummary) {
val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model,
irlsModel.diagInvAtWA.toArray, irlsModel.numIterations, getSolver)
model.setSummary(Some(trainingSummary))
}
model
}

model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,13 +431,17 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
}

val model = createModel(parameters, yMean, yStd, featuresMean, featuresStd)
// Handle possible missing or invalid prediction columns
val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol()

val trainingSummary = new LinearRegressionTrainingSummary(
summaryModel.transform(dataset), predictionColName, $(labelCol), $(featuresCol),
model, Array(0.0), objectiveHistory)
model.setSummary(Some(trainingSummary))
if (SummaryUtils.enableTrainingSummary) {
// Handle possible missing or invalid prediction columns
val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol()

val trainingSummary = new LinearRegressionTrainingSummary(
summaryModel.transform(dataset), predictionColName, $(labelCol), $(featuresCol),
model, Array(0.0), objectiveHistory)
model.setSummary(Some(trainingSummary))
}
model
}

private def trainWithNormal(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ class RandomForestRegressionModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), -1)

override def estimatedSize: Long = getEstimatedSize()

@Since("1.4.0")
override def trees: Array[DecisionTreeRegressionModel] = _trees

Expand Down
Loading