Skip to content

Commit 2fcb78b

Browse files
authored
Merge pull request #39 from linkedin/dualip-lpsolver-parity
establish parity between external and internal solver
2 parents a7ef8f6 + 2e7718a commit 2fcb78b

File tree

73 files changed

+3909
-2179
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+3909
-2179
lines changed

dualip/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ test {
3030

3131
archivesBaseName = "${project.name}_2.12"
3232

33-
apply from: "$rootDir/gradle/java-publication.gradle"
33+
apply from: "$rootDir/gradle/java-publication.gradle"
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package com.linkedin.dualip.data
2+
3+
import breeze.linalg.{SparseVector => BSV}
4+
import com.linkedin.dualip.projection.Projection
5+
import com.linkedin.dualip.util.DataFormat.DataFormat
6+
7+
/**
8+
* case class for the dual variables corresponding to local and global constraints
9+
*
10+
* @param lambdaLocal : dual variables corresponding to local constraints
11+
* @param lambdaGlobal : dual variables corresponding to global constraints
12+
*/
13+
case class ConstrainedMatchingDuals(
  lambdaLocal: Array[Double], // duals attached to the local constraints
  lambdaGlobal: Array[Double] // duals attached to the global constraints
)
14+
15+
/**
16+
* case class for the dual variables corresponding to local and global constraints, held as breeze sparse vectors (BSV)
17+
*
18+
* @param lambdaLocal : dual variables corresponding to local constraints in BSV format
19+
* @param lambdaGlobal : dual variables corresponding to global constraints in BSV format
20+
*/
21+
case class ConstrainedMatchingDualsBSV(
  lambdaLocal: BSV[Double], // sparse duals attached to the local constraints
  lambdaGlobal: BSV[Double] // sparse duals attached to the global constraints
)
22+
23+
/**
24+
* case class for constrained matching solver parameters
25+
*
26+
* @param constrainedMatchingDataPath : Path of A matrix, G matrix and c vector combined in a special data block
27+
* @param localBudgetPath : Path of the budgets corresponding to local constraints
28+
* @param globalBudgetPath : Path of the budgets corresponding to global constraints
29+
* @param format : The format of input data, e.g. avro or orc
30+
* @param numOfPartitions : number of partitions for Spark (presumably for the input data; the original description was truncated to "sp")
31+
* @param enableHighDimOptimization : enables high-dimensional optimization
32+
* @param numLambdaPartitions : number of partitions for the duals
33+
*/
34+
case class ConstrainedMatchingSolverParams(
  constrainedMatchingDataPath: String,        // where the combined A / G / c data block lives
  localBudgetPath: String,                    // where the local-constraint budgets live
  globalBudgetPath: String,                   // where the global-constraint budgets live
  format: DataFormat,                         // input data format, e.g. avro or orc
  numOfPartitions: Int,                       // number of partitions to use
  enableHighDimOptimization: Boolean = false, // high-dimensional optimization is off unless requested
  numLambdaPartitions: Option[Int] = None     // partition count for the duals; unset by default
)
42+
43+
44+
/**
45+
* case class for the A-G-c data block corresponding to the constrained matching problem
46+
*
47+
* @param id : row-id
48+
* @param data : c-A-G data corresponding to a given row-id
49+
* @param metadata : projection metadata
50+
*
51+
* The data field has the following structure.
52+
*
53+
* {
54+
* "name": "data",
55+
* "type": [
56+
* {
57+
* "type": "array",
58+
* "items": [
59+
* {
60+
* "type": "record",
61+
* "name": "data",
62+
* "fields": [
63+
* {
64+
* "name": "colId",
65+
* "type": "int"
66+
* },
67+
* {
68+
* "name": "c",
69+
* "type": "double"
70+
* },
71+
* {
72+
* "name": "A",
73+
* "type": "double"
74+
* },
75+
* {
76+
* "name": "G",
77+
* "type": "array",
78+
* "items": [
79+
* {
80+
* "fields": [
81+
* {
82+
* "name": "row-Id of G",
83+
* "type": "int"
84+
* },
85+
* {
86+
* "name": "value from G matrix",
87+
* "type": "double"
88+
* }
89+
* ]
90+
* }
91+
* ]
92+
* }
93+
* ]
94+
* }
95+
* ]
96+
* }
97+
* ]
98+
* }
99+
*/
100+
case class ConstrainedMatchingData(
  id: String,
  // One tuple per column: (colId, c, A, G), where G is a list of (row-id of G, value from G matrix).
  data: Seq[(Int, Double, Double, Seq[(Int, Double)])],
  // Projection metadata; listed in optionalFields on the companion and null when not supplied.
  metadata: Projection#Metadata = null
)
102+
103+
/** Companion of the ConstrainedMatchingData case class. */
object ConstrainedMatchingData {
  // Names of the fields flagged as optional; only "metadata", which defaults to null on the case class.
  // NOTE(review): presumably consulted by the data-loading code — confirm against callers.
  val optionalFields: Seq[String] = List("metadata")
}
106+
107+
/**
108+
* case class for the budgets
109+
*
110+
* @param budgetLocal : budget corresponding to local constraints
111+
* @param budgetGlobal : budget corresponding to global constraints
112+
*/
113+
case class ConstrainedMatchingBudget(
  budgetLocal: BSV[Double], // budget for the local constraints
  budgetGlobal: BSV[Double] // budget for the global constraints
)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.linkedin.dualip.data
2+
3+
import com.linkedin.dualip.projection.Projection
4+
5+
/**
6+
* Representation of the data block, used in slate generation
7+
* The assumption is that spark.Dataset[MatchingData] is going to be used to store the input data,
8+
* optimized for fast algorithm iterations.
9+
*
10+
* @param id - id of the block (i.e. impression id).
11+
* @param data - sparse vector of tuples: (rowId, c(rowId), a(rowId))
12+
* c(rowId) - is the objective function component of the corresponding variable
13+
* a(rowId) - is the corresponding element of the constraint diagonal
14+
* @param metadata - features or metadata to be used for each block (a string-to-number mapping).
15+
*/
16+
case class MatchingData(
  id: String,
  // One tuple per entry: (rowId, c(rowId), a(rowId)).
  data: Seq[(Int, Double, Double)],
  // Per-block metadata; listed in optionalFields on the companion and null when not supplied.
  metadata: Projection#Metadata = null
)
17+
18+
/** Companion of the MatchingData case class. */
object MatchingData {
  // Names of the fields flagged as optional; only "metadata", which defaults to null on the case class.
  // NOTE(review): presumably consulted by the data-loading code — confirm against callers.
  val optionalFields: Seq[String] = List("metadata")
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package com.linkedin.dualip.data
2+
3+
/**
4+
* A MOO block of data. A vertical slice of design matrix, specifically the variables in the same simplex constraint sum x <= 1
5+
* We need to keep this data together because we need to do a simplex projection on it.
6+
*
7+
* Column (variable) indices in "a" and "c" are relative, that is, a variable is uniquely identified by
8+
* a combination of block id and internal id.
9+
*
10+
* The internal representation is optimized for the operations that the algorithm implements and for the data characteristics:
11+
* in particular, dense constraints matrix with few rows.
12+
*
13+
* @param id - unique identifier of the block, i.e. impression id for some problems
14+
* @param a - a dense constraints matrix a(row)(column)
15+
* @param c - a dense objective function vector
16+
* @param problemId - unique identifier for distinguishing a specific LP problem
17+
*/
18+
case class MooData(
  id: Long,                // unique identifier of the block
  a: Array[Array[Double]], // dense constraints matrix, indexed a(row)(column)
  c: Array[Double],        // dense objective function vector
  problemId: Long          // identifies the specific LP problem
)
19+
20+
/**
21+
* A constraint block of data. A data point of constraint vector.
22+
*
23+
* @param row - a specific row number of the constraint vector
24+
* @param value - the corresponding constraint value for the row
25+
* @param problemId - unique identifier for distinguishing a specific LP problem
26+
*/
27+
case class ConstraintBlock(
  row: Int,       // row number within the constraint vector
  value: Double,  // constraint value for that row
  problemId: Long // identifies the specific LP problem
)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package com.linkedin.dualip.data
2+
3+
import com.linkedin.dualip.projection.Projection
4+
5+
/**
6+
* case class for the Slate variables used in the non-differentiable solver
7+
*
8+
* @param itemId Item ID.
9+
* @param cost Contribution to the gradient by the given item (sum over slots of a_ijk x_ijk).
10+
* @param objective Contribution to the objective by the given item (sum over slots of c_ijk x_ijk).
11+
* @param slots Seq of tuples consisting of slot index and the primal values for a given item and slot, i.e.
12+
* (k, x_ijk).
13+
*/
14+
case class SlateNonDifferentiable(
  itemId: Int,
  cost: Double,             // the item's contribution to the gradient
  objective: Double,        // the item's contribution to the objective
  slots: Seq[(Int, Double)] // (slot index, primal value) pairs for this item
)
15+
16+
/**
17+
* case class for the A-c data block corresponding to the multi-slate matching problem
18+
*
19+
* @param id Block ID.
20+
* @param data c-A data corresponding to a given block-id. The first entry of type Int in the data block is the
21+
* item ID. The second entry is a Seq of (Int, Double, Double) triplets, each corresponding to the
22+
* slot ID and the c and A values associated with that block ID, item ID and slot ID.
23+
* @param metadata : Projection metadata.
24+
*
25+
*/
26+
case class MultiSlateMatchingData(
  id: String,
  // One pair per item: (item ID, list of (slot ID, c, A) triplets).
  data: Seq[(Int, Seq[(Int, Double, Double)])],
  // Projection metadata; listed in optionalFields on the companion and null when not supplied.
  metadata: Projection#Metadata = null
)
28+
29+
/** Companion of the MultiSlateMatchingData case class. */
object MultiSlateMatchingData {
  // Names of the fields flagged as optional; only "metadata", which defaults to null on the case class.
  // NOTE(review): presumably consulted by the data-loading code — confirm against callers.
  val optionalFields: Seq[String] = List("metadata")
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.linkedin.dualip.data
2+
3+
import com.linkedin.dualip.projection.Projection
4+
5+
/**
6+
* case class for the A-c data block corresponding to the multiple matching problem
7+
*
8+
* @param id : block-id
9+
* @param data : c-A data corresponding to a given block-id. The first entry of type Int in the data block is the
10+
* item ID. The second entry of type Double is the c value associated with that (block, item). The
11+
* third entry is a Seq of (Int, Double) pairs, each corresponding to the constraint index and A value
12+
* for that (block, item, constraint).
13+
* @param metadata : projection metadata
14+
*/
15+
case class MultipleMatchingData(
  id: String,
  // One triple per item: (item ID, c value, list of (constraint index, A value) pairs).
  data: Seq[(Int, Double, Seq[(Int, Double)])],
  // Projection metadata; listed in optionalFields on the companion and null when not supplied.
  metadata: Projection#Metadata = null
)
17+
18+
/** Companion of the MultipleMatchingData case class. */
object MultipleMatchingData {
  // Names of the fields flagged as optional; only "metadata", which defaults to null on the case class.
  // NOTE(review): presumably consulted by the data-loading code — confirm against callers.
  val optionalFields: Seq[String] = List("metadata")
}

0 commit comments

Comments
 (0)