3 changes: 2 additions & 1 deletion include/taco/format.h
@@ -95,7 +95,7 @@ class ModeFormat {
/// Properties of a mode format
enum Property {
FULL, NOT_FULL, ORDERED, NOT_ORDERED, UNIQUE, NOT_UNIQUE, BRANCHLESS,
NOT_BRANCHLESS, COMPACT, NOT_COMPACT
NOT_BRANCHLESS, COMPACT, NOT_COMPACT, ZEROLESS, NOT_ZEROLESS
};

/// Instantiates an undefined mode format
@@ -126,6 +126,7 @@ class ModeFormat {
bool isUnique() const;
bool isBranchless() const;
bool isCompact() const;
bool isZeroless() const;

/// Returns true if a mode format has a specific capability, false otherwise
bool hasCoordValIter() const;
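The new ZEROLESS / NOT_ZEROLESS pair presumably works like the existing property flags and can be requested when parameterizing a mode format. A hypothetical usage sketch (not part of this diff; it assumes ZEROLESS is accepted by ModeFormat's property-list operator the same way the other properties are):

// Hypothetical usage, mirroring how the existing properties are selected:
ModeFormat zerolessCompressed = Compressed({ModeFormat::ZEROLESS});
Format csr({Dense, zerolessCompressed});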
1 change: 1 addition & 0 deletions include/taco/lower/iterator.h
@@ -69,6 +69,7 @@ class Iterator : public util::Comparable<Iterator> {
bool isUnique() const;
bool isBranchless() const;
bool isCompact() const;
bool isZeroless() const;

/// Capabilities supported by levels being iterated.
bool hasCoordIter() const;
2 changes: 1 addition & 1 deletion include/taco/lower/mode_format_compressed.h
@@ -9,7 +9,7 @@ class CompressedModeFormat : public ModeFormatImpl {
public:
CompressedModeFormat();
CompressedModeFormat(bool isFull, bool isOrdered,
bool isUnique, long long allocSize = DEFAULT_ALLOC_SIZE);
bool isUnique, bool isZeroless, long long allocSize = DEFAULT_ALLOC_SIZE);

~CompressedModeFormat() override {}

2 changes: 1 addition & 1 deletion include/taco/lower/mode_format_dense.h
@@ -8,7 +8,7 @@ namespace taco {
class DenseModeFormat : public ModeFormatImpl {
public:
DenseModeFormat();
DenseModeFormat(const bool isOrdered, const bool isUnique);
DenseModeFormat(const bool isOrdered, const bool isUnique, const bool isZeroless);

~DenseModeFormat() override {}

3 changes: 2 additions & 1 deletion include/taco/lower/mode_format_impl.h
@@ -59,7 +59,7 @@ std::ostream& operator<<(std::ostream&, const ModeFunction&);
class ModeFormatImpl {
public:
ModeFormatImpl(std::string name, bool isFull, bool isOrdered, bool isUnique,
bool isBranchless, bool isCompact, bool hasCoordValIter,
bool isBranchless, bool isCompact, bool isZeroless, bool hasCoordValIter,
bool hasCoordPosIter, bool hasLocate, bool hasInsert,
bool hasAppend);

@@ -162,6 +162,7 @@ class ModeFormatImpl {
const bool isUnique;
const bool isBranchless;
const bool isCompact;
const bool isZeroless;

const bool hasCoordValIter;
const bool hasCoordPosIter;
2 changes: 1 addition & 1 deletion include/taco/lower/mode_format_singleton.h
@@ -9,7 +9,7 @@ class SingletonModeFormat : public ModeFormatImpl {
public:
SingletonModeFormat();
SingletonModeFormat(bool isFull, bool isOrdered,
bool isUnique, long long allocSize = DEFAULT_ALLOC_SIZE);
bool isUnique, bool isZeroless, long long allocSize = DEFAULT_ALLOC_SIZE);

~SingletonModeFormat() override {}

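All of the constructor changes above thread the new flag through to ModeFormatImpl. A simplified sketch of how CompressedModeFormat would presumably forward it (the capability flags are taken from the existing compressed level format: not branchless, compact, position iteration, and append; this is not copied verbatim from the PR):

CompressedModeFormat::CompressedModeFormat(bool isFull, bool isOrdered,
                                           bool isUnique, bool isZeroless,
                                           long long allocSize)
    // name, isFull, isOrdered, isUnique, isBranchless, isCompact, isZeroless,
    // hasCoordValIter, hasCoordPosIter, hasLocate, hasInsert, hasAppend
    : ModeFormatImpl("compressed", isFull, isOrdered, isUnique, false, true,
                     isZeroless, false, true, false, false, true),
      allocSize(allocSize) {
}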
93 changes: 80 additions & 13 deletions src/codegen/codegen.cpp
@@ -230,16 +230,47 @@ string CodeGen::printTensorProperty(string varname, const GetProperty* op, bool
}

string CodeGen::unpackTensorProperty(string varname, const GetProperty* op,
bool is_output_prop) {
bool is_output_prop, int flag, string output_tensor) {
stringstream ret;
ret << " ";

auto tensor = op->tensor.as<Var>();
if (op->property == TensorProperty::Values) {
// for the values, it's in the last slot
ret << printType(tensor->type, true);
ret << " " << restrictKeyword() << " " << varname << " = (" << printType(tensor->type, true) << ")(";
ret << tensor->name << "->vals);\n";
switch(flag) {
case PRINT_FUNC:
ret << printType(tensor->type, true);
ret << " " << restrictKeyword() << " " << varname << " = (" << printType(tensor->type, true) << ")(";
ret << tensor->name << "->vals);\n";
break;
case PRINT_MEM_HOST_TO_DEV:
ret << "gpuErrchk(cudaMalloc((void **)&";
ret << tensor->name << "_dev" << "->vals, ";
ret << "malloc_usable_size(";
ret << tensor->name << "->vals)));\n";

ret << " ";
ret << "cudaMemcpy(";
ret << tensor->name << "_dev" << "->vals, ";
ret << tensor->name << "->vals, ";
ret << "malloc_usable_size(";
ret << tensor->name << "->vals), ";
ret << "cudaMemcpyHostToDevice);\n";
break;
case PRINT_MEM_DEV_TO_HOST:
if(output_tensor == tensor->name) {
ret << "cudaMemcpy(";
ret << tensor->name << "->vals, ";
ret << tensor->name << "_dev->vals, ";
ret << "malloc_usable_size(";
ret << tensor->name << "->vals), ";
ret << "cudaMemcpyDevicetToHost);\n";
ret << " ";
}
ret << "cudaFree(";
ret << tensor->name << "_dev" << "->vals);\n";
break;
}
return ret.str();
} else if (op->property == TensorProperty::ValuesSize) {
ret << "int " << varname << " = " << tensor->name << "->vals_size;\n";
@@ -252,18 +283,54 @@ string CodeGen::unpackTensorProperty(string varname, const GetProperty* op,
// for a Fixed level, ptr is an int
// all others are int*
if (op->property == TensorProperty::Dimension) {
tp = "int";
ret << tp << " " << varname << " = (int)(" << tensor->name
<< "->dimensions[" << op->mode << "]);\n";
switch(flag) {
case PRINT_FUNC:
tp = "int";
ret << tp << " " << varname << " = (int)(" << tensor->name
<< "->dimensions[" << op->mode << "]);\n";
break;
case PRINT_MEM_HOST_TO_DEV:
ret << tensor->name << "_dev->dimension[" << op->mode << "] = " << tensor->name << "->dimension[" << op->mode << "];\n";
break;
}
} else {
taco_iassert(op->property == TensorProperty::Indices);
tp = "int*";
auto nm = op->index;
ret << tp << " " << restrictKeyword() << " " << varname << " = ";
ret << "(int*)(" << tensor->name << "->indices[" << op->mode;
ret << "][" << nm << "]);\n";
switch(flag) {
case PRINT_FUNC:
ret << tp << " " << restrictKeyword() << " " << varname << " = ";
ret << "(int*)(" << tensor->name << "->indices[" << op->mode;
ret << "][" << nm << "]);\n";
break;
case PRINT_MEM_HOST_TO_DEV:
ret << "gpuErrchk(cudaMalloc((void **)&";
ret << tensor->name << "_dev" << "->indices[" << op->mode << "][" << nm << "], ";
ret << "malloc_usable_size(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "])));\n";

ret << " ";
ret << "cudaMemcpy(";
ret << tensor->name << "_dev" << "->indices[" << op->mode << "][" << nm << "], ";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "], ";
ret << "malloc_usable_size(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "]), ";
ret << "cudaMemcpyHostToDevice);\n";
break;
case PRINT_MEM_DEV_TO_HOST:
if(output_tensor == tensor->name) {
ret << "cudaMemcpy(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "], ";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "], ";
ret << "malloc_usable_size(";
ret << tensor->name << "_dev->indices[" << op->mode << "][" << nm << "]), ";
ret << "cudaMemcpyDevicetToHost);\n";
}
ret << "cudaFree(";
ret << tensor->name << "_dev" << "->indices[" << op->mode << "][" << nm << "]);\n";
break;
}
}

return ret.str();
}

@@ -312,7 +379,7 @@ string CodeGen::pointTensorProperty(std::string varname) {

// helper to print declarations
string CodeGen::printDecls(map<Expr, string, ExprCompare> varMap,
vector<Expr> inputs, vector<Expr> outputs) {
vector<Expr> inputs, vector<Expr> outputs, int flag, string output_tensor) {
stringstream ret;
unordered_set<string> propsAlreadyGenerated;

@@ -367,7 +434,7 @@ string CodeGen::printDecls(map<Expr, string, ExprCompare> varMap,
break;
}
} else {
ret << unpackTensorProperty(varMap[prop], prop, isOutputProp);
ret << unpackTensorProperty(varMap[prop], prop, isOutputProp, flag, output_tensor);
}
propsAlreadyGenerated.insert(varMap[prop]);
}
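To make the new branches concrete, this is roughly what the PRINT_MEM_HOST_TO_DEV case would emit for the values array of a tensor named A (illustrative output only; it assumes A and A_dev are taco_tensor_t pointers and that malloc_usable_size reports the size of the host allocation):

gpuErrchk(cudaMalloc((void **)&A_dev->vals, malloc_usable_size(A->vals)));
cudaMemcpy(A_dev->vals, A->vals, malloc_usable_size(A->vals), cudaMemcpyHostToDevice);

The PRINT_MEM_DEV_TO_HOST case emits the reverse cudaMemcpy only when the tensor is the named output, followed by a cudaFree of the device buffer.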
8 changes: 6 additions & 2 deletions src/codegen/codegen.h
@@ -1,6 +1,10 @@
#ifndef TACO_CODEGEN_H
#define TACO_CODEGEN_H

#define PRINT_FUNC 0
#define PRINT_MEM_HOST_TO_DEV 1
#define PRINT_MEM_DEV_TO_HOST 2
Contributor: Should replace these with an enum.

Author: Oh, I see. I will replace them with an enum.
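A minimal sketch of the enum discussed here (hypothetical; the author's follow-up commit may name things differently):

// Possible replacement for the PRINT_* macros; keeping the same enumerator
// names lets the existing switch statements and call sites compile unchanged.
enum PrintFlag {
  PRINT_FUNC = 0,             // emit the normal property-unpacking declarations
  PRINT_MEM_HOST_TO_DEV = 1,  // emit cudaMalloc/cudaMemcpy calls toward the device
  PRINT_MEM_DEV_TO_HOST = 2   // emit copy-back and cudaFree calls for results
};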


#include <memory>
#include "taco/ir/ir.h"
#include "taco/ir/ir_printer.h"
@@ -43,7 +47,7 @@ class CodeGen : public IRPrinter {
std::vector<Expr> localVars, int labels,
std::string funcName);
std::string printDecls(std::map<Expr, std::string, ExprCompare> varMap,
std::vector<Expr> inputs, std::vector<Expr> outputs);
std::vector<Expr> inputs, std::vector<Expr> outputs, int flag, std::string output_tensor);
std::string printPack(std::map<std::tuple<Expr, TensorProperty, int, int>,
std::string> outputProperties, std::vector<Expr> outputs);
std::string printCoroutineFinish(int numYields, std::string funcName);
@@ -63,7 +67,7 @@

std::string printTensorProperty(std::string varname, const GetProperty* op, bool is_ptr);
std::string unpackTensorProperty(std::string varname, const GetProperty* op,
bool is_output_prop);
bool is_output_prop, int flag, std::string output_tensor);
std::string packTensorProperty(std::string varname, Expr tnsr, TensorProperty property,
int mode, int index);
std::string pointTensorProperty(std::string varname);
2 changes: 1 addition & 1 deletion src/codegen/codegen_c.cpp
@@ -290,7 +290,7 @@ void CodeGen_C::visit(const Function* func) {
localVars = varFinder.localVars;

// Print variable declarations
out << printDecls(varFinder.varDecls, func->inputs, func->outputs) << endl;
out << printDecls(varFinder.varDecls, func->inputs, func->outputs, PRINT_FUNC, "") << endl;

if (emittingCoroutine) {
out << printContextDeclAndInit(varMap, localVars, numYields, func->name)
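This diff only updates the C backend's existing call with PRINT_FUNC; a CUDA backend would presumably add two more calls around the kernel launch, along these lines (hypothetical, not part of this diff; outputTensorName is an illustrative variable):

// Copy input and output buffers to the device before the launch:
out << printDecls(varFinder.varDecls, func->inputs, func->outputs,
                  PRINT_MEM_HOST_TO_DEV, outputTensorName);
// ... emit the kernel launch ...
// Copy the output tensor back and free the device buffers afterwards:
out << printDecls(varFinder.varDecls, func->inputs, func->outputs,
                  PRINT_MEM_DEV_TO_HOST, outputTensorName);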