Skip to content
This repository has been archived by the owner on May 13, 2024. It is now read-only.

Commit

Permalink
first milestone of t2x (#1)
Browse files Browse the repository at this point in the history
* first milestone of t2x

* [Add nvidia_libdevice_bitcode]

* enable fifo and AOT (still have bugs in AOT)

* solve a bug in channel promotion

* rewrite channel promotion

* fix bugs

* fix a bug

* pass correctness tests

* [Cosmetic Revision]

* [Turn on WITH_EXCEPTIONS=1 for make by default. Revised README]

* [Add an intro video]

* [Revised README for GEMM]

* [Change all test.sh files to be executable]

* [Revised fft funct.h]

* [Make bash files executable]

* [Temporarily delete the intro video for being too large]

* [Add intro video again]

* [Add first slide image of the intro video]

* [Use URL for the intro video]

* [Remove embedding of video, which does not work]

* revise code and README

* Update README.md

* [Revised README for GEMM. Not fully done yet. To further revise]

* [Revise SGEMM README. Keep only AOT mode for simplicity. To verify on DevCloud]

* [Verified TINY emulation on DevCloud A10 1.2.1]

* [Recover gemm-run.cpp in aot]

* [Fix for security]

* [Revise GEMM README for more accurate instructions on DevCloud]

* fix a bug in insert_fpga_reg

* [Shared utils for JIT runtime, AOT runtime, and roofline drawing. SGEMM perf test works]

* [Fix opencl.cpp: not to use SharedUtilsInC, which allocates and frees
 memory and seems to introduce issues with Halide]

* [Fix test/correctness/roofline]

* [Allow plotting rooflines without xserver]

* [Minor fixes to gemm perf test commands, etc.]

* [Misc tiny fixes]

Co-authored-by: xiaochen.hao <xiaochen.hao@stu.pku.edu.cn>
Co-authored-by: Hongbo Rong <hongbo.rong@intel.com>
  • Loading branch information
3 people authored Oct 18, 2021
1 parent e35b2a6 commit c940166
Show file tree
Hide file tree
Showing 74 changed files with 3,000 additions and 714 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

intro.mp4 filter=lfs diff=lfs merge=lfs -text
6 changes: 3 additions & 3 deletions ACKNOWLEDGEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ Many academic researchers have contributed to the project:

Many Intel people have helped enabling the technology:

+ System Software Group: Geoff Lowney, John C. Kreatsoulas, Nithin George, Paul Petersen, Gorge Powley, Daya Khudia, Charlotte Dryden, Adam Herr
+ System Software Group: Geoff Lowney, John C. Kreatsoulas, Nithin George, Paul Petersen, Gorge Powley, Carmen Badea, Daya Khudia, Adam Herr, Charlotte Dryden, Pablo Reble, Vishakha Agrawal, Mike Voss, Vasanth Tovinkere

+ Intel Labs: Christopher J. Hughes, Pradeep Dubey, Jim Held, Timothy Mattson, Sanket Tavarageri, Kunal Banerjee, Bharat Kaul, Justin Gottschlich, Todd A. Anderson, Michael Beale

+ Accelerated Computing Systems and Graphics Group: Hong Jiang, Kari Pulli, Lidong Xu, Fangwen Fu, Hongzheng Li, Sabareesh Ganapathy
+ Accelerated Computing Systems and Graphics Group: Hong Jiang, Lidong Xu, Kari Pulli, Fangwen Fu, Hongzheng Li, Sabareesh Ganapathy

+ GPU Software Engineering Group: Kai Yu Chen, Guei-Yuan Lueh, Yuting Yang

+ Programmable Solution Group: Jose Alvarez, Bernhard Friebe, Mohamed Issa, Aravind Dasu, John Freeman, Davor Capalija, Tomasz Czajkowski
+ Programmable Solution Group: Jose Alvarez, Bernhard Friebe, Dan Prikster, Mohamed Issa, Aravind Dasu, John Freeman, Gordon Chiu, Davor Capalija, Tomasz Czajkowski, Andrei Hagiescu

+ Intel FPGA DevCloud: Lawrence Landis, Jimmy Tran
14 changes: 9 additions & 5 deletions Halide/Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

# 'make' builds libHalide.a, the internal test suite, and runs the internal test suite
# 'make run_tests' builds and runs all the end-to-end tests in the test subdirectory
# 'make {error,performance}_foo' builds and runs test/{...}/foo.cpp for any
Expand Down Expand Up @@ -136,7 +137,7 @@ ifeq ($(OS), Windows_NT)
else
WITH_INTROSPECTION ?= not-empty
endif
WITH_EXCEPTIONS ?=
WITH_EXCEPTIONS ?= 1
WITH_LLVM_INSIDE_SHARED_LIBHALIDE ?= not-empty

WITH_V8 ?=
Expand Down Expand Up @@ -796,6 +797,7 @@ T2S_DIR = $(ROOT_DIR)/../t2s
T2S_SOURCE_FILES = \
AutorunKernels.cpp \
BuildCallRelation.cpp \
ChannelPromotion.cpp \
CheckFuncConstraints.cpp \
CheckRecursiveCalls.cpp \
CombineChannels.cpp \
Expand All @@ -814,14 +816,15 @@ T2S_SOURCE_FILES = \
MinimizeShregs.cpp \
NoIfSimplify.cpp \
Overlay.cpp \
PatternMatcher.cpp \
Place.cpp \
PreprocessBeforeLower.cpp \
ScatterAndBuffer.cpp \
SliceExprTree.cpp \
SpaceTimeTransform.cpp \
Stensor.cpp \
StructType.cpp \
Utilities.cpp \
roofline.cpp
Utilities.cpp

T2S_HEADER_FILES = \
AutorunKernels.h \
Expand All @@ -841,14 +844,15 @@ T2S_HEADER_FILES = \
MinimizeShregs.h \
NoIfSimplify.h \
Overlay.h \
PatternMatcher.h \
Place.h \
PreprocessBeforeLower.h \
ScatterAndBuffer.h \
SliceExprTree.h \
SpaceTimeTransform.h \
Stensor.h \
StructType.h \
Utilities.h \
roofline.h
Utilities.h

OBJECTS += $(T2S_SOURCE_FILES:%.cpp=$(BUILD_DIR)/t2s/%.o)
HEADERS += $(T2S_HEADER_FILES:%.h=$(T2S_DIR)/src/%.h)
Expand Down
29 changes: 27 additions & 2 deletions Halide/apps/fft/complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ inline Halide::Expr im(Halide::Expr x) {
// Complex conjugate: same real part, negated imaginary part.
inline ComplexExpr conj(ComplexExpr z) {
    Halide::Expr real_part = re(z);
    Halide::Expr imag_part = -im(z);
    return ComplexExpr(real_part, imag_part);
}
// Overload of conj for a plain Halide::Expr: returns its argument unchanged
// (presumably because a real-valued expression is its own conjugate).
inline Halide::Expr conj(Halide::Expr x) {
return x;
}

// Unary negation.
inline ComplexExpr operator-(ComplexExpr z) {
Expand Down Expand Up @@ -106,6 +109,10 @@ inline ComplexExpr operator/(ComplexExpr a, Halide::Expr b) {
return ComplexExpr(re(a) / b, im(a) / b);
}

// Complex division: a / b = (a * conj(b)) / (re(b)^2 + im(b)^2).
inline ComplexExpr operator/(ComplexExpr a, ComplexExpr b) {
    // Squared magnitude of the divisor.
    Halide::Expr norm_sq = re(b) * re(b) + im(b) * im(b);
    ComplexExpr numerator = a * conj(b);
    return numerator / norm_sq;
}

// Compute exp(j*x)
inline ComplexExpr expj(Halide::Expr x) {
return ComplexExpr(Halide::cos(x), Halide::sin(x));
Expand All @@ -116,9 +123,14 @@ inline ComplexExpr sum(ComplexExpr z, const std::string &s = "sum") {
return ComplexExpr(Halide::sum(re(z), s + "_re"),
Halide::sum(im(z), s + "_im"));
}

inline ComplexExpr select(Halide::Expr c, ComplexExpr t, ComplexExpr f) {
return ComplexExpr(Halide::select(c, re(t), re(f)),
Halide::select(c, im(t), im(f)));
Halide::Expr re_part=Halide::select(c, re(t), re(f));
Halide::Expr im_part=Halide::select(c, im(t), im(f));
return ComplexExpr(re_part, im_part);
}
// Two-argument select (condition and true-value only), applied separately to
// the real and imaginary components of t.
inline ComplexExpr select(Halide::Expr c, ComplexExpr t) {
    Halide::Expr re_part = Halide::select(c, re(t));
    Halide::Expr im_part = Halide::select(c, im(t));
    return ComplexExpr(re_part, im_part);
}
inline ComplexExpr select(Halide::Expr c1, ComplexExpr t1,
Halide::Expr c2, ComplexExpr t2,
Expand All @@ -136,5 +148,18 @@ inline ComplexExpr cast(Halide::Type type, ComplexExpr z) {
// Applies Halide::likely to the real and imaginary components of z.
inline ComplexExpr likely(ComplexExpr z) {
    Halide::Expr re_hint = Halide::likely(re(z));
    Halide::Expr im_hint = Halide::likely(im(z));
    return ComplexExpr(re_hint, im_hint);
}
// select whose false-value is a typed function reference: the reference is
// converted with toT() and the call is forwarded to the value-based select.
template<typename T>
inline T select(Halide::Expr c, ComplexExpr t, FuncRefT<T> f) {
    T false_value = f.toT();
    return select(c, t, false_value);
}
// select whose true-value is a typed function reference: the reference is
// converted with toT() and the call is forwarded to the value-based select.
template<typename T>
inline T select(Halide::Expr c, FuncRefT<T> t, ComplexExpr f) {
    T true_value = t.toT();
    return select(c, true_value, f);
}
// select where both branches are typed function references: each is converted
// with toT() and the call is forwarded to the value-based select.
template<typename T>
inline T select(Halide::Expr c, FuncRefT<T> t, FuncRefT<T> f) {
    T true_value = t.toT();
    T false_value = f.toT();
    return select(c, true_value, false_value);
}


#endif
72 changes: 60 additions & 12 deletions Halide/apps/fft/funct.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#include <vector>

#include "Halide.h"
#include "../../../../Halide/src/Func.h"

struct ComplexExpr;
template<typename T>
class FuncRefT : public T {
Halide::FuncRef untyped;
Expand Down Expand Up @@ -34,14 +36,23 @@ class FuncRefT : public T {
// Divide-assign: reads the current value of the wrapped reference as a T
// (via a Tuple built from `untyped`), divides it by x, and assigns the
// quotient back through `untyped`. Returns whatever that assignment yields.
Stage operator/=(T x) {
return untyped = T(Tuple(untyped)) / x;
}
// Converts this typed reference into a plain value of type T.
//
// When T is ComplexExpr, the value is rebuilt from two Call nodes on the
// referenced function, using the reference's arguments and the trailing
// indices 0 and 1 (presumably the real and imaginary tuple components).
//
// For any other T, the T base sub-object of this reference is returned, the
// same value `static_cast<T>(ref)` produces. Previously control fell off the
// end of this non-void function in that case, which is undefined behaviour.
T toT(){
    if(typeid(ComplexExpr)==typeid(T)){
        return ComplexExpr(
            Halide::Call::make(untyped.function(), untyped.arguments(), 0),
            Halide::Call::make(untyped.function(), untyped.arguments(), 1));
    }
    // Fallback for every T other than ComplexExpr: always return a value.
    return static_cast<const T &>(*this);
}
};


template<typename T>
class FuncT : public Halide::Func {
public:
typedef Halide::Var Var;
typedef Halide::Expr Expr;
typedef Halide::Func Func;
typedef Halide::Place Place;
typedef Halide::Type Type;
typedef Halide::FuncRef FuncRef;

explicit FuncT(const std::string &name)
: Func(name) {
Expand All @@ -54,10 +65,26 @@ class FuncT : public Halide::Func {
// Wraps an existing Halide::Func by passing it to the Func base constructor.
explicit FuncT(Func f)
: Func(f) {
}

// Constructs a FuncT from a Place only; the place is forwarded to the
// corresponding Func base constructor.
explicit FuncT(Place place)
: Func(place) {
}
// Constructs a named FuncT with a Place; both arguments are forwarded to the
// corresponding Func base constructor.
explicit FuncT(const std::string &name, Place place)
: Func(name, place) {
}

// Constructs a FuncT from an internal Function object by forwarding it to the
// Func base constructor.
explicit FuncT(Halide::Internal::Function f)
: Func(f) {
}

// Constructs a FuncT from explicit return types, argument variables and a
// Place; all three are forwarded unchanged to the Func base constructor.
explicit FuncT(const std::vector<Type> &return_types, const std::vector<Var> &args, Place place)
: Func(return_types, args, place) {
}

// Named variant of the (return types, arguments, place) constructor; all four
// arguments are forwarded unchanged to the Func base constructor.
explicit FuncT(const std::string &name, const std::vector<Type> &return_types, const std::vector<Var> &args, Place place)
: Func(name, return_types, args, place) {
}

template<typename... Args>
FuncRefT<T> operator()(Args &&... args) const {
return Func::operator()(std::forward<Args>(args)...);
Expand All @@ -78,7 +105,7 @@ class FuncT : public Halide::Func {
// there is one.
template<typename T>
T operator-(FuncRefT<T> x) {
return -static_cast<T>(x);
return -x.toT();
}
template<typename T>
T operator~(FuncRefT<T> x) {
Expand All @@ -87,43 +114,64 @@ T operator~(FuncRefT<T> x) {

template<typename T>
T operator+(FuncRefT<T> a, T b) {
return static_cast<T>(a) + b;
return a.toT()+b;
}
template<typename T>
T operator-(FuncRefT<T> a, T b) {
return static_cast<T>(a) - b;
return a.toT()-b;
}
template<typename T>
T operator*(FuncRefT<T> a, T b) {
return static_cast<T>(a) * b;
return a.toT()*b;
}
template<typename T>
T operator/(FuncRefT<T> a, T b) {
return static_cast<T>(a) / b;
return a.toT()/b;
}
template<typename T>
T operator%(FuncRefT<T> a, T b) {
return static_cast<T>(a) % b;
return a.toT()%b;
}
template<typename T>
T operator+(T a, FuncRefT<T> b) {
return a + static_cast<T>(b);
return a+b.toT();
}
template<typename T>
T operator-(T a, FuncRefT<T> b) {
return a - static_cast<T>(b);
return a-b.toT();
}
template<typename T>
T operator*(T a, FuncRefT<T> b) {
return a * static_cast<T>(b);
return a*b.toT();
}
template<typename T>
T operator/(T a, FuncRefT<T> b) {
return a / static_cast<T>(b);
return a/b.toT();
}
template<typename T>
T operator%(T a, FuncRefT<T> b) {
return a % static_cast<T>(b);
return a%b.toT();
}

// Sum of two typed function references; each operand is first converted to a
// T value with toT().
template<typename T>
T operator+(FuncRefT<T> a, FuncRefT<T> b) {
    T lhs = a.toT();
    T rhs = b.toT();
    return lhs + rhs;
}
// Difference of two typed function references; each operand is first
// converted to a T value with toT().
template<typename T>
T operator-(FuncRefT<T> a, FuncRefT<T> b) {
    T lhs = a.toT();
    T rhs = b.toT();
    return lhs - rhs;
}
// Product of two typed function references; each operand is first converted
// to a T value with toT().
template<typename T>
T operator*(FuncRefT<T> a, FuncRefT<T> b) {
    T lhs = a.toT();
    T rhs = b.toT();
    return lhs * rhs;
}
// Quotient of two typed function references; each operand is first converted
// to a T value with toT().
template<typename T>
T operator/(FuncRefT<T> a, FuncRefT<T> b) {
    T lhs = a.toT();
    T rhs = b.toT();
    return lhs / rhs;
}
// Remainder of two typed function references; each operand is first converted
// to a T value with toT(). Only usable for a T that itself defines operator%.
template<typename T>
T operator%(FuncRefT<T> a, FuncRefT<T> b) {
    T lhs = a.toT();
    T rhs = b.toT();
    return lhs % rhs;
}

template<typename T>
Expand Down Expand Up @@ -175,4 +223,4 @@ Halide::Expr operator>(T a, FuncRefT<T> b) {
return a > static_cast<T>(b);
}

#endif
#endif
10 changes: 5 additions & 5 deletions Halide/src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,7 @@ class GatherKernelInfo : public IRVisitor {
CodeGen_C* parent;
public:
vector<string> kernel_names;

GatherKernelInfo() {}

void visit(const For *op) override {
Expand Down Expand Up @@ -1715,7 +1715,7 @@ void CodeGen_C::compile(const Module &input) {
f.body.accept(&g);
}

stream << "int MAX_DEVICES = 4;\n"
stream << "int MAX_DEVICES = 4;\n"
<< "int NUM_QUEUES_TO_CREATE = " << g.kernel_names.size() << ";\n"
<< "int NUM_KERNELS_TO_CREATE = " << g.kernel_names.size() << ";\n"
<< "cl_int status;\n"
Expand All @@ -1724,13 +1724,13 @@ void CodeGen_C::compile(const Module &input) {
<< "cl_device_id devices[4];\n"
<< "int current_kernel = 0;\n"
<< "cl_kernel kernel[" << g.kernel_names.size() << "];\n\n";

stream << "const char *kernel_name[] = {\n";
for (auto name : g.kernel_names) {
stream << " \"" << name << "\",\n";
}
stream << "};\n";

}

for (const auto &b : input.buffers()) {
Expand Down Expand Up @@ -2803,7 +2803,7 @@ void CodeGen_C::visit(const For *op) {
stream << "sizeof(cl_mem), "
<< "(void *)&((device_handle *)_halide_buffer_get_device(" << print_name(arg.name + ".buffer") << "))->mem";
} else {
stream << "sizeof(" << print_type(arg.type) << "), "
stream << "sizeof(" << print_type(arg.type) << "), "
<< "(void *)&" << arg.name;
}
stream << ");\n"
Expand Down
Loading

0 comments on commit c940166

Please sign in to comment.