[NOOP][clangformat][codemod] Enable CLANGFORMAT for caffe2/caffe2/* (pytorch#67624)

Summary: Pull Request resolved: pytorch#67624

Test Plan: Visual inspection. Sandcastle.

Reviewed By: malfet

Differential Revision: D31986628

fbshipit-source-id: c872bded7325997a2945dbf5d4d052628dcb3659
gandalf2390 authored and facebook-github-bot committed Nov 3, 2021
1 parent e86a5a3 commit 06d1be2
Showing 83 changed files with 1,231 additions and 947 deletions.
61 changes: 36 additions & 25 deletions caffe2/cuda_rtc/common_rtc.h
@@ -7,14 +7,14 @@
#include <cuda.h>
#include <nvrtc.h>

#define NVRTC_CHECK(condition) \
do { \
nvrtcResult result = condition; \
if (result != NVRTC_SUCCESS) { \
LOG(FATAL) << "Error at: " << __FILE__ << ":" << __LINE__ << ": " \
<< nvrtcGetErrorString(result); \
} \
} while(0)
#define NVRTC_CHECK(condition) \
do { \
nvrtcResult result = condition; \
if (result != NVRTC_SUCCESS) { \
LOG(FATAL) << "Error at: " << __FILE__ << ":" << __LINE__ << ": " \
<< nvrtcGetErrorString(result); \
} \
} while (0)

namespace caffe2 {

@@ -39,15 +39,14 @@ class CudaRTCFunction {
VLOG(1) << "function src:\n" << src;
// Actually do the compiling.
nvrtcProgram prog;
NVRTC_CHECK(nvrtcCreateProgram(
&prog, src.c_str(), nullptr, 0, nullptr, nullptr));
NVRTC_CHECK(
nvrtcCreateProgram(&prog, src.c_str(), nullptr, 0, nullptr, nullptr));
// Compile the program.
// TODO(Yangqing): how to find the current gpu architecture instead of hard
// coding it?
const char *nvrtc_opts[] = {"--gpu-architecture=compute_35",
"--use_fast_math"};
nvrtcResult compile_result = nvrtcCompileProgram(
prog, 2, nvrtc_opts);
const char* nvrtc_opts[] = {
"--gpu-architecture=compute_35", "--use_fast_math"};
nvrtcResult compile_result = nvrtcCompileProgram(prog, 2, nvrtc_opts);
if (compile_result != NVRTC_SUCCESS) {
size_t log_size;
NVRTC_CHECK(nvrtcGetProgramLogSize(prog, &log_size));
@@ -74,21 +73,33 @@ class CudaRTCFunction {
}

template <typename... Args>
void Launch(unsigned int gx, unsigned int gy, unsigned int gz,
unsigned int bx, unsigned int by, unsigned int bz,
unsigned int shared_mem, cudaStream_t stream,
Args... args) {
void Launch(
unsigned int gx,
unsigned int gy,
unsigned int gz,
unsigned int bx,
unsigned int by,
unsigned int bz,
unsigned int shared_mem,
cudaStream_t stream,
Args... args) {
CAFFE_ENFORCE(
module_loaded_, "Cannot call Launch before a module is loaded.");
void * args_voidp[] = {&args...};
void* args_voidp[] = {&args...};
CUDA_DRIVERAPI_ENFORCE(cuLaunchKernel(
kernel_, gx, gy, gz, bx, by, bz, shared_mem, stream, args_voidp, 0));
}

void LaunchEx(unsigned int gx, unsigned int gy, unsigned int gz,
unsigned int bx, unsigned int by, unsigned int bz,
unsigned int shared_mem, cudaStream_t stream,
void** extra) {
void LaunchEx(
unsigned int gx,
unsigned int gy,
unsigned int gz,
unsigned int bx,
unsigned int by,
unsigned int bz,
unsigned int shared_mem,
cudaStream_t stream,
void** extra) {
CAFFE_ENFORCE(
module_loaded_, "Cannot call Launch before a module is loaded.");
CUDA_DRIVERAPI_ENFORCE(cuLaunchKernel(
@@ -115,6 +126,6 @@ inline std::string GetUniqueName() {
return ss.str();
}

} // namepsace caffe2
} // namespace caffe2

#endif // CAFFE2_CUDA_RTC_COMMON_RTC_H_
#endif // CAFFE2_CUDA_RTC_COMMON_RTC_H_
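
For context, the header above defines a CRTP helper: a derived class supplies a GetSource() that returns CUDA C source as a string, and CudaRTCFunction handles NVRTC compilation and driver-API launches. A minimal sketch of that usage pattern, assuming the Compile/Launch flow shown in the hunks above; the ScaleRTC class and its arguments are illustrative, not part of this commit:

// Hypothetical CRTP client of CudaRTCFunction, mirroring the
// ElementwiseRTCFunction in the next file of this diff.
class ScaleRTC : public caffe2::CudaRTCFunction<ScaleRTC> {
 public:
  ScaleRTC() : CudaRTCFunction(), name_(caffe2::GetUniqueName()) {}
  template <typename... Args>
  std::string GetSource(Args... args); // returns CUDA C source as a string
 private:
  std::string name_;
};

// Assumed call pattern: Compile() forwards its arguments to GetSource(),
// builds the source with NVRTC, and loads the module; Launch() forwards the
// grid/block shape and kernel arguments to cuLaunchKernel.
//   ScaleRTC func;
//   func.Compile(/* GetSource args */);
//   func.Launch(grid_x, 1, 1, block_x, 1, 1, /*shared_mem=*/0, stream, n, x, y);
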
41 changes: 20 additions & 21 deletions caffe2/cuda_rtc/elemenntwise_rtc_gpu.cc
@@ -5,8 +5,7 @@

namespace caffe2 {
namespace {
class ElementwiseRTCFunction
: public CudaRTCFunction<ElementwiseRTCFunction> {
class ElementwiseRTCFunction : public CudaRTCFunction<ElementwiseRTCFunction> {
public:
ElementwiseRTCFunction() : CudaRTCFunction(), name_(GetUniqueName()) {}

@@ -22,22 +21,21 @@ class ElementwiseRTCFunction
string name_;
};

template<>
template <>
string ElementwiseRTCFunction::GetSource(
int input_size, int output_size,
int input_size,
int output_size,
const string command_string) {
std::stringstream ss;
ss << "extern \"C\" __global__ void " << name_ <<
"(const size_t nthreads, \n";
ss << "extern \"C\" __global__ void " << name_
<< "(const size_t nthreads, \n";
// Insert the parameter list.
int remain_params = input_size + output_size;
for (int i = 0; i < input_size; ++i) {
ss << "const float* in" << i
<< ((remain_params--) ? ", \n" : "");
ss << "const float* in" << i << ((remain_params--) ? ", \n" : "");
}
for (int i = 0; i < output_size; ++i) {
ss << "float* out" << i
<< ((remain_params--) ? ", \n" : "");
ss << "float* out" << i << ((remain_params--) ? ", \n" : "");
}
ss << ") {\n"
"for (int index = blockIdx.x * blockDim.x + threadIdx.x;\n"
@@ -46,7 +44,7 @@ string ElementwiseRTCFunction::GetSource(
<< "}\n}";
return ss.str();
}
} // namespace
} // namespace

/**
* A GPU operator that can generate limited elementwise operations.
@@ -75,17 +73,17 @@ class ElementwiseRTCOp final : public Operator<CUDAContext> {
public:
ElementwiseRTCOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<CUDAContext>(operator_def, ws) {
const string src = OperatorBase::GetSingleArgument<string>(
"rtc_src", "");
const string src = OperatorBase::GetSingleArgument<string>("rtc_src", "");
CAFFE_ENFORCE(src.size(), "Op should have a non-zero source code size.");
func_.Compile(InputSize(), OutputSize(), src);
}
~ElementwiseRTCOp() override {}

bool RunOnDevice() override {
static_assert(sizeof(void*) == sizeof(size_t),
"The argbuffer relies on the assumption that void* and "
"size_t have the same size.");
static_assert(
sizeof(void*) == sizeof(size_t),
"The argbuffer relies on the assumption that void* and "
"size_t have the same size.");
vector<size_t> argBuffer_vec(InputSize() + OutputSize() + 1);
size_t* argBuffer = argBuffer_vec.data();
CAFFE_ENFORCE(
@@ -102,10 +100,11 @@ class ElementwiseRTCOp final : public Operator<CUDAContext> {
}
size_t argBufferSize = sizeof(argBuffer);
void* config[] = {
CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize,
CU_LAUNCH_PARAM_END
};
CU_LAUNCH_PARAM_BUFFER_POINTER,
argBuffer,
CU_LAUNCH_PARAM_BUFFER_SIZE,
&argBufferSize,
CU_LAUNCH_PARAM_END};
func_.LaunchEx(
CAFFE_GET_BLOCKS(Input(0).numel()),
1,
@@ -127,4 +126,4 @@ namespace {
REGISTER_CUDA_OPERATOR_WITH_ENGINE(ElementwiseRTC, NVRTC, ElementwiseRTCOp);
}

} // namespace caffe2
} // namespace caffe2
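
The RunOnDevice hunk above uses the CUDA driver API's "extra" launch path: rather than passing an array of pointers to individual arguments, every kernel argument is packed into one flat buffer described by CU_LAUNCH_PARAM_* tags. A standalone sketch of that mechanism under the same sizeof(void*) == sizeof(size_t) assumption the op's static_assert checks; the function and argument names are illustrative:

#include <cuda.h>

// Sketch: launch an NVRTC-built kernel through the driver API's "extra"
// argument buffer, as ElementwiseRTCOp does above.
void LaunchViaArgBuffer(CUfunction kernel, CUstream stream,
                        size_t nthreads, const float* in0, float* out0,
                        unsigned grid_x, unsigned block_x) {
  // One flat buffer; each argument occupies one size_t-sized slot.
  size_t argBuffer[3] = {
      nthreads,
      reinterpret_cast<size_t>(in0),
      reinterpret_cast<size_t>(out0)};
  size_t argBufferSize = sizeof(argBuffer);
  void* config[] = {
      CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
      CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize,
      CU_LAUNCH_PARAM_END};
  // kernelParams is null: the driver unpacks arguments from the extra buffer.
  cuLaunchKernel(kernel, grid_x, 1, 1, block_x, 1, 1,
                 /*sharedMemBytes=*/0, stream,
                 /*kernelParams=*/nullptr, /*extra=*/config);
}
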
61 changes: 44 additions & 17 deletions caffe2/cuda_rtc/pool_op_rtc_gpu.cc
@@ -2,14 +2,14 @@

#include "caffe2/core/common_gpu.h"
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/pool_op.h"
#include "caffe2/cuda_rtc/common_rtc.h"
#include "caffe2/operators/pool_op.h"

namespace caffe2 {
namespace {
class AveragePool {};
class MaxPool {};
} // namespace
} // namespace

namespace {

@@ -98,7 +98,6 @@ __global__ void %s(
}
)";


class MaxPoolRTCFunction : public CudaRTCFunction<MaxPoolRTCFunction> {
public:
MaxPoolRTCFunction() : CudaRTCFunction(), name_(GetUniqueName()) {}
@@ -132,7 +131,6 @@ class MaxPoolGradientRTCFunction
string name_;
};


template <>
string MaxPoolRTCFunction::GetSource(
const int output_size,
@@ -149,9 +147,22 @@ string MaxPoolRTCFunction::GetSource(
const int pad_l) {
char buffer[65536];
int nbytes = snprintf(
buffer, 65536, kMaxPoolForwardNCHWSource, name_.c_str(), output_size,
channels, height, width, pooled_height, pooled_width, kernel_h, kernel_w,
stride_h, stride_w, pad_t, pad_l);
buffer,
65536,
kMaxPoolForwardNCHWSource,
name_.c_str(),
output_size,
channels,
height,
width,
pooled_height,
pooled_width,
kernel_h,
kernel_w,
stride_h,
stride_w,
pad_t,
pad_l);
DCHECK_GE(nbytes, 0);
DCHECK_LT(nbytes, 65536);
return string(buffer);
@@ -174,16 +185,29 @@ string MaxPoolGradientRTCFunction::GetSource(
const int pad_l) {
char buffer[65536];
int nbytes = snprintf(
buffer, 65536, kMaxPoolBackwardNCHWSource, name_.c_str(), output_size,
num, channels, height, width, pooled_height, pooled_width, kernel_h,
kernel_w, stride_h, stride_w, pad_t, pad_l);
buffer,
65536,
kMaxPoolBackwardNCHWSource,
name_.c_str(),
output_size,
num,
channels,
height,
width,
pooled_height,
pooled_width,
kernel_h,
kernel_w,
stride_h,
stride_w,
pad_t,
pad_l);
DCHECK_GE(nbytes, 0);
DCHECK_LT(nbytes, 65536);
return string(buffer);
}

} // namespace

} // namespace

class MaxPoolRTCOp final : public ConvPoolOpBase<CUDAContext> {
public:
@@ -196,7 +220,8 @@ class MaxPoolRTCOp final : public ConvPoolOpBase<CUDAContext> {

bool RunOnDeviceWithOrderNCHW() override {
auto& X = Input(0);
auto output_sizes = ConvPoolOpBase<CUDAContext>::GetOutputSize(X, X.dim32(1));
auto output_sizes =
ConvPoolOpBase<CUDAContext>::GetOutputSize(X, X.dim32(1));
auto* Y = Output(0, output_sizes, at::dtype<float>());

if (input_dims_ != X.sizes()) {
@@ -307,7 +332,9 @@ class MaxPoolGradientRTCOp final : public ConvPoolOpBase<CUDAContext> {

namespace {
REGISTER_CUDA_OPERATOR_WITH_ENGINE(MaxPool, NVRTC, MaxPoolRTCOp);
REGISTER_CUDA_OPERATOR_WITH_ENGINE(MaxPoolGradient, NVRTC,
MaxPoolGradientRTCOp);
} // namespace
} // namespace caffe2
REGISTER_CUDA_OPERATOR_WITH_ENGINE(
MaxPoolGradient,
NVRTC,
MaxPoolGradientRTCOp);
} // namespace
} // namespace caffe2
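
The GetSource hunks above instantiate a CUDA kernel at runtime by printing shape parameters into a source-code template with snprintf. A compact sketch of the technique, with a made-up template standing in for kMaxPoolForwardNCHWSource; only the mechanism matches the code above:

#include <cstdio>
#include <string>

// Illustrative stand-in: %s becomes the unique kernel name, %d a parameter
// baked into the generated source before NVRTC compilation.
static const char kScaleSource[] = R"(
extern "C" __global__ void %s(const int n, const float* x, float* y) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    y[i] = x[i] * %d;
  }
}
)";

std::string MakeScaleKernelSource(const std::string& name, int factor) {
  char buffer[65536];
  int nbytes = snprintf(buffer, sizeof(buffer), kScaleSource,
                        name.c_str(), factor);
  // The real code DCHECKs 0 <= nbytes < 65536 before using the buffer.
  return (nbytes >= 0 && nbytes < static_cast<int>(sizeof(buffer)))
      ? std::string(buffer)
      : std::string();
}
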
2 changes: 1 addition & 1 deletion caffe2/db/create_db_op.cc
@@ -6,4 +6,4 @@ REGISTER_CPU_OPERATOR(CreateDB, CreateDBOp<CPUContext>);
OPERATOR_SCHEMA(CreateDB).NumInputs(0).NumOutputs(1);

NO_GRADIENT(CreateDB);
} // namespace caffe2
} // namespace caffe2
46 changes: 32 additions & 14 deletions caffe2/db/leveldb.cc
@@ -1,6 +1,6 @@
#include "caffe2/core/db.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/flags.h"
#include "caffe2/core/logging.h"
#include "leveldb/db.h"
#include "leveldb/write_batch.h"

@@ -19,13 +19,27 @@ class LevelDBCursor : public Cursor {
SeekToFirst();
}
~LevelDBCursor() override {}
void Seek(const string& key) override { iter_->Seek(key); }
bool SupportsSeek() override { return true; }
void SeekToFirst() override { iter_->SeekToFirst(); }
void Next() override { iter_->Next(); }
string key() override { return iter_->key().ToString(); }
string value() override { return iter_->value().ToString(); }
bool Valid() override { return iter_->Valid(); }
void Seek(const string& key) override {
iter_->Seek(key);
}
bool SupportsSeek() override {
return true;
}
void SeekToFirst() override {
iter_->SeekToFirst();
}
void Next() override {
iter_->Next();
}
string key() override {
return iter_->key().ToString();
}
string value() override {
return iter_->value().ToString();
}
bool Valid() override {
return iter_->Valid();
}

private:
std::unique_ptr<leveldb::Iterator> iter_;
@@ -47,8 +61,7 @@ class LevelDBTransaction : public Transaction {
leveldb::Status status = db_->Write(leveldb::WriteOptions(), batch_.get());
batch_.reset(new leveldb::WriteBatch());
CAFFE_ENFORCE(
status.ok(),
"Failed to write batch to leveldb. ", status.ToString());
status.ok(), "Failed to write batch to leveldb. ", status.ToString());
}

private:
@@ -71,12 +84,17 @@ class LevelDB : public DB {
leveldb::Status status = leveldb::DB::Open(options, source, &db_temp);
CAFFE_ENFORCE(
status.ok(),
"Failed to open leveldb ", source, ". ", status.ToString());
"Failed to open leveldb ",
source,
". ",
status.ToString());
db_.reset(db_temp);
VLOG(1) << "Opened leveldb " << source;
}

void Close() override { db_.reset(); }
void Close() override {
db_.reset();
}
unique_ptr<Cursor> NewCursor() override {
return make_unique<LevelDBCursor>(db_.get());
}
@@ -92,5 +110,5 @@ REGISTER_CAFFE2_DB(LevelDB, LevelDB);
// For lazy-minded, one can also call with lower-case name.
REGISTER_CAFFE2_DB(leveldb, LevelDB);

} // namespace db
} // namespace caffe2
} // namespace db
} // namespace caffe2
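
For orientation, the wrapper above plugs LevelDB into caffe2's generic DB interface through REGISTER_CAFFE2_DB, so it is reachable by name via the registry. An assumed read loop over that interface; the function and database path are hypothetical:

#include "caffe2/core/db.h"
#include "caffe2/core/logging.h"

// Sketch of iterating a registered backend; "leveldb" resolves to the
// LevelDB class above through the registry.
void DumpKeys(const std::string& path) {
  std::unique_ptr<caffe2::db::DB> db(
      caffe2::db::CreateDB("leveldb", path, caffe2::db::READ));
  std::unique_ptr<caffe2::db::Cursor> cursor = db->NewCursor();
  for (cursor->SeekToFirst(); cursor->Valid(); cursor->Next()) {
    LOG(INFO) << cursor->key() << " (" << cursor->value().size() << " bytes)";
  }
}
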
(Diff truncated; the remaining changed files are not shown.)