/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
|
|
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
|
#include "tensorflow/core/framework/tensor.h"
|
#include "tensorflow/core/framework/types.h"
|
#include "tensorflow/core/platform/test.h"
|
#include "tensorflow/core/platform/test_benchmark.h"
|
|
namespace tensorflow {
|
|
template <typename T>
|
static Graph* LargeOneDCumsum(int num_x, bool reverse = false) {
|
auto* g = new Graph(OpRegistry::Global());
|
Tensor data(DataTypeToEnum<T>::value, TensorShape({num_x}));
|
data.flat<T>().setRandom();
|
Tensor axes(DT_INT32, TensorShape({}));
|
axes.flat<int32>()(0) = 0;
|
test::graph::Cumsum(g, test::graph::Constant(g, data),
|
test::graph::Constant(g, axes));
|
return g;
|
}
|
|
// Builds a graph containing a single Cumsum node that scans a random
// {num_x, num_y} float tensor along axis 0.
//
//   num_x, num_y - dimensions of the 2-D input.
//   reverse      - forwarded to the Cumsum node's `reverse` attribute.
//
// Returns a heap-allocated Graph; the caller is responsible for it (in this
// file it is handed straight to test::Benchmark).
static Graph* ColCumsum(int num_x, int num_y, bool reverse = false) {
  auto* g = new Graph(OpRegistry::Global());

  Tensor data(DT_FLOAT, TensorShape({num_x, num_y}));
  data.flat<float>().setRandom();

  // Scalar axis tensor selecting dimension 0.
  Tensor axes(DT_INT32, TensorShape({}));
  axes.flat<int32>()(0) = 0;

  // `reverse` must be passed explicitly; otherwise the *_reverse benchmarks
  // silently measure the forward scan.
  test::graph::Cumsum(g, test::graph::Constant(g, data),
                      test::graph::Constant(g, axes), /*exclusive=*/false,
                      reverse);
  return g;
}
|
|
// Builds a graph containing a single Cumsum node that scans a random
// {num_x, num_y} float tensor along axis 1.
//
//   num_x, num_y - dimensions of the 2-D input.
//   reverse      - forwarded to the Cumsum node's `reverse` attribute.
//
// Returns a heap-allocated Graph; the caller is responsible for it (in this
// file it is handed straight to test::Benchmark).
static Graph* RowCumsum(int num_x, int num_y, bool reverse = false) {
  auto* g = new Graph(OpRegistry::Global());

  Tensor data(DT_FLOAT, TensorShape({num_x, num_y}));
  data.flat<float>().setRandom();

  // Scalar axis tensor selecting dimension 1.
  Tensor axes(DT_INT32, TensorShape({}));
  axes.flat<int32>()(0) = 1;

  // `reverse` must be passed explicitly; otherwise the *_reverse benchmarks
  // silently measure the forward scan.
  test::graph::Cumsum(g, test::graph::Constant(g, data),
                      test::graph::Constant(g, axes), /*exclusive=*/false,
                      reverse);
  return g;
}
|
|
// Builds a graph containing a single Cumsum node that scans a random
// {32, num_y, num_z} float tensor along its middle axis (axis 1).
//
//   num_y, num_z - sizes of the second and third dimensions; the leading
//                  dimension is fixed at 32.
//   reverse      - forwarded to the Cumsum node's `reverse` attribute.
//
// Returns a heap-allocated Graph; the caller is responsible for it (in this
// file it is handed straight to test::Benchmark).
static Graph* ThreeDYCumsum(int num_y, int num_z, bool reverse = false) {
  auto* g = new Graph(OpRegistry::Global());

  Tensor data(DT_FLOAT, TensorShape({32, num_y, num_z}));
  data.flat<float>().setRandom();

  // Scalar axis tensor selecting dimension 1 (the "Y" axis).
  Tensor axes(DT_INT32, TensorShape({}));
  axes.flat<int32>()(0) = 1;

  // `reverse` must be passed explicitly; otherwise the *_reverse benchmarks
  // silently measure the forward scan.
  test::graph::Cumsum(g, test::graph::Constant(g, data),
                      test::graph::Constant(g, axes), /*exclusive=*/false,
                      reverse);
  return g;
}
|
|
template <typename T>
|
static void LargeOneDimensional(int iters, const string& device, int num_x,
|
bool reverse = false) {
|
testing::ItemsProcessed(static_cast<int64>(iters) * num_x);
|
testing::BytesProcessed(static_cast<int64>(iters) * num_x * sizeof(T));
|
test::Benchmark(device, LargeOneDCumsum<T>(num_x, reverse)).Run(iters);
|
}
|
|
static void DoRowCumsum(int iters, const string& device, int num_x, int num_y,
|
bool reverse = false) {
|
testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
|
testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
|
sizeof(float));
|
test::Benchmark(device, RowCumsum(num_x, num_y, reverse)).Run(iters);
|
}
|
|
static void DoColCumsum(int iters, const string& device, int num_x, int num_y,
|
bool reverse = false) {
|
testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
|
testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
|
sizeof(float));
|
test::Benchmark(device, ColCumsum(num_x, num_y, reverse)).Run(iters);
|
}
|
|
static void Do3DYCumsum(int iters, const string& device, int num_x, int num_y,
|
bool reverse = false) {
|
testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
|
testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
|
sizeof(float));
|
test::Benchmark(device, ThreeDYCumsum(num_x, num_y, reverse)).Run(iters);
|
}
|
|
static void BM_OneDCumsumGPU(int iters, int num_x) {
|
LargeOneDimensional<float>(iters, "gpu", num_x);
|
}
|
BENCHMARK(BM_OneDCumsumGPU)->Range(1, 1 << 21);
|
|
static void BM_OneDCumsumGPUHalf(int iters, int num_x) {
|
LargeOneDimensional<Eigen::half>(iters, "gpu", num_x);
|
}
|
BENCHMARK(BM_OneDCumsumGPUHalf)->Range(1, 1 << 21);
|
|
static void BM_Sum2DRowCumsumGPU(int iters, int num_x, int num_y) {
|
DoRowCumsum(iters, "gpu", num_x, num_y);
|
}
|
BENCHMARK(BM_Sum2DRowCumsumGPU)->RangePair(1, 8192, 1, 8192);
|
|
static void BM_Sum2DColumnCumsumGPU(int iters, int num_x, int num_y) {
|
DoColCumsum(iters, "gpu", num_x, num_y);
|
}
|
BENCHMARK(BM_Sum2DColumnCumsumGPU)->RangePair(1, 8192, 1, 8192);
|
|
static void BM_Sum3DYCumsumGPU(int iters, int num_x, int num_y) {
|
Do3DYCumsum(iters, "gpu", num_x, num_y);
|
}
|
BENCHMARK(BM_Sum3DYCumsumGPU)->RangePair(64, 4096, 64, 4096);
|
|
static void BM_OneDCumsumGPU_reverse(int iters, int num_x) {
|
LargeOneDimensional<float>(iters, "gpu", num_x, true);
|
}
|
BENCHMARK(BM_OneDCumsumGPU_reverse)->Range(1, 1 << 21);
|
|
static void BM_Sum2DRowCumsumGPU_reverse(int iters, int num_x, int num_y) {
|
DoRowCumsum(iters, "gpu", num_x, num_y, true);
|
}
|
BENCHMARK(BM_Sum2DRowCumsumGPU_reverse)->RangePair(1, 8192, 1, 8192);
|
|
static void BM_Sum2DColumnCumsumGPU_reverse(int iters, int num_x, int num_y) {
|
DoColCumsum(iters, "gpu", num_x, num_y, true);
|
}
|
BENCHMARK(BM_Sum2DColumnCumsumGPU_reverse)->RangePair(1, 8192, 1, 8192);
|
|
static void BM_Sum3DYCumsumGPU_reverse(int iters, int num_x, int num_y) {
|
Do3DYCumsum(iters, "gpu", num_x, num_y, true);
|
}
|
BENCHMARK(BM_Sum3DYCumsumGPU_reverse)->RangePair(32, 2048, 32, 2048);
|
|
} // end namespace tensorflow
|