Skip to content

Commit 7bb627d

Browse files
authored
Merge pull request #409 from luotao1/conv
Support rectangle input for CNN
2 parents a0a87ac + 7830893 commit 7bb627d

29 files changed

+387
-282
lines changed

paddle/gserver/layers/BatchNormBaseLayer.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,12 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
6060

6161
void BatchNormBaseLayer::calFeatureMapSize() {
6262
const ImageConfig& conf = config_.inputs(0).image_conf();
63-
if (inputLayers_[0]->getOutput().getFrameHeight() == 0 &&
64-
inputLayers_[0]->getOutput().getFrameWidth() == 0) {
65-
imgSize_ = conf.img_size();
66-
imageH_ = imgSize_;
67-
imageW_ = imgSize_;
63+
imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
64+
imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
65+
if (imageH_ == 0 && imageW_ == 0) {
66+
imageH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
67+
imageW_ = conf.img_size();
6868
} else {
69-
imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
70-
imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
7169
getOutput().setFrameHeight(imageH_);
7270
getOutput().setFrameWidth(imageW_);
7371
}

paddle/gserver/layers/BatchNormBaseLayer.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,8 @@ class BatchNormBaseLayer : public Layer {
7777
MatrixPtr savedMean_;
7878
MatrixPtr savedInvVar_;
7979

80-
/// Height or width of input image feature, now height is equal to width.
81-
/// imgSize is 1 if the input is fully-connected layer.
82-
int imgSize_;
80+
/// Height and width of the input image feature.
81+
/// Both are 1 if the input is a fully-connected layer.
8382
int imageH_;
8483
int imageW_;
8584
/// Height * Width.

paddle/gserver/layers/BilinearInterpLayer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@ size_t BilinearInterpLayer::getSize() {
2626

2727
const BilinearInterpConfig& conf = config_.inputs(0).bilinear_interp_conf();
2828
if (inImgH_ == 0) {
29-
inImgH_ = conf.img_size_y();
29+
inImgH_ = conf.image_conf().img_size_y();
3030
}
3131
if (inImgW_ == 0) {
32-
inImgW_ = conf.img_size_x();
32+
inImgW_ = conf.image_conf().img_size();
3333
}
3434

3535
outImgH_ = conf.out_size_y();
3636
outImgW_ = conf.out_size_x();
37-
numChannels_ = conf.num_channels();
37+
numChannels_ = conf.image_conf().channels();
3838

3939
CHECK(outImgH_ > 0 && outImgW_ > 0);
4040
CHECK(inImgH_ > 0 && inImgW_ > 0);

paddle/gserver/layers/ConvBaseLayer.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
3838
filterSizeY_.push_back(conf.filter_size_y());
3939
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
4040
channels_.push_back(conf.channels());
41-
imgSizeH_.push_back(conf.img_size());
41+
imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y()
42+
: conf.img_size());
4243
imgSizeW_.push_back(conf.img_size());
4344
groups_.push_back(conf.groups());
4445
filterChannels_.push_back(conf.filter_channels());
45-
outputH_.push_back(conf.output_x());
46+
outputH_.push_back(conf.has_output_y() ? conf.output_y() : conf.output_x());
4647
outputW_.push_back(conf.output_x());
4748
}
4849

@@ -91,16 +92,19 @@ size_t ConvBaseLayer::calOutputSize() {
9192
for (size_t i = 0; i < inputLayers_.size(); i++) {
9293
inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
9394
inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
95+
const ConvConfig& conf = config_.inputs(i).conv_conf();
9496
if (isDeconv_) {
95-
if (inH[i] == 0) inH[i] = config_.inputs(i).conv_conf().output_x();
96-
if (inW[i] == 0) inW[i] = config_.inputs(i).conv_conf().output_x();
97+
if (inH[i] == 0)
98+
inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x();
99+
if (inW[i] == 0) inW[i] = conf.output_x();
97100
outH.push_back(imageSize(
98101
inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_));
99102
outW.push_back(imageSize(
100103
inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_));
101104
} else {
102-
if (inH[i] == 0) inH[i] = config_.inputs(i).conv_conf().img_size();
103-
if (inW[i] == 0) inW[i] = config_.inputs(i).conv_conf().img_size();
105+
if (inH[i] == 0)
106+
inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
107+
if (inW[i] == 0) inW[i] = conf.img_size();
104108
outH.push_back(outputSize(
105109
inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_));
106110
outW.push_back(outputSize(

paddle/gserver/layers/ConvOperator.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ class ConvOperator : public Operator {
9393
bool caffeMode_;
9494
int inputOffset_, outputOffset_, weightOffset_;
9595
int numFilters_;
96-
int padding_, stride_, filterSize_, channels_, imgSize_;
96+
int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_;
9797
int paddingY_, strideY_, filterSizeY_;
98-
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputs_;
98+
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
9999

100100
/// The following member variables are the same as in CudnnConvLayer.
101101
/// There is no explanation here.
@@ -144,7 +144,7 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
144144
void ConvOperator::reshape(int batchSize) {
145145
imageH_ = ins_[0]->getFrameHeight();
146146
imageW_ = ins_[0]->getFrameWidth();
147-
if (imageH_ == 0) imageH_ = imgSize_;
147+
if (imageH_ == 0) imageH_ = imgSizeY_;
148148
if (imageW_ == 0) imageW_ = imgSize_;
149149
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
150150
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
@@ -182,7 +182,10 @@ void ConvOperator::computeConvSizes() {
182182
hl_create_tensor_descriptor(&inputDesc_);
183183
int outputX =
184184
outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
185+
int outputY =
186+
outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_);
185187
CHECK_EQ(outputX, outputX_);
188+
CHECK_EQ(outputY, outputY_);
186189
hl_create_tensor_descriptor(&outputDesc_);
187190
hl_create_convolution_descriptor(&convDesc_,
188191
inputDesc_,
@@ -236,10 +239,12 @@ void ConvOperator::getConvParams() {
236239
filterPixels_ = filterSize_ * filterSizeY_;
237240
channels_ = conf.channels();
238241
imgSize_ = conf.img_size();
239-
imgPixels_ = imgSize_ * imgSize_;
242+
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
243+
imgPixels_ = imgSize_ * imgSizeY_;
240244
CHECK_EQ(conf.groups(), 1U);
241245
filterChannels_ = conf.filter_channels();
242246
outputX_ = conf.output_x();
247+
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
243248
outputs_ = outputX_ * outputX_;
244249
}
245250

paddle/gserver/layers/ConvProjection.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void ConvProjection::getConvParams() {
4646
filterH_ = conf.filter_size_y();
4747
filterW_ = conf.filter_size();
4848

49-
configImgH_ = conf.img_size();
49+
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
5050
configImgW_ = conf.img_size();
5151

5252
channels_ = conf.channels();
@@ -58,9 +58,11 @@ void ConvProjection::getConvParams() {
5858
}
5959

6060
void ConvProjection::initCudnn() {
61-
hl_create_filter_descriptor(
62-
&filterDesc_, channels_ / groups_, numFilters_ / groups_,
63-
filterH_, filterW_);
61+
hl_create_filter_descriptor(&filterDesc_,
62+
channels_ / groups_,
63+
numFilters_ / groups_,
64+
filterH_,
65+
filterW_);
6466
hl_create_tensor_descriptor(&inputDesc_);
6567
hl_create_tensor_descriptor(&outputDesc_);
6668
hl_create_convolution_descriptor(&convDesc_,

paddle/gserver/layers/DataLayer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,13 @@ void DataLayer::copyDataToOutput(Argument& output) {
4949
output.ids->copyFrom(*data_.ids);
5050
}
5151
}
52-
output.setFrameHeight(data_.getFrameHeight());
53-
output.setFrameWidth(data_.getFrameWidth());
52+
if (config_.height() && config_.width()) {
53+
output.setFrameHeight(config_.height());
54+
output.setFrameWidth(config_.width());
55+
} else {
56+
output.setFrameHeight(data_.getFrameHeight());
57+
output.setFrameHeight(data_.getFrameHeight());
58+
}
5459
output.cpuSequenceDims = data_.cpuSequenceDims;
5560
output.sequenceStartPositions = data_.sequenceStartPositions;
5661
output.subSequenceStartPositions = data_.subSequenceStartPositions;

paddle/gserver/layers/ExpandConvBaseLayer.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,19 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
2929
* meaning as in conv, we need to swap channels_ and numFilters here for
3030
* convTrans, and in other functions too.
3131
* */
32-
int channel;
33-
int numFilters;
32+
3433
/* Initialize the projection */
3534
for (auto &inputConfig : config_.inputs()) {
3635
const ConvConfig &conf = inputConfig.conv_conf();
37-
numFilters = isDeconv_ ? conf.channels() : numFilters_;
36+
int numFilters = isDeconv_ ? conf.channels() : numFilters_;
3837
subM_.push_back(numFilters / conf.groups());
39-
subN_.push_back(conf.output_x() * conf.output_x());
40-
channel = isDeconv_ ? numFilters_ : conf.channels();
41-
subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
42-
conf.groups());
38+
subN_.push_back(conf.output_x() *
39+
(conf.has_output_y() ? conf.output_y() : conf.output_x()));
40+
int channel = isDeconv_ ? numFilters_ : conf.channels();
41+
subK_.push_back(
42+
channel * conf.filter_size() *
43+
(conf.has_filter_size_y() ? conf.filter_size_y() : conf.filter_size()) /
44+
conf.groups());
4345
/* Consistent caffe mode for multiple inputs */
4446
caffeMode_ = conf.caffe_mode();
4547
}
@@ -116,11 +118,11 @@ void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image,
116118
imgSizeH_[inIdx],
117119
imgSizeW_[inIdx],
118120
channel,
121+
filterSizeY_[inIdx],
119122
filterSize_[inIdx],
120-
filterSize_[inIdx],
123+
strideY_[inIdx],
121124
stride_[inIdx],
122-
stride_[inIdx],
123-
padding_[inIdx],
125+
paddingY_[inIdx],
124126
padding_[inIdx],
125127
outputH_[inIdx],
126128
outputW_[inIdx]);
@@ -208,11 +210,11 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out,
208210
imgSizeH_[inpIdx],
209211
imgSizeW_[inpIdx],
210212
channel,
213+
filterSizeY_[inpIdx],
211214
filterSize_[inpIdx],
212-
filterSize_[inpIdx],
213-
stride_[inpIdx],
215+
strideY_[inpIdx],
214216
stride_[inpIdx],
215-
padding_[inpIdx],
217+
paddingY_[inpIdx],
216218
padding_[inpIdx],
217219
outputH_[inpIdx],
218220
outputW_[inpIdx],

paddle/gserver/layers/MaxOutLayer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ size_t MaxOutLayer::getSize() {
2525
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
2626
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
2727
if (imgSizeH_ == 0) {
28-
imgSizeH_ = maxoutConf.img_size_y();
28+
imgSizeH_ = maxoutConf.image_conf().img_size_y();
2929
}
3030
if (imgSizeW_ == 0) {
31-
imgSizeW_ = maxoutConf.img_size_x();
31+
imgSizeW_ = maxoutConf.image_conf().img_size();
3232
}
3333

3434
featLen_ = imgSizeH_ * imgSizeW_;
@@ -50,7 +50,7 @@ bool MaxOutLayer::init(const LayerMap& layerMap,
5050

5151
const MaxOutConfig& conf = config_.inputs(0).maxout_conf();
5252
groups_ = conf.groups();
53-
channels_ = conf.channels();
53+
channels_ = conf.image_conf().channels();
5454
CHECK_EQ(channels_ % groups_, 0UL);
5555
outputChannels_ = channels_ / groups_;
5656

paddle/gserver/layers/NormLayer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
4848
outputX_ = conf.output_x();
4949
imgSize_ = conf.img_size();
5050
denoms_ = NULL;
51+
52+
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
53+
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
5154
return true;
5255
}
5356

0 commit comments

Comments
 (0)