Skip to content

Commit 20b416d

Browse files
author
Yi Wang
committed
Resolve conflict from git pull upstream develop
2 parents a3a7e76 + 98f4c76 commit 20b416d

File tree

167 files changed

+961
-3149
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

167 files changed

+961
-3149
lines changed

CMakeLists.txt

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ include(package)
88
find_package(SWIG 2.0)
99
find_package(CUDA QUIET)
1010
find_package(Protobuf REQUIRED)
11+
12+
# Check protobuf library version.
13+
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
14+
OUTPUT_VARIABLE PROTOBUF_VERSION)
15+
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
16+
17+
set(PROTOBUF_3 OFF)
18+
if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0")
19+
set(PROTOBUF_3 ON)
20+
endif()
21+
1122
find_package(PythonLibs 2.7 REQUIRED)
1223
find_package(PythonInterp 2.7 REQUIRED)
1324
find_package(ZLIB REQUIRED)
@@ -41,7 +52,7 @@ option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
4152
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
4253

4354
if(NOT CMAKE_BUILD_TYPE)
44-
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
55+
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
4556
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
4657
FORCE)
4758
endif()

RELEASE.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Release v0.9.0
2+
3+
## New Features:
4+
5+
* New Layers
6+
* bilinear interpolation layer.
7+
* spatial pyramid-pool layer.
8+
* de-convolution layer.
9+
* maxout layer.
10+
* Support rectangle padding, stride, window and input for Pooling Operation.
11+
* Add `--job=time` in trainer, which can be used to print time info without the compile option -WITH_TIMER=ON.
12+
* Expose cost_weight/nce_layer in `trainer_config_helpers`
13+
* Add FAQ, concepts, h-rnn docs.
14+
* Add Bidi-LSTM and DB-LSTM to quick start demo @alvations
15+
* Add usage track scripts.
16+
17+
## Improvements
18+
19+
* Add Travis-CI for Mac OS X. Enable swig unittest in Travis-CI. Skip Travis-CI when only docs are changed.
20+
* Add code coverage tools.
21+
* Refine convolution layer to speedup and reduce GPU memory.
22+
* Speed up PyDataProvider2
23+
* Add ubuntu deb package build scripts.
24+
* Make Paddle use git-flow branching model.
25+
* PServer support no parameter blocks.
26+
27+
## Bug Fixes
28+
29+
* add zlib link to py_paddle
30+
* add input sparse data check for sparse layer at runtime
31+
* Bug fix for sparse matrix multiplication
32+
* Fix floating-point overflow problem of tanh
33+
* Fix some nvcc compile options
34+
* Fix a bug in yield dictionary in DataProvider
35+
* Fix SRL hang when exit.
36+
37+
# Release v0.8.0beta.1
38+
New features:
39+
40+
* Mac OSX is supported by source code. #138
41+
* Both GPU and CPU versions of PaddlePaddle are supported.
42+
43+
* Support CUDA 8.0
44+
45+
* Enhance `PyDataProvider2`
46+
* Add dictionary yield format. `PyDataProvider2` can yield a dictionary with key is data_layer's name, value is features.
47+
* Add `min_pool_size` to control memory pool in provider.
48+
49+
* Add `deb` install package & docker image for no_avx machines.
50+
* Especially for cloud computing and virtual machines
51+
52+
* Automatically disable `avx` instructions in cmake when the machine's CPU doesn't support `avx` instructions.
53+
54+
* Add Parallel NN api in trainer_config_helpers.
55+
56+
* Add `travis ci` for Github
57+
58+
Bug fixes:
59+
60+
* Several bugs in trainer_config_helpers. Also complete the unittest for trainer_config_helpers
61+
* Check if PaddlePaddle is installed when unittest.
62+
* Fix bugs in GTX series GPU
63+
* Fix bug in MultinomialSampler
64+
65+
Also more documentation was written since last release.
66+
67+
# Release v0.8.0beta.0
68+
69+
PaddlePaddle v0.8.0beta.0 release. The install package is not stable yet and it's a pre-release version.

demo/quick_start/data/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
This dataset consists of electronics product reviews associated with
2+
binary labels (positive/negative) for sentiment classification.
3+
4+
The preprocessed data can be downloaded by script `get_data.sh`.
5+
The data was derived from reviews_Electronics_5.json.gz at
6+
7+
http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
8+
9+
If you want to process the raw data, you can use the script `proc_from_raw_data/get_data.sh`.

demo/quick_start/data/get_data.sh

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,11 @@ set -e
1717
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
1818
cd $DIR
1919

20-
echo "Downloading Amazon Electronics reviews data..."
21-
# http://jmcauley.ucsd.edu/data/amazon/
22-
wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
20+
# Download the preprocessed data
21+
wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
2322

24-
echo "Downloading mosesdecoder..."
25-
#https://github.com/moses-smt/mosesdecoder
26-
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
23+
# Extract package
24+
tar zxvf preprocessed_data.tar.gz
2725

28-
unzip master.zip
29-
rm master.zip
30-
echo "Done."
26+
# Remove compressed package
27+
rm preprocessed_data.tar.gz

demo/quick_start/data/pred.list

Lines changed: 0 additions & 1 deletion
This file was deleted.

demo/quick_start/data/pred.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

demo/quick_start/preprocess.sh renamed to demo/quick_start/data/proc_from_raw_data/get_data.sh

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,26 @@
1616
# 1. size of pos : neg = 1:1.
1717
# 2. size of testing set = min(25k, len(all_data) * 0.1); the rest is the training set.
1818
# 3. distinct train set and test set.
19-
# 4. build dict
2019

2120
set -e
2221

22+
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
23+
cd $DIR
24+
25+
# Download data
26+
echo "Downloading Amazon Electronics reviews data..."
27+
# http://jmcauley.ucsd.edu/data/amazon/
28+
wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
29+
echo "Downloading mosesdecoder..."
30+
# https://github.com/moses-smt/mosesdecoder
31+
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
32+
33+
unzip master.zip
34+
rm master.zip
35+
36+
##################
37+
# Preprocess data
38+
echo "Preprocess data..."
2339
export LC_ALL=C
2440
UNAME_STR=`uname`
2541

@@ -29,11 +45,11 @@ else
2945
SHUF_PROG='gshuf'
3046
fi
3147

32-
mkdir -p data/tmp
33-
python preprocess.py -i data/reviews_Electronics_5.json.gz
48+
mkdir -p tmp
49+
python preprocess.py -i reviews_Electronics_5.json.gz
3450
# uniq and shuffle
35-
cd data/tmp
36-
echo 'uniq and shuffle...'
51+
cd tmp
52+
echo 'Uniq and shuffle...'
3753
cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
3854
cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
3955

@@ -53,11 +69,11 @@ cat train.pos train.neg | ${SHUF_PROG} >../train.txt
5369
cat test.pos test.neg | ${SHUF_PROG} >../test.txt
5470

5571
cd -
56-
echo 'data/train.txt' > data/train.list
57-
echo 'data/test.txt' > data/test.list
72+
echo 'train.txt' > train.list
73+
echo 'test.txt' > test.list
5874

5975
# use 30k dict
60-
rm -rf data/tmp
61-
mv data/dict.txt data/dict_all.txt
62-
cat data/dict_all.txt | head -n 30001 > data/dict.txt
63-
echo 'preprocess finished'
76+
rm -rf tmp
77+
mv dict.txt dict_all.txt
78+
cat dict_all.txt | head -n 30001 > dict.txt
79+
echo 'Done.'

demo/quick_start/preprocess.py renamed to demo/quick_start/data/proc_from_raw_data/preprocess.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
"""
17-
1. (remove HTML before or not)tokensizing
17+
1. Tokenize the words and punctuation
1818
2. pos sample : rating score 5; neg sample: rating score 1-2.
1919
2020
Usage:
@@ -76,7 +76,11 @@ def tokenize(sentences):
7676
sentences : a list of input sentences.
7777
return: a list of processed text.
7878
"""
79-
dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
79+
dir = './mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
80+
if not os.path.exists(dir):
81+
sys.exit(
82+
"The ./mosesdecoder-master/scripts/tokenizer/tokenizer.perl does not exists."
83+
)
8084
tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
8185
assert isinstance(sentences, list)
8286
text = "\n".join(sentences)
@@ -104,7 +108,7 @@ def tokenize_batch(id):
104108
num_batch, instance, pre_fix = parse_queue.get()
105109
if num_batch == -1: ### parse_queue finished
106110
tokenize_queue.put((-1, None, None))
107-
sys.stderr.write("tokenize theread %s finish\n" % (id))
111+
sys.stderr.write("Thread %s finish\n" % (id))
108112
break
109113
tokenize_instance = tokenize(instance)
110114
tokenize_queue.put((num_batch, tokenize_instance, pre_fix))

demo/semantic_role_labeling/data/get_data.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
# limitations under the License.
1515
set -e
1616
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
17-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
18-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
19-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
20-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
17+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt
18+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt
19+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt
20+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb
2121
tar -xzvf conll05st-tests.tar.gz
2222
rm conll05st-tests.tar.gz
2323
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .

doc/CMakeLists.txt

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,17 @@ set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
1515
# HTML output directory
1616
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
1717

18-
19-
set(PADDLE_DOXYGEN_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/doxygen_xml")
20-
2118
configure_file(
2219
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
2320
"${BINARY_BUILD_DIR}/conf.py"
2421
@ONLY)
2522

26-
configure_file(
27-
"${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in"
28-
"${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
29-
@ONLY
30-
)
31-
32-
add_custom_target(paddle_doxygen_docs ALL
33-
${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
34-
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
35-
)
36-
3723
sphinx_add_target(paddle_docs
3824
html
3925
${BINARY_BUILD_DIR}
4026
${SPHINX_CACHE_DIR}
4127
${CMAKE_CURRENT_SOURCE_DIR}
4228
${SPHINX_HTML_DIR})
4329

44-
add_dependencies(paddle_docs
45-
gen_proto_py
46-
paddle_doxygen_docs)
30+
add_dependencies(paddle_docs
31+
gen_proto_py)

0 commit comments

Comments
 (0)