Skip to content

Commit 20b416d

Browse files
author
Yi Wang
committed
Resolve conflict from git pull upstream develop
2 parents a3a7e76 + 98f4c76 commit 20b416d

File tree

167 files changed

+961
-3149
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

167 files changed

+961
-3149
lines changed

CMakeLists.txt

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ include(package)
88
find_package(SWIG 2.0)
99
find_package(CUDA QUIET)
1010
find_package(Protobuf REQUIRED)
11+
12+
# Check protobuf library version.
13+
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
14+
OUTPUT_VARIABLE PROTOBUF_VERSION)
15+
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
16+
17+
set(PROTOBUF_3 OFF)
18+
if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0")
19+
set(PROTOBUF_3 ON)
20+
endif()
21+
1122
find_package(PythonLibs 2.7 REQUIRED)
1223
find_package(PythonInterp 2.7 REQUIRED)
1324
find_package(ZLIB REQUIRED)
@@ -41,7 +52,7 @@ option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
4152
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
4253

4354
if(NOT CMAKE_BUILD_TYPE)
44-
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
55+
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
4556
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
4657
FORCE)
4758
endif()

RELEASE.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Release v0.9.0
2+
3+
## New Features:
4+
5+
* New Layers
6+
* bilinear interpolation layer.
7+
* spatial pyramid-pool layer.
8+
* de-convolution layer.
9+
* maxout layer.
10+
* Support rectangle padding, stride, window and input for Pooling Operation.
11+
* Add `--job=time` in trainer, which can be used to print time info without the compile option -WITH_TIMER=ON.
12+
* Expose cost_weight/nce_layer in `trainer_config_helpers`
13+
* Add FAQ, concepts, h-rnn docs.
14+
* Add Bidi-LSTM and DB-LSTM to quick start demo @alvations
15+
* Add usage track scripts.
16+
17+
## Improvements
18+
19+
* Add Travis-CI for Mac OS X. Enable swig unittest in Travis-CI. Skip Travis-CI when only docs are changed.
20+
* Add code coverage tools.
21+
* Refine convolution layer to speedup and reduce GPU memory.
22+
* Speed up PyDataProvider2
23+
* Add ubuntu deb package build scripts.
24+
* Make Paddle use git-flow branching model.
25+
* PServer support no parameter blocks.
26+
27+
## Bug Fixes
28+
29+
* add zlib link to py_paddle
30+
* add input sparse data check for sparse layer at runtime
31+
* Bug fix for sparse matrix multiplication
32+
* Fix floating-point overflow problem of tanh
33+
* Fix some nvcc compile options
34+
* Fix a bug in yield dictionary in DataProvider
35+
* Fix SRL hang when exit.
36+
37+
# Release v0.8.0beta.1
38+
New features:
39+
40+
* Mac OSX is supported by source code. #138
41+
* Both GPU and CPU versions of PaddlePaddle are supported.
42+
43+
* Support CUDA 8.0
44+
45+
* Enhance `PyDataProvider2`
46+
* Add dictionary yield format. `PyDataProvider2` can yield a dictionary with key is data_layer's name, value is features.
47+
* Add `min_pool_size` to control memory pool in provider.
48+
49+
* Add `deb` install package & docker image for no_avx machines.
50+
* Especially for cloud computing and virtual machines
51+
52+
* Automatically disable `avx` instructions in cmake when the machine's CPU doesn't support `avx` instructions.
53+
54+
* Add Parallel NN api in trainer_config_helpers.
55+
56+
* Add `travis ci` for Github
57+
58+
Bug fixes:
59+
60+
* Several bugs in trainer_config_helpers. Also complete the unittest for trainer_config_helpers
61+
* Check if PaddlePaddle is installed when unittest.
62+
* Fix bugs in GTX series GPU
63+
* Fix bug in MultinomialSampler
64+
65+
Also more documentation was written since last release.
66+
67+
# Release v0.8.0beta.0
68+
69+
PaddlePaddle v0.8.0beta.0 release. The install package is not stable yet and it's a pre-release version.

demo/quick_start/data/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
This dataset consists of electronics product reviews associated with
2+
binary labels (positive/negative) for sentiment classification.
3+
4+
The preprocessed data can be downloaded by script `get_data.sh`.
5+
The data was derived from reviews_Electronics_5.json.gz at
6+
7+
http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
8+
9+
If you want to process the raw data, you can use the script `proc_from_raw_data/get_data.sh`.

demo/quick_start/data/get_data.sh

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,11 @@ set -e
1717
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
1818
cd $DIR
1919

20-
echo "Downloading Amazon Electronics reviews data..."
21-
# http://jmcauley.ucsd.edu/data/amazon/
22-
wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
20+
# Download the preprocessed data
21+
wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
2322

24-
echo "Downloading mosesdecoder..."
25-
#https://github.com/moses-smt/mosesdecoder
26-
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
23+
# Extract package
24+
tar zxvf preprocessed_data.tar.gz
2725

28-
unzip master.zip
29-
rm master.zip
30-
echo "Done."
26+
# Remove compressed package
27+
rm preprocessed_data.tar.gz

demo/quick_start/data/pred.list

Lines changed: 0 additions & 1 deletion
This file was deleted.

demo/quick_start/data/pred.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

demo/quick_start/preprocess.sh renamed to demo/quick_start/data/proc_from_raw_data/get_data.sh

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,26 @@
1616
# 1. size of pos : neg = 1:1.
1717
# 2. size of testing set = min(25k, len(all_data) * 0.1); the rest is the training set.
1818
# 3. distinct train set and test set.
19-
# 4. build dict
2019

2120
set -e
2221

22+
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
23+
cd $DIR
24+
25+
# Download data
26+
echo "Downloading Amazon Electronics reviews data..."
27+
# http://jmcauley.ucsd.edu/data/amazon/
28+
wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
29+
echo "Downloading mosesdecoder..."
30+
# https://github.com/moses-smt/mosesdecoder
31+
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
32+
33+
unzip master.zip
34+
rm master.zip
35+
36+
##################
37+
# Preprocess data
38+
echo "Preprocess data..."
2339
export LC_ALL=C
2440
UNAME_STR=`uname`
2541

@@ -29,11 +45,11 @@ else
2945
SHUF_PROG='gshuf'
3046
fi
3147

32-
mkdir -p data/tmp
33-
python preprocess.py -i data/reviews_Electronics_5.json.gz
48+
mkdir -p tmp
49+
python preprocess.py -i reviews_Electronics_5.json.gz
3450
# uniq and shuffle
35-
cd data/tmp
36-
echo 'uniq and shuffle...'
51+
cd tmp
52+
echo 'Uniq and shuffle...'
3753
cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
3854
cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
3955

@@ -53,11 +69,11 @@ cat train.pos train.neg | ${SHUF_PROG} >../train.txt
5369
cat test.pos test.neg | ${SHUF_PROG} >../test.txt
5470

5571
cd -
56-
echo 'data/train.txt' > data/train.list
57-
echo 'data/test.txt' > data/test.list
72+
echo 'train.txt' > train.list
73+
echo 'test.txt' > test.list
5874

5975
# use 30k dict
60-
rm -rf data/tmp
61-
mv data/dict.txt data/dict_all.txt
62-
cat data/dict_all.txt | head -n 30001 > data/dict.txt
63-
echo 'preprocess finished'
76+
rm -rf tmp
77+
mv dict.txt dict_all.txt
78+
cat dict_all.txt | head -n 30001 > dict.txt
79+
echo 'Done.'

demo/quick_start/preprocess.py renamed to demo/quick_start/data/proc_from_raw_data/preprocess.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
"""
17-
1. (remove HTML before or not)tokensizing
17+
1. Tokenize the words and punctuation
1818
2. pos sample : rating score 5; neg sample: rating score 1-2.
1919
2020
Usage:
@@ -76,7 +76,11 @@ def tokenize(sentences):
7676
sentences : a list of input sentences.
7777
return: a list of processed text.
7878
"""
79-
dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
79+
dir = './mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
80+
if not os.path.exists(dir):
81+
sys.exit(
82+
"The ./mosesdecoder-master/scripts/tokenizer/tokenizer.perl does not exists."
83+
)
8084
tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
8185
assert isinstance(sentences, list)
8286
text = "\n".join(sentences)
@@ -104,7 +108,7 @@ def tokenize_batch(id):
104108
num_batch, instance, pre_fix = parse_queue.get()
105109
if num_batch == -1: ### parse_queue finished
106110
tokenize_queue.put((-1, None, None))
107-
sys.stderr.write("tokenize theread %s finish\n" % (id))
111+
sys.stderr.write("Thread %s finish\n" % (id))
108112
break
109113
tokenize_instance = tokenize(instance)
110114
tokenize_queue.put((num_batch, tokenize_instance, pre_fix))

demo/semantic_role_labeling/data/get_data.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
# limitations under the License.
1515
set -e
1616
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
17-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
18-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
19-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
20-
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
17+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt
18+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt
19+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt
20+
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb
2121
tar -xzvf conll05st-tests.tar.gz
2222
rm conll05st-tests.tar.gz
2323
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .

doc/CMakeLists.txt

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,17 @@ set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
1515
# HTML output directory
1616
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
1717

18-
19-
set(PADDLE_DOXYGEN_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/doxygen_xml")
20-
2118
configure_file(
2219
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
2320
"${BINARY_BUILD_DIR}/conf.py"
2421
@ONLY)
2522

26-
configure_file(
27-
"${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in"
28-
"${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
29-
@ONLY
30-
)
31-
32-
add_custom_target(paddle_doxygen_docs ALL
33-
${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
34-
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
35-
)
36-
3723
sphinx_add_target(paddle_docs
3824
html
3925
${BINARY_BUILD_DIR}
4026
${SPHINX_CACHE_DIR}
4127
${CMAKE_CURRENT_SOURCE_DIR}
4228
${SPHINX_HTML_DIR})
4329

44-
add_dependencies(paddle_docs
45-
gen_proto_py
46-
paddle_doxygen_docs)
30+
add_dependencies(paddle_docs
31+
gen_proto_py)

0 commit comments

Comments
 (0)