Skip to content

Commit 7598846

Browse files
committed
Merge remote branch 'origin/develop' into fix_data_sources
2 parents 89a638b + 18fabf6 commit 7598846

File tree

218 files changed

+2030
-3134
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

218 files changed

+2030
-3134
lines changed

CMakeLists.txt

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ find_package(Protobuf REQUIRED)
1111

1212
# Check protobuf library version.
1313
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
14-
OUTPUT_VARIABLE PROTOBUF_VERSION)
14+
OUTPUT_VARIABLE PROTOBUF_VERSION)
1515
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
1616

1717
set(PROTOBUF_3 OFF)
@@ -25,8 +25,8 @@ find_package(ZLIB REQUIRED)
2525
find_package(NumPy REQUIRED)
2626
find_package(Threads REQUIRED)
2727
find_package(AVX QUIET)
28-
find_package(Glog)
29-
find_package(Gflags QUIET)
28+
find_package(Glog REQUIRED)
29+
find_package(Gflags REQUIRED)
3030
find_package(GTest)
3131
find_package(Sphinx)
3232
find_package(Doxygen)
@@ -40,8 +40,6 @@ option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
4040
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
4141
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
4242
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
43-
option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
44-
option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
4543
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
4644
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
4745
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
@@ -136,16 +134,12 @@ else(WITH_RDMA)
136134
add_definitions(-DPADDLE_DISABLE_RDMA)
137135
endif(WITH_RDMA)
138136

139-
if(WITH_GLOG)
140-
add_definitions(-DPADDLE_USE_GLOG)
141-
include_directories(${LIBGLOG_INCLUDE_DIR})
142-
endif()
137+
# glog
138+
include_directories(${LIBGLOG_INCLUDE_DIR})
143139

144-
if(WITH_GFLAGS)
145-
add_definitions(-DPADDLE_USE_GFLAGS)
146-
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
147-
include_directories(${GFLAGS_INCLUDE_DIRS})
148-
endif()
140+
# gflags
141+
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
142+
include_directories(${GFLAGS_INCLUDE_DIRS})
149143

150144
if(WITH_TESTING)
151145
enable_testing()
@@ -169,5 +163,4 @@ add_subdirectory(paddle)
169163
add_subdirectory(python)
170164
if(WITH_DOC)
171165
add_subdirectory(doc)
172-
add_subdirectory(doc_cn)
173166
endif()

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
./doc/howto/contribute_to_paddle_en.md

WORKSPACE

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ http_archive(
33
name="protobuf",
44
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
55
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
6-
strip_prefix="protobuf-3.1.0", )
6+
strip_prefix="protobuf-3.1.0")
77

88
# External dependency to gtest 1.7.0. This method comes from
99
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
@@ -12,4 +12,20 @@ new_http_archive(
1212
url="https://github.com/google/googletest/archive/release-1.7.0.zip",
1313
sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
1414
build_file="third_party/gtest.BUILD",
15-
strip_prefix="googletest-release-1.7.0", )
15+
strip_prefix="googletest-release-1.7.0")
16+
17+
# External dependency to gflags. This method comes from
18+
# https://github.com/gflags/example/blob/master/WORKSPACE.
19+
new_git_repository(
20+
name="gflags",
21+
tag="v2.2.0",
22+
remote="https://github.com/gflags/gflags.git",
23+
build_file="third_party/gflags.BUILD")
24+
25+
# External dependency to glog. This method comes from
26+
# https://github.com/reyoung/bazel_playground/blob/master/WORKSPACE
27+
new_git_repository(
28+
name="glog",
29+
remote="https://github.com/google/glog.git",
30+
commit="b6a5e0524c28178985f0d228e9eaa43808dbec3c",
31+
build_file="third_party/glog.BUILD")

cmake/FindSphinx.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
7272
${source}
7373
${destination}
7474
COMMENT "Generating sphinx documentation: ${builder}"
75+
COMMAND ln -s ${destination}/index_*.html ${destination}/index.html
7576
)
7677

7778
set_property(
@@ -143,4 +144,4 @@ function( Sphinx_add_targets target_base_name conf source base_destination )
143144

144145
add_dependencies( ${target_base_name}_linkcheck ${_dependencies} )
145146
endif()
146-
endfunction()
147+
endfunction()

cmake/check_packages.cmake

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,9 @@ if(WITH_STYLE_CHECK)
1414
find_package(PythonInterp REQUIRED)
1515
endif()
1616

17-
if(WITH_GLOG)
18-
find_package(Glog REQUIRED)
19-
endif()
17+
find_package(Glog REQUIRED)
2018

21-
if(WITH_GFLAGS)
22-
find_package(Gflags REQUIRED)
23-
endif()
19+
find_package(Gflags REQUIRED)
2420

2521
if(WITH_TESTING)
2622
find_package(GTest REQUIRED)

cmake/util.cmake

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ endmacro()
6565
# link_paddle_exe
6666
# add paddle library for a paddle executable, such as trainer, pserver.
6767
#
68-
# It will handle WITH_PYTHON/WITH_GLOG etc.
68+
# It will handle WITH_PYTHON etc.
6969
function(link_paddle_exe TARGET_NAME)
7070
if(WITH_RDMA)
7171
generate_rdma_links()
@@ -108,6 +108,8 @@ function(link_paddle_exe TARGET_NAME)
108108
paddle_cuda
109109
${METRIC_LIBS}
110110
${PROTOBUF_LIBRARY}
111+
${LIBGLOG_LIBRARY}
112+
${GFLAGS_LIBRARIES}
111113
${CMAKE_THREAD_LIBS_INIT}
112114
${CBLAS_LIBS}
113115
${ZLIB_LIBRARIES}
@@ -119,27 +121,17 @@ function(link_paddle_exe TARGET_NAME)
119121
${RDMA_LD_FLAGS}
120122
${RDMA_LIBS})
121123
endif()
122-
124+
123125
if(WITH_PYTHON)
124126
target_link_libraries(${TARGET_NAME}
125127
${PYTHON_LIBRARIES})
126128
endif()
127129

128-
if(WITH_GLOG)
129-
target_link_libraries(${TARGET_NAME}
130-
${LIBGLOG_LIBRARY})
131-
endif()
132-
133-
if(WITH_GFLAGS)
134-
target_link_libraries(${TARGET_NAME}
135-
${GFLAGS_LIBRARIES})
136-
endif()
137-
138130
if(WITH_GPU)
139-
if(NOT WITH_DSO OR WITH_METRIC)
131+
if(NOT WITH_DSO OR WITH_METRIC)
140132
target_link_libraries(${TARGET_NAME}
141133
${CUDNN_LIBRARY}
142-
${CUDA_curand_LIBRARY})
134+
${CUDA_curand_LIBRARY})
143135
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
144136
endif()
145137

@@ -206,5 +198,5 @@ function(create_resources res_file output)
206198
# Convert hex data for C compatibility
207199
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
208200
# Append data to output file
209-
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
201+
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
210202
endfunction()

demo/semantic_role_labeling/data/extract_dict_feature.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ def extract_dict_features(pair_file, feature_file):
4343
mark[verb_index] = 1
4444
ctx_0 = sentence_list[verb_index]
4545

46-
if verb_index < len(labels_list) - 2:
46+
if verb_index < len(labels_list) - 1:
4747
mark[verb_index + 1] = 1
4848
ctx_p1 = sentence_list[verb_index + 1]
4949
else:
5050
ctx_p1 = 'eos'
5151

52-
if verb_index < len(labels_list) - 3:
52+
if verb_index < len(labels_list) - 2:
5353
mark[verb_index + 2] = 1
5454
ctx_p2 = sentence_list[verb_index + 2]
5555
else:

doc/CMakeLists.txt

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,50 @@ if(NOT DEFINED SPHINX_THEME_DIR)
77
endif()
88

99
# configured documentation tools and intermediate build results
10-
set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
10+
set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build")
1111

1212
# Sphinx cache with pickled ReST documents
13-
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
13+
set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
1414

15-
# HTML output directory
16-
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
15+
# HTML output directory
16+
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
1717

1818
configure_file(
19-
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
20-
"${BINARY_BUILD_DIR}/conf.py"
19+
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.en.in"
20+
"${BINARY_BUILD_DIR_EN}/conf.py"
2121
@ONLY)
2222

2323
sphinx_add_target(paddle_docs
2424
html
25-
${BINARY_BUILD_DIR}
26-
${SPHINX_CACHE_DIR}
25+
${BINARY_BUILD_DIR_EN}
26+
${SPHINX_CACHE_DIR_EN}
2727
${CMAKE_CURRENT_SOURCE_DIR}
28-
${SPHINX_HTML_DIR})
28+
${SPHINX_HTML_DIR_EN})
2929

3030
add_dependencies(paddle_docs
3131
gen_proto_py)
32+
33+
34+
# configured documentation tools and intermediate build results
35+
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
36+
37+
# Sphinx cache with pickled ReST documents
38+
set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees")
39+
40+
# HTML output directory
41+
set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html")
42+
43+
configure_file(
44+
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.cn.in"
45+
"${BINARY_BUILD_DIR_CN}/conf.py"
46+
@ONLY)
47+
48+
sphinx_add_target(paddle_docs_cn
49+
html
50+
${BINARY_BUILD_DIR_CN}
51+
${SPHINX_CACHE_DIR_CN}
52+
${CMAKE_CURRENT_SOURCE_DIR}
53+
${SPHINX_HTML_DIR_CN})
54+
55+
add_dependencies(paddle_docs_cn
56+
gen_proto_py)

doc/about/index_cn.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
关于PaddlePaddle
2+
================
3+
4+
PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台,兼备易用性、高效性、灵活性和可扩展性,目前已被百度内部多个产品线广泛使用。
5+
PaddlePaddle目前已经开放源码, 但是远未完善,我们希望能在这个基础上不断的改进、扩展和延伸。
6+
同时我们希望广大开发者积极提供反馈和贡献源代码,建立一个活跃的开源社区。
7+
8+
致谢
9+
--------
10+
11+
在此,特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
DataProvider的介绍
22
==================
33

4-
DataProvider是PaddlePaddle负责提供数据的模块。其作用是将数据传入内存或显存,让神经网络可以进行训练或预测。用户可以通过简单使用Python接口 `PyDataProvider2 <pydataprovider2.html>`_ ,来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 ``DataProvider`` 。
4+
DataProvider是PaddlePaddle负责提供数据的模块。其作用是将数据传入内存或显存,让神经网络可以进行训练或预测。用户可以通过简单使用Python接口 `PyDataProvider2 <pydataprovider2.html>`_ ,来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 ``DataProvider`` 。
55

66
PaddlePaddle需要用户在网络配置(trainer_config.py)中定义使用哪种DataProvider,并且在DataProvider中实现如何访问训练文件列表(train.list)或测试文件列表(test.list)。
77

8-
- train.list和test.list存放在本地(推荐直接存放到训练目录,以相对路径引用)。一般情况下,两者均为纯文本文件,其中每一行对应一个数据文件地址:
9-
10-
- 如果数据文件存于本地磁盘,这个地址则为它的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)。
11-
- 地址也可以为hdfs文件路径,或者数据库连接路径等。
12-
- 由于这个地址会被DataProvider使用,因此,如何解析该地址也是用户自定义DataProvider时需要考虑的地方。
8+
- train.list和test.list存放在本地(推荐直接存放到训练目录,以相对路径引用)。一般情况下,两者均为纯文本文件,其中每一行对应一个数据文件地址:
9+
10+
- 如果数据文件存于本地磁盘,这个地址则为它的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)。
11+
- 地址也可以为hdfs文件路径,或者数据库连接路径等。
12+
- 由于这个地址会被DataProvider使用,因此,如何解析该地址也是用户自定义DataProvider时需要考虑的地方。
1313
- 如果没有设置test.list,或设置为None,那么在训练过程中不会执行测试操作;否则,会根据命令行参数指定的测试方式,在训练过程中进行测试,从而防止过拟合。

0 commit comments

Comments
 (0)