@@ -31,16 +31,16 @@ def initializer(settings, dictionary, **kwargs):
3131
3232 # settings.input_types specifies the data types that the data provider
3333 # generates.
34- settings .input_types = [
34+ settings .input_types = {
3535 # The first input is a sparse_binary_vector,
3636 # which means each dimension of the vector is either 0 or 1. It is the
3737 # bag-of-words (BOW) representation of the texts.
38- sparse_binary_vector (len (dictionary )),
38+ 'word' : sparse_binary_vector (len (dictionary )),
3939 # The second input is an integer. It represents the category id of the
4040 # sample. 2 means there are two labels in the dataset.
4141 # (1 for positive and 0 for negative)
42- integer_value (2 )
43- ]
42+ 'label' : integer_value (2 )
43+ }
4444
4545
4646# Declaring a data provider. It has an initializer 'initializer'.
@@ -67,12 +67,12 @@ def process(settings, file_name):
6767 # Return the features for the current comment. The first is a list
6868 # of ids representing a 0-1 binary sparse vector of the text,
6969 # the second is the integer id of the label.
70- yield word_vector , int (label )
70+ yield { 'word' : word_vector , 'label' : int (label )}
7171
7272
7373def predict_initializer (settings , dictionary , ** kwargs ):
7474 settings .word_dict = dictionary
75- settings .input_types = [ sparse_binary_vector (len (dictionary ))]
75+ settings .input_types = { 'word' : sparse_binary_vector (len (dictionary ))}
7676
7777
7878# Declaring a data provider for prediction. The difference with process
@@ -83,4 +83,4 @@ def process_predict(settings, file_name):
8383 for line in f :
8484 comment = line .strip ().split ()
8585 word_vector = [settings .word_dict .get (w , UNK_IDX ) for w in comment ]
86- yield word_vector
86+ yield { 'word' : word_vector }
0 commit comments