Skip to content

not able to reproduce the training procedure #8

@ShrikanthSingh

Description

@ShrikanthSingh

I followed the article linked below, where I found a piece of code for reproducing the training of chars2vec, but it raises an error. Can you please help? If it works, I want to train a model for my own purposes on German text.
website: https://hackernoon.com/chars2vec-character-based-language-model-for-handling-real-world-texts-with-spelling-errors-and-a3e4053a147d

code:

import chars2vec

dim = 50

path_to_model = 'path/to/model/directory'

X_train = [('mecbanizing', 'mechanizing'), # similar words, target is equal 0
           ('dicovery', 'dis7overy'), # similar words, target is equal 0
           ('prot$oplasmatic', 'prtoplasmatic'), # similar words, target is equal 0
           ('copulateng', 'lzateful'), # not similar words, target is equal 1
           ('estry', 'evadin6'), # not similar words, target is equal 1
           ('cirrfosis', 'afear') # not similar words, target is equal 1
          ]
y_train = [0, 0, 0, 1, 1, 1]
model_chars = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.',
               '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
               '=', '>', '?', '@', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
               'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
               'x', 'y', 'z']
my_c2v_model = chars2vec.train_model(dim, X_train, y_train, model_chars)
chars2vec.save_model(my_c2v_model, path_to_model)
words = ['list', 'of', 'words']
c2v_model = chars2vec.load_model(path_to_model)
word_embeddings = c2v_model.vectorize_words(words)

Error:

ValueError                                Traceback (most recent call last)
<ipython-input-14-23a592d19001> in <module>
      1 # Create and train chars2vec model using given training data
----> 2 my_c2v_model = chars2vec.train_model(dim, X_train, y_train, model_chars)
      3 
      4 # Save pretrained model
      5 chars2vec.save_model(my_c2v_model, path_to_model)

C:\ProgramData\Anaconda3\lib\site-packages\chars2vec\model.py in train_model(emb_dim, X_train, y_train, model_chars, max_epochs, patience, validation_split, batch_size)
    235 
    236     targets = [float(el) for el in y_train]
--> 237     c2v_model.fit(X_train, targets, max_epochs, patience, validation_split, batch_size)
    238 
    239     return c2v_model

C:\ProgramData\Anaconda3\lib\site-packages\chars2vec\model.py in fit(self, word_pairs, targets, max_epochs, patience, validation_split, batch_size)
    105         x_2_pad_seq = keras.preprocessing.sequence.pad_sequences(x_2)
    106 
--> 107         self.model.fit([x_1_pad_seq, x_2_pad_seq], targets,
    108                        batch_size=batch_size, epochs=max_epochs,
    109                        validation_split=validation_split,

~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
    106   def _method_wrapper(self, *args, **kwargs):
    107     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
--> 108       return method(self, *args, **kwargs)
    109 
    110     # Running inside `run_distribute_coordinator` already.

~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1037       # `Tensor` and `NumPy` input.
   1038       (x, y, sample_weight), validation_data = (
-> 1039           data_adapter.train_validation_split(
   1040               (x, y, sample_weight), validation_split=validation_split))
   1041 

~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\data_adapter.py in train_validation_split(arrays, validation_split)
   1372   unsplitable = [type(t) for t in flat_arrays if not _can_split(t)]
   1373   if unsplitable:
-> 1374     raise ValueError(
   1375         "`validation_split` is only supported for Tensors or NumPy "
   1376         "arrays, found following types in the input: {}".format(unsplitable))

ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>]

I also tried converting `y_train` to a NumPy array, but even then the error was not resolved:

y_train = [0, 0, 0, 1, 1, 1]
y_train = np.array(y_train)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions