Skip to content

Commit fe66b86

Browse files
committed
Add code for blog
1 parent abb0126 commit fe66b86

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

src/fortest.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import pandas as pd
2+
from keras.preprocessing import sequence
3+
from keras.preprocessing.text import Tokenizer
4+
from sklearn.model_selection import train_test_split
5+
6+
class Preprocessing:
    """Load a labelled text CSV, split it, and convert text to padded token ids.

    Intended call order: ``load_data()`` -> ``prepare_tokens()`` ->
    ``sequence_to_token(texts)``. The later methods read attributes that the
    earlier ones create.

    Attributes:
        data: Path of the CSV file read by ``load_data``.
        max_len: Value passed as ``maxlen`` to ``pad_sequences``.
        max_words: Value passed as ``num_words`` to the Keras ``Tokenizer``.
        test_size: Value passed as ``test_size`` to ``train_test_split``.
        random_state: Value passed as ``random_state`` to
            ``train_test_split``; ``None`` leaves the split unseeded.
    """

    def __init__(
        self,
        *,
        data: str = '../data/tweets.csv',
        max_len: int = 10,
        max_words: int = 100,
        test_size: float = 0.5,
        random_state: "int | None" = None,
    ) -> None:
        """Store the configuration; no file is read at construction time.

        All arguments are keyword-only and default to the values that were
        previously hard-coded, so ``Preprocessing()`` behaves as before.
        """
        self.data = data
        self.max_len = max_len
        self.max_words = max_words
        self.test_size = test_size
        self.random_state = random_state

    def load_data(self) -> None:
        """Read the CSV at ``self.data`` and create the train/test split.

        Only the ``text`` and ``target`` columns are used; any other columns
        in the file are ignored, so they do not have to be present.

        Sets ``self.x_train``, ``self.x_test``, ``self.y_train`` and
        ``self.y_test``.

        Raises:
            KeyError: If the CSV has no ``text`` or ``target`` column.
        """
        df = pd.read_csv(self.data)

        X = df['text'].values
        Y = df['target'].values

        (
            self.x_train,
            self.x_test,
            self.y_train,
            self.y_test,
        ) = train_test_split(
            X,
            Y,
            test_size=self.test_size,
            random_state=self.random_state,
        )

    def prepare_tokens(self) -> None:
        """Fit a ``Tokenizer`` on the training texts and store it.

        The tokenizer is fitted on ``self.x_train`` only (not on the test
        split) and kept as ``self.tokens``. Requires ``load_data()`` to have
        been called first.
        """
        self.tokens = Tokenizer(num_words=self.max_words)
        self.tokens.fit_on_texts(self.x_train)

    def sequence_to_token(self, x):
        """Convert texts to integer sequences padded to ``self.max_len``.

        Args:
            x: Texts to convert, passed to ``Tokenizer.texts_to_sequences``.

        Returns:
            The result of ``pad_sequences(..., maxlen=self.max_len)`` applied
            to the tokenized texts.

        Requires ``prepare_tokens()`` to have been called first.
        """
        sequences = self.tokens.texts_to_sequences(x)
        return sequence.pad_sequences(sequences, maxlen=self.max_len)

0 commit comments

Comments
 (0)