@@ -195,16 +195,99 @@ class Lexicon {
195195 phonetic_str.c_str ());
196196 }
197197
198- void convert (const std::string& text, std::vector<int >& phones, std::vector<int >& tones)
198+ std::vector<std::string> splitTextByPunctuation (const std::string& text)
199+ {
200+ std::vector<std::string> segments;
201+ auto chars = splitEachChar (text);
202+ std::string current_segment;
203+
204+ for (size_t i = 0 ; i < chars.size (); ++i) {
205+ std::string c = chars[i];
206+ current_segment += c;
207+
208+ bool is_segment_punct = false ;
209+ std::string punct_key = c;
210+
211+ if (c == " ," )
212+ punct_key = " ," ;
213+ else if (c == " 。" )
214+ punct_key = " ." ;
215+ else if (c == " !" )
216+ punct_key = " !" ;
217+ else if (c == " ?" )
218+ punct_key = " ?" ;
219+
220+ if (lexicon.find (punct_key) != lexicon.end () &&
221+ (punct_key == " ." || punct_key == " !" || punct_key == " ?" || punct_key == " ," || punct_key == " …" )) {
222+ is_segment_punct = true ;
223+ }
224+
225+ if (is_segment_punct && i < chars.size () - 1 ) {
226+ segments.push_back (current_segment);
227+ current_segment.clear ();
228+ }
229+ }
230+
231+ if (!current_segment.empty ()) {
232+ segments.push_back (current_segment);
233+ }
234+
235+ return segments;
236+ }
237+ std::vector<std::string> mergeShortSegments (const std::vector<std::string>& segments, int min_length = 4 )
238+ {
239+ std::vector<std::string> merged_segments;
240+ std::string current_segment;
241+
242+ for (size_t i = 0 ; i < segments.size (); ++i) {
243+ auto chars = splitEachChar (segments[i]);
244+ int actual_chars = 0 ;
245+ for (const auto & c : chars) {
246+ if (c != " " && lexicon.find (c) != lexicon.end ()) {
247+ std::string punct_key = c;
248+ if (c == " ," )
249+ punct_key = " ," ;
250+ else if (c == " 。" )
251+ punct_key = " ." ;
252+ else if (c == " !" )
253+ punct_key = " !" ;
254+ else if (c == " ?" )
255+ punct_key = " ?" ;
256+
257+ if (punct_key != " ," && punct_key != " ." && punct_key != " !" && punct_key != " ?" &&
258+ punct_key != " …" && punct_key != " '" && punct_key != " -" ) {
259+ actual_chars++;
260+ }
261+ } else if (is_english (c)) {
262+ actual_chars++;
263+ }
264+ }
265+ if (actual_chars < min_length && i < segments.size () - 1 ) {
266+ if (current_segment.empty ()) {
267+ current_segment = segments[i];
268+ } else {
269+ current_segment += segments[i];
270+ }
271+ } else {
272+ if (!current_segment.empty ()) {
273+ current_segment += segments[i];
274+ merged_segments.push_back (current_segment);
275+ current_segment.clear ();
276+ } else {
277+ merged_segments.push_back (segments[i]);
278+ }
279+ }
280+ }
281+
282+ if (!current_segment.empty ()) {
283+ merged_segments.push_back (current_segment);
284+ }
285+
286+ return merged_segments;
287+ }
288+
289+ void processSegment (const std::string& text, std::vector<int >& phones, std::vector<int >& tones)
199290 {
200- DEBUG_LOG (" \n Starting text processing: \" %s\" " , text.c_str ());
201- DEBUG_LOG (" =======Matching Results=======" );
202- DEBUG_LOG (" Unit\t |\t Phonemes\t |\t Tones" );
203- DEBUG_LOG (" -----------------------------" );
204- phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
205- tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
206- DEBUG_LOG (" <BOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
207- tonesToString (unknown_token.second ).c_str ());
208291 auto chars = splitEachChar (text);
209292 int i = 0 ;
210293 while (i < chars.size ()) {
@@ -274,10 +357,39 @@ class Lexicon {
274357 }
275358 }
276359 }
360+ }
361+
362+ void convert (const std::string& text, std::vector<int >& phones, std::vector<int >& tones)
363+ {
364+ DEBUG_LOG (" \n Starting text processing: \" %s\" " , text.c_str ());
365+
366+ std::vector<std::string> segments = splitTextByPunctuation (text);
367+
368+ std::vector<std::string> merged_segments = mergeShortSegments (segments);
369+
370+ DEBUG_LOG (" Text divided into %zu segments after merging short segments" , merged_segments.size ());
371+ for (size_t i = 0 ; i < merged_segments.size (); ++i) {
372+ DEBUG_LOG (" Segment %zu: \" %s\" " , i + 1 , merged_segments[i].c_str ());
373+ }
374+
375+ DEBUG_LOG (" =======Matching Results=======" );
376+ DEBUG_LOG (" Unit\t |\t Phonemes\t |\t Tones" );
377+ DEBUG_LOG (" -----------------------------" );
378+
379+ phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
380+ tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
381+ DEBUG_LOG (" <BOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
382+ tonesToString (unknown_token.second ).c_str ());
383+
384+ for (const auto & segment : merged_segments) {
385+ processSegment (segment, phones, tones);
386+ }
387+
277388 phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
278389 tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
279390 DEBUG_LOG (" <EOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
280391 tonesToString (unknown_token.second ).c_str ());
392+
281393 DEBUG_LOG (" \n Processing Summary:" );
282394 DEBUG_LOG (" Original text: %s" , text.c_str ());
283395 DEBUG_LOG (" Phonemes: %s" , phonesToString (phones).c_str ());
0 commit comments