@@ -8,142 +8,105 @@ const SafeConditionalUrlPatternBuilder_1 = require("../pattern/SafeConditionalUr
88const BasePatterns_1 = require ( "../pattern/BasePatterns" ) ;
99const DomainPatterns_1 = require ( "../pattern/DomainPatterns" ) ;
1010const util_1 = __importDefault ( require ( "../util" ) ) ;
11- const ParamsPatterns_1 = require ( "../pattern/ParamsPatterns" ) ;
1211const EmailPatternBuilder_1 = require ( "../pattern/EmailPatternBuilder" ) ;
1312const UrlAreaService_1 = require ( "./UrlAreaService" ) ;
1413const EmailAreaService_1 = require ( "./EmailAreaService" ) ;
14+ const UriMatchProcessor_1 = require ( "../bo/UriMatchProcessor" ) ;
15+ const EmailMatchProcessor_1 = require ( "../bo/EmailMatchProcessor" ) ;
1516exports . TextAreaService = {
16- extractAllPureUrls ( textStr ) {
17+ extractAllUrlMatchList ( textStr ) {
1718 if ( ! ( textStr && typeof textStr === 'string' ) ) {
1819 throw new Error ( 'the variable textStr must be a string type and not be null.' ) ;
1920 }
20- let obj = [ ] ;
21- let rx = new RegExp ( SafeConditionalUrlPatternBuilder_1 . SafeConditionalUrlPatternBuilder . getUrl , 'gi' ) ;
22- let matches = [ ] ;
21+ const urlRx = new RegExp ( SafeConditionalUrlPatternBuilder_1 . SafeConditionalUrlPatternBuilder . getUrl , 'gi' ) ;
22+ let urlMatchList = [ ] ;
2323 let match ;
24- while ( ( match = rx . exec ( textStr ) ) !== null ) {
25- /* SKIP DEPENDENCY */
24+ while ( ( match = urlRx . exec ( textStr ) ) !== null ) {
25+ /* EXCLUDED FROM MATCH LIST : Email Type */
2626 if ( / ^ @ / . test ( match [ 0 ] ) ) {
2727 continue ;
2828 }
2929 let startIdx = match . index ;
3030 let endIdx = match . index + match [ 0 ] . length ;
31- let modVal = match [ 0 ] ;
32- let re = UrlAreaService_1 . UrlAreaService . parseUrl ( modVal ) ;
33- /* SKIP DEPENDENCY */
34- if ( re . onlyDomain && new RegExp ( '^(?:\\.|[0-9]|' + BasePatterns_1 . BasePatterns . twoBytesNum + ')+$' , 'i' ) . test ( re . onlyDomain ) ) {
31+ const parsedUrl = UrlAreaService_1 . UrlAreaService . parseUrl ( match [ 0 ] ) ;
32+ /* EXCLUDED FROM MATCH LIST */
33+ if ( parsedUrl . onlyDomain && new RegExp ( '^(?:\\.|[0-9]|' + BasePatterns_1 . BasePatterns . twoBytesNum + ')+$' , 'i' ) . test ( parsedUrl . onlyDomain ) ) {
3534 // ipV4 is OK
36- if ( ! new RegExp ( '^' + DomainPatterns_1 . DomainPatterns . ipV4 + '$' , 'i' ) . test ( re . onlyDomain ) ) {
35+ if ( ! new RegExp ( '^' + DomainPatterns_1 . DomainPatterns . ipV4 + '$' , 'i' ) . test ( parsedUrl . onlyDomain ) ) {
3736 continue ;
3837 }
3938 }
40- /* this part doesn't need to be included */
41- if ( re . removedTailOnUrl && re . removedTailOnUrl . length > 0 ) {
42- endIdx -= re . removedTailOnUrl . length ;
39+ /* Adjust endIdx by the length of removedTailOnUrl, if it exists */
40+ if ( parsedUrl . removedTailOnUrl && parsedUrl . removedTailOnUrl . length > 0 ) {
41+ endIdx -= parsedUrl . removedTailOnUrl . length ;
4342 }
44- obj . push ( {
45- value : re ,
43+ urlMatchList . push ( {
44+ value : parsedUrl ,
4645 area : 'text' ,
4746 index : {
4847 start : startIdx ,
4948 end : endIdx
5049 }
5150 } ) ;
5251 }
53- return obj ;
52+ return urlMatchList ;
5453 } ,
5554 /*
5655 * [!!IMPORTANT] Should be refactored.
5756 * */
58- extractCertainPureUris ( textStr , uris , endBoundary ) {
59- let uriRx = util_1 . default . Text . urisToOneRxStr ( uris ) ;
60- if ( ! uriRx ) {
57+ extractCertainUriMatchList ( textStr , uris , endBoundary ) {
58+ let urisRxStr = util_1 . default . Text . urisToOneRxStr ( uris ) ;
59+ if ( ! urisRxStr ) {
6160 throw new Error ( 'the variable uris are not available' ) ;
6261 }
63- if ( endBoundary ) {
64- uriRx = '(?:\\/[^\\s]*\\/|' +
65- '(?:[0-9]|' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')'
66- + '[^/\\s]*(?:[0-9]|' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')'
67- + '\\/|\\/|\\b)' +
68- '(?:' + uriRx + ')' +
69- '(?:' + ParamsPatterns_1 . ParamsPatterns . mandatoryUrlParams + '|[\\s]|$)' ;
70- }
71- else {
72- uriRx = '(?:\\/[^\\s]*\\/|' +
73- '(?:[0-9]|' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')'
74- + '[^/\\s]*(?:[0-9]|' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')'
75- + '\\/|\\/|\\b)' +
76- '(?:' + uriRx + ')' + ParamsPatterns_1 . ParamsPatterns . optionalUrlParams ;
77- }
78- let obj = [ ] ;
79- /* normal text area */
80- let rx = new RegExp ( uriRx , 'gi' ) ;
62+ urisRxStr = ( 0 , UriMatchProcessor_1 . adjustUrisRx ) ( urisRxStr , endBoundary ) ;
63+ let uriMatchList = [ ] ;
64+ const uriRx = new RegExp ( urisRxStr , 'gi' ) ;
8165 let match ;
82- while ( ( match = rx . exec ( textStr ) ) !== null ) {
83- let mod_val = match [ 0 ] ;
84- obj . push ( {
85- value : UrlAreaService_1 . UrlAreaService . parseUrl ( mod_val ) ,
66+ while ( ( match = uriRx . exec ( textStr ) ) !== null ) {
67+ uriMatchList . push ( {
68+ value : UrlAreaService_1 . UrlAreaService . parseUrl ( match [ 0 ] ) ,
8669 area : 'text' ,
8770 index : {
8871 start : match . index ,
8972 end : match . index + match [ 0 ] . length
9073 }
9174 } ) ;
9275 }
93- return obj ;
76+ return uriMatchList ;
9477 } ,
95- extractAllPureEmails ( textStr , finalPrefixSanitizer ) {
78+ extractAllEmailMatchList ( textStr , finalPrefixSanitizer ) {
9679 if ( ! ( textStr && typeof textStr === 'string' ) ) {
9780 throw new Error ( 'the variable textStr must be a string type and not be null.' ) ;
9881 }
99- let obj = [ ] ;
100- let rx = new RegExp ( EmailPatternBuilder_1 . EmailPatternBuilder . getEmail , 'gi' ) ;
82+ let emailMatchList = [ ] ;
83+ const emailRx = new RegExp ( EmailPatternBuilder_1 . EmailPatternBuilder . getEmail , 'gi' ) ;
10184 let match ;
102- while ( ( match = rx . exec ( textStr ) ) !== null ) {
103- let mod_val = match [ 0 ] ;
104- let mod_val_front = mod_val . split ( / @ / ) [ 0 ] ;
105- let st_idx = match . index ;
106- let end_idx = match . index + match [ 0 ] . length ;
107- /* prefixSanitizer */
85+ while ( ( match = emailRx . exec ( textStr ) ) !== null ) {
86+ let matchedEmail = match [ 0 ] ;
87+ let matchedEmailFront = matchedEmail . split ( / @ / ) [ 0 ] ;
88+ let startIdx = match . index ;
89+ let endIdx = match . index + match [ 0 ] . length ;
10890 if ( finalPrefixSanitizer ) {
109- // the 'border' is a en char that divides non-en and en areas.
110- let border = '' ;
111- let removedLength = 0 ;
112- let rx_left_plus_border = new RegExp ( '^([^a-zA-Z0-9]+)([a-zA-Z0-9])' , '' ) ;
113- let is_mod_val_front_only_foreign_lang = true ;
114- let match2 ;
115- if ( ( match2 = rx_left_plus_border . exec ( mod_val_front ) ) !== null ) {
116- is_mod_val_front_only_foreign_lang = false ;
117- //console.log('match2:' + match2);
118- if ( match2 [ 1 ] ) {
119- removedLength = match2 [ 1 ] . length ;
120- }
121- if ( match2 [ 2 ] ) {
122- border = match2 [ 2 ] ;
123- }
124- }
125- if ( is_mod_val_front_only_foreign_lang === false ) {
126- mod_val = mod_val . replace ( rx_left_plus_border , '' ) ;
127- mod_val = border + mod_val ;
128- }
129- st_idx += removedLength ;
91+ const { sanitizedEmail, removedLength } = ( 0 , EmailMatchProcessor_1 . sanitizeEmailPrefix ) ( matchedEmailFront , matchedEmail ) ;
92+ matchedEmail = sanitizedEmail ;
93+ startIdx += removedLength ;
13094 }
131- let re = EmailAreaService_1 . EmailAreaService . assortEmail ( mod_val ) ;
132- //console.log('re : ' + re);
133- /* this part doesn't need to be included */
134- if ( re . removedTailOnEmail && re . removedTailOnEmail . length > 0 ) {
135- end_idx -= re . removedTailOnEmail . length ;
95+ let parsedEmail = EmailAreaService_1 . EmailAreaService . parseEmail ( matchedEmail ) ;
96+ /* Adjust endIdx by the length of removedTailOnUrl, if it exists */
97+ if ( parsedEmail . removedTailOnEmail && parsedEmail . removedTailOnEmail . length > 0 ) {
98+ endIdx -= parsedEmail . removedTailOnEmail . length ;
13699 }
137- obj . push ( {
138- value : re ,
100+ emailMatchList . push ( {
101+ value : parsedEmail ,
139102 area : 'text' ,
140103 index : {
141- start : st_idx ,
142- end : end_idx
104+ start : startIdx ,
105+ end : endIdx
143106 } ,
144- pass : EmailAreaService_1 . EmailAreaService . strictTest ( re . email )
107+ pass : EmailAreaService_1 . EmailAreaService . strictTest ( parsedEmail . email )
145108 } ) ;
146109 }
147- return obj ;
110+ return emailMatchList ;
148111 }
149112} ;
0 commit comments