|
6 | 6 | "source": [ |
7 | 7 | "# Regular Expressions\n", |
8 | 8 | "\n", |
9 | | - "In the last session we tried to interpret strings as valid heights and weights. This involved looking for text such as \"meter\" or \"kilogram\" in the string, and then extracting the number. This process is called pattern matching, and is best undertaken using a regular expression.\n", |
| 9 | + "In the error handling session we tried to interpret strings as valid heights and weights. This involved looking for text such as \"meter\" or \"kilogram\" in the string, and then extracting the number. This process is called pattern matching, and is best undertaken using a regular expression.\n", |
10 | 10 | "\n", |
11 | 11 | "Regular expressions have a long history and are available in most programming languages. Python provides Perl-style regular expressions through a standard-library module, which is called `re`." |
12 | 12 | ] |
13 | 13 | }, |
14 | 14 | { |
15 | 15 | "cell_type": "code", |
16 | 16 | "execution_count": null, |
17 | | - "metadata": { |
18 | | - "collapsed": true |
19 | | - }, |
| 17 | + "metadata": {}, |
20 | 18 | "outputs": [], |
21 | 19 | "source": [ |
22 | 20 | "import re" |
|
32 | 30 | { |
33 | 31 | "cell_type": "code", |
34 | 32 | "execution_count": null, |
35 | | - "metadata": { |
36 | | - "collapsed": true |
37 | | - }, |
| 33 | + "metadata": {}, |
38 | 34 | "outputs": [], |
39 | 35 | "source": [ |
40 | 36 | "h = \"2 meters\"" |
|
69 | 65 | { |
70 | 66 | "cell_type": "code", |
71 | 67 | "execution_count": null, |
72 | | - "metadata": { |
73 | | - "collapsed": true |
74 | | - }, |
| 68 | + "metadata": {}, |
75 | 69 | "outputs": [], |
76 | 70 | "source": [ |
77 | 71 | "m = re.search(\"meters\", h)" |
|
96 | 90 | { |
97 | 91 | "cell_type": "code", |
98 | 92 | "execution_count": null, |
99 | | - "metadata": { |
100 | | - "collapsed": true |
101 | | - }, |
| 93 | + "metadata": {}, |
102 | 94 | "outputs": [], |
103 | 95 | "source": [ |
104 | 96 | "h = \"2 meter\"" |
|
107 | 99 | { |
108 | 100 | "cell_type": "code", |
109 | 101 | "execution_count": null, |
110 | | - "metadata": { |
111 | | - "collapsed": true |
112 | | - }, |
| 102 | + "metadata": {}, |
113 | 103 | "outputs": [], |
114 | 104 | "source": [ |
115 | 105 | "m = re.search(\"meters?\", h)" |
|
134 | 124 | { |
135 | 125 | "cell_type": "code", |
136 | 126 | "execution_count": null, |
137 | | - "metadata": { |
138 | | - "collapsed": true |
139 | | - }, |
| 127 | + "metadata": {}, |
140 | 128 | "outputs": [], |
141 | 129 | "source": [ |
142 | 130 | "m = re.search(\"meters?$\", h)" |
|
161 | 149 | { |
162 | 150 | "cell_type": "code", |
163 | 151 | "execution_count": null, |
164 | | - "metadata": { |
165 | | - "collapsed": true |
166 | | - }, |
| 152 | + "metadata": {}, |
167 | 153 | "outputs": [], |
168 | 154 | "source": [ |
169 | 155 | "h = \"2 m\"" |
|
172 | 158 | { |
173 | 159 | "cell_type": "code", |
174 | 160 | "execution_count": null, |
175 | | - "metadata": { |
176 | | - "collapsed": true |
177 | | - }, |
| 161 | + "metadata": {}, |
178 | 162 | "outputs": [], |
179 | 163 | "source": [ |
180 | 164 | "m = re.search(\"(m|meters?)$\", h)" |
|
199 | 183 | { |
200 | 184 | "cell_type": "code", |
201 | 185 | "execution_count": null, |
202 | | - "metadata": { |
203 | | - "collapsed": true |
204 | | - }, |
| 186 | + "metadata": {}, |
205 | 187 | "outputs": [], |
206 | 188 | "source": [ |
207 | 189 | "h = \"2 meters\"" |
|
210 | 192 | { |
211 | 193 | "cell_type": "code", |
212 | 194 | "execution_count": null, |
213 | | - "metadata": { |
214 | | - "collapsed": true |
215 | | - }, |
| 195 | + "metadata": {}, |
216 | 196 | "outputs": [], |
217 | 197 | "source": [ |
218 | 198 | "m = re.search(\"\\d (m|meters?)$\", h)" |
|
237 | 217 | { |
238 | 218 | "cell_type": "code", |
239 | 219 | "execution_count": null, |
240 | | - "metadata": { |
241 | | - "collapsed": true |
242 | | - }, |
| 220 | + "metadata": {}, |
243 | 221 | "outputs": [], |
244 | 222 | "source": [ |
245 | 223 | "h = \"10 meters\"" |
|
248 | 226 | { |
249 | 227 | "cell_type": "code", |
250 | 228 | "execution_count": null, |
251 | | - "metadata": { |
252 | | - "collapsed": true |
253 | | - }, |
| 229 | + "metadata": {}, |
254 | 230 | "outputs": [], |
255 | 231 | "source": [ |
256 | 232 | "m = re.search(\"\\d+ (m|meters?)$\", h)" |
|
275 | 251 | { |
276 | 252 | "cell_type": "code", |
277 | 253 | "execution_count": null, |
278 | | - "metadata": { |
279 | | - "collapsed": true |
280 | | - }, |
| 254 | + "metadata": {}, |
281 | 255 | "outputs": [], |
282 | 256 | "source": [ |
283 | 257 | "h = \"1.5 meters\"" |
|
286 | 260 | { |
287 | 261 | "cell_type": "code", |
288 | 262 | "execution_count": null, |
289 | | - "metadata": { |
290 | | - "collapsed": true |
291 | | - }, |
| 263 | + "metadata": {}, |
292 | 264 | "outputs": [], |
293 | 265 | "source": [ |
294 | 266 | "m = re.search(\"\\d+\\.?\\d* (m|meters?)$\", h)" |
|
313 | 285 | { |
314 | 286 | "cell_type": "code", |
315 | 287 | "execution_count": null, |
316 | | - "metadata": { |
317 | | - "collapsed": true |
318 | | - }, |
| 288 | + "metadata": {}, |
319 | 289 | "outputs": [], |
320 | 290 | "source": [ |
321 | 291 | "h = \"some 1.8 meters\"" |
|
324 | 294 | { |
325 | 295 | "cell_type": "code", |
326 | 296 | "execution_count": null, |
327 | | - "metadata": { |
328 | | - "collapsed": true |
329 | | - }, |
| 297 | + "metadata": {}, |
330 | 298 | "outputs": [], |
331 | 299 | "source": [ |
332 | 300 | "m = re.search(\"^\\d+\\.?\\d* (m|meters?)$\", h)" |
|
335 | 303 | { |
336 | 304 | "cell_type": "code", |
337 | 305 | "execution_count": null, |
338 | | - "metadata": { |
339 | | - "collapsed": true |
340 | | - }, |
| 306 | + "metadata": {}, |
341 | 307 | "outputs": [], |
342 | 308 | "source": [ |
343 | 309 | "m" |
|
353 | 319 | { |
354 | 320 | "cell_type": "code", |
355 | 321 | "execution_count": null, |
356 | | - "metadata": { |
357 | | - "collapsed": true |
358 | | - }, |
| 322 | + "metadata": {}, |
359 | 323 | "outputs": [], |
360 | 324 | "source": [ |
361 | 325 | "h = \" 1.8 METers \"" |
|
364 | 328 | { |
365 | 329 | "cell_type": "code", |
366 | 330 | "execution_count": null, |
367 | | - "metadata": { |
368 | | - "collapsed": true |
369 | | - }, |
| 331 | + "metadata": {}, |
370 | 332 | "outputs": [], |
371 | 333 | "source": [ |
372 | 334 | "m = re.search(\"^\\s*\\d+\\.?\\d*\\s*(m|meters?)\\s*$\", h, re.IGNORECASE)" |
|
407 | 369 | { |
408 | 370 | "cell_type": "code", |
409 | 371 | "execution_count": null, |
410 | | - "metadata": { |
411 | | - "collapsed": true |
412 | | - }, |
| 372 | + "metadata": {}, |
413 | 373 | "outputs": [], |
414 | 374 | "source": [ |
415 | 375 | "m = re.search(\"^\\s*(\\d+\\.?\\d*)\\s*(m|meters?)\\s*$\", h, re.IGNORECASE)" |
|
434 | 394 | { |
435 | 395 | "cell_type": "code", |
436 | 396 | "execution_count": null, |
437 | | - "metadata": { |
438 | | - "collapsed": true |
439 | | - }, |
| 397 | + "metadata": {}, |
440 | 398 | "outputs": [], |
441 | 399 | "source": [ |
442 | 400 | "def string_to_height(height):\n", |
|
452 | 410 | { |
453 | 411 | "cell_type": "code", |
454 | 412 | "execution_count": null, |
455 | | - "metadata": { |
456 | | - "collapsed": true |
457 | | - }, |
| 413 | + "metadata": {}, |
458 | 414 | "outputs": [], |
459 | 415 | "source": [ |
460 | 416 | "h = string_to_height(\" 1.5 meters \")" |
|
499 | 455 | { |
500 | 456 | "cell_type": "code", |
501 | 457 | "execution_count": null, |
502 | | - "metadata": { |
503 | | - "collapsed": true |
504 | | - }, |
| 458 | + "metadata": {}, |
505 | 459 | "outputs": [], |
506 | 460 | "source": [ |
507 | 461 | "def get_number_and_unit(s):\n", |
|
521 | 475 | { |
522 | 476 | "cell_type": "code", |
523 | 477 | "execution_count": null, |
524 | | - "metadata": { |
525 | | - "collapsed": true |
526 | | - }, |
| 478 | + "metadata": {}, |
527 | 479 | "outputs": [], |
528 | 480 | "source": [] |
529 | 481 | } |
|
544 | 496 | "name": "python", |
545 | 497 | "nbconvert_exporter": "python", |
546 | 498 | "pygments_lexer": "ipython3", |
547 | | - "version": "3.5.3" |
| 499 | + "version": "3.5.2" |
548 | 500 | } |
549 | 501 | }, |
550 | 502 | "nbformat": 4, |
|
0 commit comments