|
330 | 330 | },
|
331 | 331 | {
|
332 | 332 | "cell_type": "markdown",
|
333 |
| - "metadata": {}, |
| 333 | + "metadata": { |
| 334 | + "tags": [] |
| 335 | + }, |
334 | 336 | "source": [
|
335 |
| - "## 🥊 Challenge 3: Checking Method Output\n", |
| 337 | + "## 🥊 Challenge 4: Check the Data Type\n", |
336 | 338 | "\n",
|
337 |
| - "For each of the following methods, what type is the output?" |
| 339 | + "What is the data type of the output of `describe()`?" |
338 | 340 | ]
|
339 | 341 | },
|
340 | 342 | {
|
341 | 343 | "cell_type": "code",
|
342 |
| - "execution_count": 12, |
| 344 | + "execution_count": null, |
343 | 345 | "metadata": {},
|
344 | 346 | "outputs": [],
|
345 | 347 | "source": [
|
346 |
| - "import pandas as pd\n", |
347 |
| - "df = pd.read_csv('../data/gapminder_gni.csv')" |
348 |
| - ] |
349 |
| - }, |
350 |
| - { |
351 |
| - "cell_type": "code", |
352 |
| - "execution_count": 13, |
353 |
| - "metadata": {}, |
354 |
| - "outputs": [ |
355 |
| - { |
356 |
| - "data": { |
357 |
| - "text/plain": [ |
358 |
| - "pandas.core.series.Series" |
359 |
| - ] |
360 |
| - }, |
361 |
| - "execution_count": 13, |
362 |
| - "metadata": {}, |
363 |
| - "output_type": "execute_result" |
364 |
| - } |
365 |
| - ], |
366 |
| - "source": [ |
367 |
| - "# This is a Series\n", |
368 |
| - "type(df['lifeExp'].value_counts(ascending=True))" |
369 |
| - ] |
370 |
| - }, |
371 |
| - { |
372 |
| - "cell_type": "code", |
373 |
| - "execution_count": 14, |
374 |
| - "metadata": {}, |
375 |
| - "outputs": [ |
376 |
| - { |
377 |
| - "data": { |
378 |
| - "text/plain": [ |
379 |
| - "pandas.core.frame.DataFrame" |
380 |
| - ] |
381 |
| - }, |
382 |
| - "execution_count": 14, |
383 |
| - "metadata": {}, |
384 |
| - "output_type": "execute_result" |
385 |
| - } |
386 |
| - ], |
387 |
| - "source": [ |
388 |
| - "# This is a DataFrame\n", |
389 |
| - "type(df.isnull())" |
390 |
| - ] |
391 |
| - }, |
392 |
| - { |
393 |
| - "cell_type": "code", |
394 |
| - "execution_count": 15, |
395 |
| - "metadata": {}, |
396 |
| - "outputs": [ |
397 |
| - { |
398 |
| - "data": { |
399 |
| - "text/plain": [ |
400 |
| - "pandas.core.frame.DataFrame" |
401 |
| - ] |
402 |
| - }, |
403 |
| - "execution_count": 15, |
404 |
| - "metadata": {}, |
405 |
| - "output_type": "execute_result" |
406 |
| - } |
407 |
| - ], |
408 |
| - "source": [ |
409 |
| - "# This is also a DataFrame\n", |
410 |
| - "type(df.dropna())" |
411 |
| - ] |
412 |
| - }, |
413 |
| - { |
414 |
| - "cell_type": "code", |
415 |
| - "execution_count": 16, |
416 |
| - "metadata": {}, |
417 |
| - "outputs": [ |
418 |
| - { |
419 |
| - "data": { |
420 |
| - "text/plain": [ |
421 |
| - "pandas.core.series.Series" |
422 |
| - ] |
423 |
| - }, |
424 |
| - "execution_count": 16, |
425 |
| - "metadata": {}, |
426 |
| - "output_type": "execute_result" |
427 |
| - } |
428 |
| - ], |
429 |
| - "source": [ |
430 |
| - "# This is a Series\n", |
431 |
| - "type(df['country'].str[:4])" |
432 |
| - ] |
433 |
| - }, |
434 |
| - { |
435 |
| - "cell_type": "markdown", |
436 |
| - "metadata": {}, |
437 |
| - "source": [ |
438 |
| - "## 🥊 Challenge 4: Dealing With Missing Values\n", |
439 |
| - "\n", |
440 |
| - "Dealing with missing values is important, even if some methods in Pandas automatically exclude them.\n", |
441 |
| - "\n", |
442 |
| - "1. Find the missing values of `df['gniPercap']` using the `.isna()` method. Check the [documentation](https://pandas.pydata.org/docs/reference/api/pandas.Series.isna.html) to see how it works.\n", |
443 |
| - "2. Then, run the `sum()` of that output to see how many missing values we have in total.\n", |
444 |
| - "3. Remove all missing values in the column using the `.dropna()` method. Check the [documentation](https://pandas.pydata.org/docs/reference/api/pandas.Series.dropna.html) to see how it works." |
445 |
| - ] |
446 |
| - }, |
447 |
| - { |
448 |
| - "cell_type": "code", |
449 |
| - "execution_count": 17, |
450 |
| - "metadata": {}, |
451 |
| - "outputs": [ |
452 |
| - { |
453 |
| - "data": { |
454 |
| - "text/plain": [ |
455 |
| - "15 730.0\n", |
456 |
| - "16 320.0\n", |
457 |
| - "17 860.0\n", |
458 |
| - "18 1650.0\n", |
459 |
| - "19 4040.0\n", |
460 |
| - " ... \n", |
461 |
| - "1315 810.0\n", |
462 |
| - "1316 640.0\n", |
463 |
| - "1317 620.0\n", |
464 |
| - "1318 440.0\n", |
465 |
| - "1319 330.0\n", |
466 |
| - "Name: gniPercap, Length: 1010, dtype: float64" |
467 |
| - ] |
468 |
| - }, |
469 |
| - "execution_count": 17, |
470 |
| - "metadata": {}, |
471 |
| - "output_type": "execute_result" |
472 |
| - } |
473 |
| - ], |
474 |
| - "source": [ |
475 |
| - "# YOUR CODE HERE\n", |
476 |
| - "df['gniPercap'].isna().sum()\n", |
477 |
| - "df['gniPercap'].dropna() # Tip: add inplace=True as an argument to alter the DF directly\n" |
478 |
| - ] |
479 |
| - }, |
480 |
| - { |
481 |
| - "cell_type": "code", |
482 |
| - "execution_count": 18, |
483 |
| - "metadata": { |
484 |
| - "tags": [] |
485 |
| - }, |
486 |
| - "outputs": [ |
487 |
| - { |
488 |
| - "data": { |
489 |
| - "text/plain": [ |
490 |
| - "0 1\n", |
491 |
| - "1 1\n", |
492 |
| - "2 1\n", |
493 |
| - "3 1\n", |
494 |
| - "4 1\n", |
495 |
| - " ..\n", |
496 |
| - "1315 3\n", |
497 |
| - "1316 3\n", |
498 |
| - "1317 3\n", |
499 |
| - "1318 3\n", |
500 |
| - "1319 3\n", |
501 |
| - "Name: continent, Length: 1320, dtype: int64" |
502 |
| - ] |
503 |
| - }, |
504 |
| - "execution_count": 18, |
505 |
| - "metadata": {}, |
506 |
| - "output_type": "execute_result" |
507 |
| - } |
508 |
| - ], |
509 |
| - "source": [ |
510 |
| - "#YOUR CODE HERE\n", |
511 |
| - "df.continent.replace(df['continent'].unique(), [1,2,3,4,5])" |
| 348 | + "type(df.describe())" |
512 | 349 | ]
|
513 | 350 | },
|
514 | 351 | {
|
|
0 commit comments