import numpy as np
import pandas as pd
from pandas import Series, DataFrame
pd.set_option("display.precision", 2)

# Answering some questions about the Popular Baby Names dataset for New York City.
# The dataset was obtained from https://www.data.gov, the U.S. Government's
# open data site.

# Load the CSV (relative path, so it is looked up in the current working
# directory) into a DataFrame, then preview its first rows.
data = pd.read_csv('Popular_Baby_Names_New_York_City.csv')
data.head()

# Taking a look at data dimensionality:

# data.shape is a (number of rows, number of columns) tuple.
print(data.shape)

(19418, 6)

# From the output, we can see that the table contains 19418 rows and 6 columns.

# How many female and male (Gender feature) are represented in this dataset?

# value_counts() tallies how many rows carry each distinct Gender value.
data['Gender'].value_counts()

FEMALE    9933
MALE      9485
Name: Gender, dtype: int64

# The dataset has 9933 rows for female names and 9485 rows for male names.
# (value_counts() counts rows, i.e. name entries, not individual babies.)

# Exploring the dataset further, we sort its rows by Year of Birth, most recent first:

# ascending=False puts the most recent years first. The sorted frame is only
# displayed: it is not assigned, so `data` itself keeps its original row order.
data.sort_values(by='Year of Birth', ascending=False)

#What are the ethnicities represented in this dataset?

# Tally rows per distinct Ethnicity label. Labels are compared as exact
# strings, so differently spelled labels are counted as separate groups.
data['Ethnicity'].value_counts()

HISPANIC                      5714
WHITE NON HISPANIC            5473
BLACK NON HISPANIC            2826
ASIAN AND PACIFIC ISLANDER    2693
WHITE NON HISP                1338
ASIAN AND PACI                 693
BLACK NON HISP                 681
Name: Ethnicity, dtype: int64

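# As we can see, the dataset covers a diverse set of ethnicities. Note that
# 'WHITE NON HISP', 'ASIAN AND PACI' and 'BLACK NON HISP' look like truncated
# spellings of the three longer labels, so the same group may be counted under two names.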

# What is the percentage of Hispanic females and males (Ethnicity feature)?

# Share of rows whose Ethnicity is exactly 'HISPANIC': matching rows / all rows.
float(data['Ethnicity'].eq('HISPANIC').sum()) / len(data)

0.29426305489751775

# Round the Hispanic share of rows to two decimals. The share is recomputed
# from `data` instead of being pasted in as a literal copied from an earlier
# output, so the rounded value cannot go stale if the dataset changes.
np.around([float((data['Ethnicity'] == 'HISPANIC').sum()) / data.shape[0]], decimals=2)

array([0.29])

# The percentage of Hispanic females and males is around 29%.

# What is the percentage of White Non Hispanic females and males (Ethnicity feature)?

# Share of rows whose Ethnicity is exactly 'WHITE NON HISPANIC'.
# NOTE(review): the comparison is an exact string match, so rows labelled
# 'WHITE NON HISP' are not counted here -- confirm whether they should be.
float(data['Ethnicity'].eq('WHITE NON HISPANIC').sum()) / len(data)

0.28185188999897004

# Round the 'WHITE NON HISPANIC' share of rows to two decimals. The share is
# recomputed from `data` instead of being pasted in as a literal copied from
# an earlier output, so the rounded value cannot go stale if the dataset changes.
np.around([float((data['Ethnicity'] == 'WHITE NON HISPANIC').sum()) / data.shape[0]], decimals=2)

array([0.28])

# The percentage of White Non Hispanic females and males is around 28%.
# (Rows labelled with the shorter 'WHITE NON HISP' spelling are not included in this figure.)

# What is the average Year of Birth of females in this dataset?

# Mean 'Year of Birth' over the rows whose Gender is FEMALE, selecting the
# rows and the column in a single .loc lookup.
data.loc[data['Gender'] == 'FEMALE', 'Year of Birth'].mean()

2013.1224202154435

#The year 2013

# What is the average Year of Birth of males in this dataset?

# Mean 'Year of Birth' over the rows whose Gender is MALE, selecting the
# rows and the column in a single .loc lookup.
data.loc[data['Gender'] == 'MALE', 'Year of Birth'].mean()

2013.1263046916183

#Also the year 2013.

# What are some of the names among the BLACK NON HISPANIC ethnicity?

# First-name column for the rows whose Ethnicity is exactly
# 'BLACK NON HISPANIC', selected with a single .loc lookup.
data.loc[data['Ethnicity'] == 'BLACK NON HISPANIC', "Child's First Name"]

696          AARON
697         ABDOUL
698      ABDOULAYE
699           ADAM
700           ADEN
           ...    
19389         Levi
19392       Jahmir
19397         Liam
19400        Fatou
19407        Layla
Name: Child's First Name, Length: 2826, dtype: object

# Looking at the above results, we can see that some of the names among the BLACK NON HISPANIC ethnicity have a Middle Eastern
# or Jewish influence and origin. For example, names like Fatou, Layla, Abdoul, Levi and Aaron.

# Next we look more closely at the popular names among the ASIAN AND PACIFIC ISLANDER babies.
# For this purpose we extract the portion of the dataset that belongs to this ethnicity.
# Split the rows by Ethnicity label and keep only the group labelled exactly
# 'ASIAN AND PACIFIC ISLANDER'.
gb = data.groupby(by="Ethnicity")
asian_pac_islander = gb.get_group("ASIAN AND PACIFIC ISLANDER")
# Display 30 rows of that group, chosen by position (positions 546 to 575).
asian_pac_islander.iloc[546:576]

# This gives us some insight into the Anglo-Saxon origins and influences of names like Mia, Melody, Phoebe and Megan.

	Year of Birth	Gender	Ethnicity	Child's First Name	Count	Rank
16007	2016	MALE	ASIAN AND PACIFIC ISLANDER	Stanley	12	57
17686	2016	MALE	HISPANIC	Valentino	14	87
17710	2016	FEMALE	WHITE NON HISPANIC	Alma	18	76
16188	2016	FEMALE	WHITE NON HISPANIC	Yitty	58	40
17707	2016	FEMALE	HISPANIC	Sasha	16	68
16189	2016	MALE	ASIAN AND PACIFIC ISLANDER	Rayyan	19	50
16190	2016	MALE	BLACK NON HISPANIC	Isaac	20	48
17704	2016	FEMALE	HISPANIC	Hailey	81	22
17702	2016	FEMALE	WHITE NON HISPANIC	Dylan	19	75
16192	2016	MALE	HISPANIC	Felix	10	91
17700	2016	FEMALE	ASIAN AND PACIFIC ISLANDER	Queenie	20	31
17699	2016	FEMALE	WHITE NON HISPANIC	Ashley	10	84
16194	2016	FEMALE	BLACK NON HISPANIC	Zariah	15	38
17694	2016	MALE	ASIAN AND PACIFIC ISLANDER	Jace	11	58
17691	2016	MALE	BLACK NON HISPANIC	Christian	61	16
17690	2016	FEMALE	BLACK NON HISPANIC	Emily	26	27
16199	2016	FEMALE	ASIAN AND PACIFIC ISLANDER	Allison	20	31
16186	2016	MALE	HISPANIC	Ivan	36	67
17713	2016	FEMALE	WHITE NON HISPANIC	Liv	14	80
17714	2016	FEMALE	HISPANIC	Belen	10	74
16179	2016	MALE	WHITE NON HISPANIC	Maximilian	24	85
16174	2016	FEMALE	BLACK NON HISPANIC	Sasha	12	41
17733	2016	MALE	BLACK NON HISPANIC	Jaxon	16	51
17732	2016	FEMALE	HISPANIC	Layla	57	33
17728	2016	FEMALE	WHITE NON HISPANIC	Mindy	21	73
17727	2016	FEMALE	WHITE NON HISPANIC	Leila	24	70
16178	2016	FEMALE	WHITE NON HISPANIC	Nicole	35	59
17724	2016	FEMALE	WHITE NON HISPANIC	Lila	45	49
17715	2016	MALE	HISPANIC	Max	25	76
16180	2016	MALE	WHITE NON HISPANIC	Rocco	30	79
...	...	...	...	...	...	...
2591	2011	FEMALE	BLACK NON HISPANIC	BRIANNA	38	17
2619	2011	FEMALE	BLACK NON HISPANIC	GRACE	15	40
2606	2011	FEMALE	BLACK NON HISPANIC	EMILY	30	25
2618	2011	FEMALE	BLACK NON HISPANIC	GENESIS	15	40
2617	2011	FEMALE	BLACK NON HISPANIC	GABRIELLE	55	8
2616	2011	FEMALE	BLACK NON HISPANIC	GABRIELLA	24	31
2615	2011	FEMALE	BLACK NON HISPANIC	FATOUMATA	51	11
2614	2011	FEMALE	BLACK NON HISPANIC	FATOU	12	43
2613	2011	FEMALE	BLACK NON HISPANIC	FATIMA	24	31
2612	2011	FEMALE	BLACK NON HISPANIC	FANTA	15	40
2611	2011	FEMALE	BLACK NON HISPANIC	FAITH	46	12
2610	2011	FEMALE	BLACK NON HISPANIC	EVA	11	44
2609	2011	FEMALE	BLACK NON HISPANIC	ESSENCE	11	44
2608	2011	FEMALE	BLACK NON HISPANIC	ERIN	10	45
2607	2011	FEMALE	BLACK NON HISPANIC	EMMA	15	40
2605	2011	FEMALE	BLACK NON HISPANIC	ELIZABETH	22	33
2592	2011	FEMALE	BLACK NON HISPANIC	BRIELLE	35	20
2604	2011	FEMALE	BLACK NON HISPANIC	EGYPT	15	40
2603	2011	FEMALE	BLACK NON HISPANIC	EDEN	11	44
2602	2011	FEMALE	BLACK NON HISPANIC	DYLAN	11	44
2601	2011	FEMALE	BLACK NON HISPANIC	DESTINY	35	20
2600	2011	FEMALE	BLACK NON HISPANIC	DANIELLE	20	35
2599	2011	FEMALE	BLACK NON HISPANIC	DAKOTA	15	40
2598	2011	FEMALE	BLACK NON HISPANIC	CHRISTINA	17	38
2597	2011	FEMALE	BLACK NON HISPANIC	CHLOE	72	4
2596	2011	FEMALE	BLACK NON HISPANIC	CHEYENNE	10	45
2595	2011	FEMALE	BLACK NON HISPANIC	CHELSEA	37	18
2594	2011	FEMALE	BLACK NON HISPANIC	CAMILLE	12	43
2593	2011	FEMALE	BLACK NON HISPANIC	BROOKE	12	43
0	2011	FEMALE	HISPANIC	GERALDINE	13	75

	Year of Birth	Gender	Ethnicity	Child's First Name	Count	Rank
2490	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KATE	10	38
2491	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KATELYN	14	34
2492	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KATHERINE	21	27
2493	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KATIE	18	30
2494	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KAYLA	21	27
2495	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KAYLEE	20	28
2496	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KELLY	22	26
2497	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KHLOE	13	35
2498	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	KYLIE	11	37
2499	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	LAUREN	13	35
2500	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	LEAH	16	32
2501	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	LEELA	10	38
2502	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	LILLIAN	16	32
2503	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	LILY	15	33
2504	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MADISON	19	29
2505	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MAGGIE	10	38
2506	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MANDY	12	36
2507	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MARYAM	17	31
2508	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MAYA	27	21
2509	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MEGAN	10	38
2510	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MELODY	13	35
2511	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MIA	36	14
2512	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MICHELLE	42	12
2513	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MILA	17	31
2514	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	MINA	12	36
2515	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	NATALIE	20	28
2516	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	NICOLE	33	16
2517	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	NINA	21	27
2518	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	OLIVIA	89	4
2519	2011	FEMALE	ASIAN AND PACIFIC ISLANDER	PHOEBE	18	30

Search This Blog

curious_about_data

Analysing New York's Popular Baby Names Dataset.

Comments

Post a Comment