Free prototype
invision
https://www.invisionapp.com/
3 month trial for free
https://www.invisionapp.com/signup/udacity
ソフトウェアエンジニアの技術ブログ:Software engineer tech blog
随机应变 ABCD: Always Be Coding and … : хороший
Free prototype
invision
https://www.invisionapp.com/
3 month trial for free
https://www.invisionapp.com/signup/udacity
what is the mode?
2, 5, 5, 9, 8
-> 5
The mode is the value that occurs most often,
i.e. the value whose frequency is the highest.
import pandas as pd
# Two exam scores for each of ten students, indexed by first name.
grades_df = pd.DataFrame(
    data={
        'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
        'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60],
    },
    index=[
        'Andre', 'Barry', 'Chris', 'Dan', 'Emilio',
        'Fred', 'Greta', 'Humbert', 'Ivan', 'James',
    ]
)

# Disabled demo: change the condition to True to run it.
if False:
    def convert_grades_curve(exam_grades):
        """Label each score by quantile bucket.

        The quantile edges 0 / 0.1 / 0.2 / 0.5 / 0.8 / 1 map, lowest
        bucket first, onto the labels F, D, C, B, A.
        """
        quantile_edges = [0, 0.1, 0.2, 0.5, 0.8, 1]
        letters = ['F', 'D', 'C', 'B', 'A']
        return pd.qcut(exam_grades, quantile_edges, labels=letters)

    # Curve a single column ...
    print(convert_grades_curve(grades_df['exam1']))
    # ... then let DataFrame.apply run the same function on every column.
    print(grades_df.apply(convert_grades_curve))
def standardize(df):
    """Placeholder: not implemented yet.

    The argument is currently ignored and ``None`` is returned.
    TODO: presumably this should return `df` with every column converted
    to standard scores -- confirm the intended formula before filling in.
    """
    return None
import numpy as np
import pandas as pd
# Small three-column frame used by the column-wise apply() demo below.
df = pd.DataFrame({
    'a': [4, 5, 3, 1, 2],
    'b': [20, 10, 40, 50, 30],
    'c': [25, 20, 5, 15, 10],
})

# Disabled demo: DataFrame.apply hands each column to the given function.
if False:
    print(df.apply(np.mean))
    print(df.apply(np.max))
def second_largest(df):
    """Placeholder: not implemented yet.

    The argument is currently ignored and ``None`` is returned.
    TODO: presumably this should return the second-largest value of each
    column of `df` -- confirm before filling in.
    """
    return None
import pandas as pd
# Disabled demo: both frames have the same columns and the same row index.
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
    df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]})
    print(df1 + df2)

# Disabled demo: the frames share only columns 'b' and 'c'.
# NOTE(review): df1['c'] holds strings while df2['c'] holds ints, so the
# addition would likely raise if this were enabled -- confirm whether the
# quotes are intentional.  The result is also not printed.
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': ['7', '8', '9']})
    df2 = pd.DataFrame({'d': [10, 20, 30], 'c': [40, 50, 60], 'b': [70, 80, 90]})
    df1 + df2

# Disabled demo: same columns, but only 'row2' and 'row3' are in both indexes.
if False:
    df1 = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
        index=['row1', 'row2', 'row3']
    )
    df2 = pd.DataFrame(
        {'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]},
        index=['row4', 'row3', 'row2']
    )
    print(df1 + df2)

# Ten sample readings of the ENTRIESn and EXITSn counters.
entries_and_exits = pd.DataFrame({
    'ENTRIESn': [
        3144312, 3144335, 3144353, 3144424, 3144594,
        3144808, 3144895, 3144905, 3144941, 3145094,
    ],
    'EXITSn': [
        1088151, 1088159, 1088177, 1088231, 1088275,
        1088317, 1088328, 1088331, 1088420, 1088753,
    ],
})
def get_hourly_entries_and_exits(entries_and_exits):
    """Placeholder: not implemented yet.

    The argument is currently ignored and ``None`` is returned.
    TODO: presumably this should turn the running ENTRIESn / EXITSn
    counters into per-row differences -- confirm before filling in.
    """
    return None
import pandas as pd
# Disabled demo: DataFrame.applymap calls the function on every single cell.
if False:
    df = pd.DataFrame({
        'a': [1, 2, 3],
        'b': [10, 20, 30],
        'c': [5, 10, 15],
    })

    def add_one(x):
        """Return x incremented by one."""
        return x + 1

    print(df.applymap(add_one))

# Two exam scores for each of ten students, indexed by first name.
grades_df = pd.DataFrame(
    data={
        'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
        'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60],
    },
    index=[
        'Andre', 'Barry', 'Chris', 'Dan', 'Emilio',
        'Fred', 'Greta', 'Humbert', 'Ivan', 'James',
    ]
)
def convert_grades(grades):
    """Placeholder: not implemented yet.

    The argument is currently ignored and ``None`` is returned.
    TODO: presumably this should map numeric grades to letter grades; the
    cut-offs are not defined anywhere in this file -- confirm them first.
    """
    return None
import pandas as pd


def correlation(x, y):
    """Return Pearson's correlation coefficient between `x` and `y`.

    Both inputs are standardized with the population standard deviation
    (``ddof=0``); the result is the mean of the element-wise product of
    the two standardized series.

    Parameters:
        x, y: objects supporting ``.mean()``, ``.std(ddof=0)`` and
            element-wise arithmetic (the script below passes DataFrame
            columns).
    """
    std_x = (x - x.mean()) / x.std(ddof=0)
    std_y = (y - y.mean()) / y.std(ddof=0)
    return (std_x * std_y).mean()


# The CSV is only read when this file is run as a script, so importing
# `correlation` does not require the data file to exist.
if __name__ == '__main__':
    filename = 'xxxx.csv'  # placeholder path: point this at the real CSV
    subway_df = pd.read_csv(filename)

    entries = subway_df['ENTRIESn_hourly']
    # Was 'meanprecipi', which made this identical to `rain` and the last
    # correlation below a repeat of the first one.
    cum_entries = subway_df['ENTRIESn']
    rain = subway_df['meanprecipi']
    temp = subway_df['meantempi']

    print(correlation(entries, rain))
    print(correlation(entries, temp))
    print(correlation(rain, temp))
    print(correlation(entries, cum_entries))
import pandas as pd
# Ridership table: one row per date label, one column per R00x label.
ridership_df = pd.DataFrame(
    data=[
        [0,    0,    2,    5,    0],
        [1478, 3877, 3674, 2328, 2539],
        [1613, 4088, 3991, 6461, 2691],
        [1560, 3392, 3826, 4787, 2613],
        [1608, 4802, 3932, 4477, 2705],
        [1576, 3933, 3909, 4979, 2685],
        [95,   229,  255,  496,  201],
        [2,    0,    1,    27,   0],
        [1438, 3785, 3589, 4174, 2215],
        [1342, 4043, 4009, 4665, 3033],
    ],
    index=[
        '05-01-11', '05-02-11', '05-03-11', '05-04-11', '05-05-11',
        '05-06-11', '05-07-11', '05-08-11', '05-09-11', '05-10-11',
    ],
    columns=['R003', 'R004', 'R005', 'R006', 'R007']
)

# Disabled demo: building a DataFrame from a dict of columns vs. a list of rows.
if False:
    df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
    print(df_1)

    df_2 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A', 'B', 'C'])
    print(df_2)

# Disabled demo: selecting a row by position, a row by label, a column,
# and a single cell by (row, column) position.
if False:
    print(ridership_df.iloc[0])
    print(ridership_df.loc['05-05-11'])
    print(ridership_df['R003'])
    print(ridership_df.iloc[1, 3])

# Disabled demo: slicing several rows by position.
if False:
    print(ridership_df.iloc[1:4])

# Disabled demo: selecting several columns at once.
if False:
    print(ridership_df[['R003', 'R005']])

# Disabled demo: sum per column, sum per row, and sum of every value.
if False:
    df = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
    print(df.sum())
    print(df.sum(axis=1))
    print(df.values.sum())
def mean_riders_for_max_station(ridership):
    """Placeholder: not implemented yet.

    The argument is currently ignored; both elements of the returned
    ``(overall_mean, mean_for_max)`` tuple are ``None``.
    TODO: fill in both values -- the rule for picking the "max station"
    is not stated in this file, so confirm it first.
    """
    overall_mean = mean_for_max = None
    return overall_mean, mean_for_max
import numpy as np
# Ridership values as a plain 2-D NumPy array (10 rows x 5 columns).
ridership = np.array([
    [0,    0,    2,    5,    0],
    [1478, 3877, 3674, 2328, 2539],
    [1613, 4088, 3991, 6461, 2691],
    [1560, 3392, 3826, 4787, 2613],
    [1608, 4802, 3932, 4477, 2705],
    [1576, 3933, 3909, 4979, 2685],
    [95,   229,  255,  496,  201],
    [2,    0,    1,    27,   0],
    [1438, 3785, 3589, 4174, 2215],
    [1342, 4043, 4009, 4665, 3033],
])

# Disabled demo: single element, rectangular slice, and one whole row.
if False:
    print(ridership[1, 3])
    print(ridership[1:3, 3:5])
    print(ridership[1, :])

# Disabled demo: element-wise sums of two rows and of two columns.
if False:
    print(ridership[0, :] + ridership[1, :])
    print(ridership[:, 0] + ridership[:, 1])

# Disabled demo: element-wise sum of two whole 2-D arrays.
if False:
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    b = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
    print(a + b)
def mean_riders_for_max_station(ridership):
    """Placeholder: not implemented yet.

    The argument is currently ignored; both elements of the returned
    ``(overall_mean, mean_for_max)`` tuple are ``None``.
    TODO: fill in both values -- the rule for picking the "max station"
    is not stated in this file, so confirm it first.
    """
    overall_mean = mean_for_max = None
    return overall_mean, mean_for_max
import pandas as pd
# Disabled demo: Series.apply calls the function once per element.
if False:
    s = pd.Series([1, 2, 3, 4, 5])

    def add_one(x):
        """Return x incremented by one."""
        return x + 1

    print(s.apply(add_one))

# Sample names, each written as "Firstname Lastname".
names = pd.Series([
    'Andre Agassi',
    'Barry Bonds',
    'Christopher Columbus',
    'Daniel Defoe',
    'Emilio Estevez',
    'Fred Flintstone',
    'Greta Garbo',
    'Humbert Humbert',
    'Ivan Ilych',
    'James Joyce',
    'Keira Knightley',
    'Lois Lane',
    'Mike Myers',
    'Nick Nolte',
    'Ozzy Osbourne',
    'Pablo Picasso',
    'Quirinus Quirrell',
    'Rachael Ray',
    'Susan Sarandon',
    'Tina Turner',
    'Ugueth Urbina',
    'Vince Vaughn',
    'Woodrow Wilson',
    'Yoji Yamada',
    'Zinedine Zidane',
])
def reverse_names(names):
    """Return a new Series with every name rewritten as "Last, First".

    Parameters:
        names: pandas Series of strings in "First Last" form (anything
            exposing a compatible ``.apply`` works).

    Returns:
        The result of ``names.apply``: one "Last, First" string per input
        element, in the same order.

    Raises:
        IndexError: if an element contains no space, since there is then
            no second word to use as the last name.
    """
    def _reverse_name(name):
        # Only the first two space-separated words are used.
        split_name = name.split(" ")
        first_name = split_name[0]
        last_name = split_name[1]
        return last_name + ', ' + first_name

    # The body previously referenced an undefined `name` instead of
    # mapping over the `names` argument.
    return names.apply(_reverse_name)
vector [1, 2, 3] * scalar 3 = [1, 2, 3, 1, 2, 3, 1, 2, 3], or [3, 6, 9], or an error?
All of these are reasonable answers.
More vectorized operations
Math operations
add, subtract, multiply, divide, exponentiate
Logical operations
&, |, ~
Comparison operations
>, >=, <, <=, ==, !=
import numpy as np

# Disabled demo: arithmetic between two arrays is applied element by element.
if False:
    a = np.array([1, 2, 3, 4])
    b = np.array([1, 2, 1, 2])

    print(a + b)
    print(a - b)  # was an en dash, which is not a valid operator
    print(a * b)
    print(a / b)
    print(a ** b)

# Disabled demo: arithmetic between an array and a scalar.
if False:  # the colon was missing here
    a = np.array([1, 2, 3, 4])
    b = 2

    print(a + b)
    print(a - b)  # was an en dash, which is not a valid operator
    print(a * b)
    print(a / b)
    print(a ** b)

# Disabled demo: comparisons between two arrays are element-wise as well.
if False:
    a = np.array([1, 2, 3, 4, 5])
    b = np.array([5, 4, 3, 2, 1])

    print(a > b)
    print(a >= b)
    print(a < b)
    print(a <= b)
    print(a == b)
    print(a != b)
code snippet
import numpy as np
# Demonstrates that `+=` changes an array in place.
a = np.array([1,2,3,4])
# `b` is a second name for the same array object, not a copy.
b = a
# In-place add: the existing array is modified, so `b` sees the change too.
a += np.array([1,1,1,1])
# Shows the updated values (2, 3, 4, 5), not the original 1..4.
print b
+= operates in-place while + does not
# Was `import pandas as pandas`, although every use below goes through `pd`.
import pandas as pd

# Disabled demos: adding two Series matches elements up by index label.
# `index=` was missing its `=` on every s2, which would have subscripted an
# undefined name instead of passing the keyword argument.
# NOTE(review): all four demos are identical in this copy; presumably each
# was meant to give s2 a different index -- confirm against the original notes.
if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)
import numpy as np
# Twenty country names ...
countries = np.array([
    'Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
    'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas',
    'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
    'Belize', 'Benin', 'Bhutan', 'Bolivia',
    'Bosnia and Herzegovina',
])

# ... and one employment figure per country, in the same order.
employment = np.array([
    55.70000076, 51.40000153, 50.5,        75.69999695,
    58.40000153, 40.09999847, 61.5,        57.09999847,
    60.90000153, 66.59999847, 60.40000153, 68.09999847,
    66.90000153, 53.40000153, 48.59999847, 56.79999924,
    71.59999847, 58.40000153, 70.40000153, 41.20000076,
])

# Disabled demo: indexing single elements.
if False:
    print(countries[0])
    print(countries[3])

# Disabled demo: slicing.
if False:
    print(countries[0:3])
    print(countries[:3])
    print(countries[17:])
    print(countries[:])

# Disabled demo: the dtype NumPy picks for different kinds of elements.
if False:
    print(countries.dtype)
    print(employment.dtype)
    print(np.array([0, 1, 2, 3]).dtype)
    print(np.array([1.0, 1.5, 2.0, 2.5]).dtype)
    print(np.array([True, False, True]).dtype)
    print(np.array(['AL', 'AK', 'AZ', 'AR', 'CA']).dtype)

# Disabled demo: looping over an array directly and by position.
if False:
    for country in countries:
        print('Examining country {}'.format(country))

    for i, country in enumerate(countries):
        country_employment = employment[i]
        print('Country {} has employment {}'.format(country,
                                                    country_employment))

# Disabled demo: summary statistics provided by the array itself.
if False:
    print(employment.mean())
    print(employment.std())
    print(employment.max())
    print(employment.sum())
def max_employment(countries, employment):
    """Placeholder: not implemented yet.

    Both arguments are currently ignored; both elements of the returned
    ``(max_country, max_value)`` tuple are ``None``.
    TODO: presumably this should return the country with the highest
    employment value together with that value -- confirm before filling in.
    """
    max_country = max_value = None
    return max_country, max_value