Free prototype
invision
https://www.invisionapp.com/
3 month trial for free
https://www.invisionapp.com/signup/udacity
ソフトウェアエンジニアの技術ブログ:Software engineer tech blog
随机应变 ABCD: Always Be Coding and … : хороший
Free prototype
invision
https://www.invisionapp.com/
3 month trial for free
https://www.invisionapp.com/signup/udacity
what is the mode?
2, 5, 5, 9, 8
-> 5
the number that occurs most often
frequency is the highest
import pandas as pd

# Two exam score columns for ten rows labelled by first name.
grades_df = pd.DataFrame(
    data={
        'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
        'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60]
    },
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio',
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James']
)

# Disabled demo of DataFrame.apply(); change False to True to run it.
if False:
    def convert_grades_curve(exam_grades):
        """Bin the scores by quantile with pd.qcut and label the bins F..A.

        The quantile edges are 0, 0.1, 0.2, 0.5, 0.8 and 1, labelled
        'F', 'D', 'C', 'B', 'A' from the lowest bin to the highest.
        """
        return pd.qcut(exam_grades,
                       [0, 0.1, 0.2, 0.5, 0.8, 1],
                       labels=['F', 'D', 'C', 'B', 'A'])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(convert_grades_curve(grades_df['exam1']))
    print(grades_df.apply(convert_grades_curve))


def standardize(df):
    """Exercise placeholder: ignores ``df`` and returns None.

    NOTE(review): the name suggests per-column standardization, but no
    implementation or specification is present here.
    """
    return None
import numpy as np
import pandas as pd

# Small three-column frame used by the demo below.
df = pd.DataFrame({
    'a': [4, 5, 3, 1, 2],
    'b': [20, 10, 40, 50, 30],
    'c': [25, 20, 5, 15, 10]
})

# Disabled demo: apply() with a function that reduces each column to one value.
if False:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(df.apply(np.mean))
    print(df.apply(np.max))


def second_largest(df):
    """Exercise placeholder: ignores ``df`` and returns None.

    NOTE(review): presumably meant to return the second-largest value of
    each column; no implementation is present here.
    """
    return None
import pandas as pd

# Disabled demo: adding DataFrames that share the same column names.
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
    df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]})
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(df1 + df2)

# Disabled demo: adding DataFrames whose column names only partly overlap.
# NOTE(review): column 'c' holds strings in df1 but integers in df2, so
# enabling this block would try to add str and int (expect a TypeError);
# the sum is also not printed, so its result is discarded. Confirm intent.
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': ['7', '8', '9']})
    df2 = pd.DataFrame({'d': [10, 20, 30], 'c': [40, 50, 60], 'b': [70, 80, 90]})
    df1 + df2

# Disabled demo: adding DataFrames whose row labels only partly overlap.
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
                       index=['row1', 'row2', 'row3'])
    df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]},
                       index=['row4', 'row3', 'row2'])
    print(df1 + df2)

# Ten readings of two counters; both columns are non-decreasing.
entries_and_exits = pd.DataFrame({
    'ENTRIESn': [3144312, 3144335, 3144353, 3144424, 3144594,
                 3144808, 3144895, 3144905, 3144941, 3145094],
    'EXITSn': [1088151, 1088159, 1088177, 1088231, 1088275,
               1088317, 1088328, 1088331, 1088420, 1088753]
})


def get_hourly_entries_and_exits(entries_and_exits):
    """Exercise placeholder: ignores its argument and returns None.

    NOTE(review): presumably meant to turn the cumulative counters into
    per-interval differences; no implementation is present here.
    """
    return None
import pandas as pd

# Disabled demo: applying a function to every element of a DataFrame.
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour
# of DataFrame.map - confirm against the installed pandas version.
if False:
    df = pd.DataFrame({
        'a': [1, 2, 3],
        'b': [10, 20, 30],
        'c': [5, 10, 15]
    })

    def add_one(x):
        """Return ``x`` plus one."""
        return x + 1

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(df.applymap(add_one))

# Two exam score columns for ten rows labelled by first name.
grades_df = pd.DataFrame(
    data={
        'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
        'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60]
    },
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio',
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James']
)


def convert_grades(grades):
    """Exercise placeholder: ignores ``grades`` and returns None.

    NOTE(review): presumably meant to map numeric scores to letter grades;
    the thresholds are not specified anywhere in this file.
    """
    return None
import pandas as pd

# Placeholder path; point this at the real CSV before running.
filename = 'xxxx.csv'
subway_df = pd.read_csv(filename)


def correlation(x, y):
    """Return the mean of the product of the standardized ``x`` and ``y``.

    Each input is centred on its mean and divided by its population
    standard deviation (``ddof=0``); the mean of the element-wise product
    of the two standardized inputs is Pearson's r.

    Both arguments must support ``.mean()``, ``.std(ddof=0)`` and
    element-wise arithmetic (for example pandas Series).
    """
    std_x = (x - x.mean()) / x.std(ddof=0)
    std_y = (y - y.mean()) / y.std(ddof=0)
    return (std_x * std_y).mean()


entries = subway_df['ENTRIESn_hourly']
# Previously this read 'meanprecipi', which made cum_entries identical to
# `rain` and the last correlation a duplicate of the first.
# Assumes the CSV has a cumulative 'ENTRIESn' column - confirm against the data.
cum_entries = subway_df['ENTRIESn']
rain = subway_df['meanprecipi']
temp = subway_df['meantempi']

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(correlation(entries, rain))
print(correlation(entries, temp))
print(correlation(rain, temp))
print(correlation(entries, cum_entries))
import pandas as pd

# 10 x 5 table: rows are labelled with date-like strings, columns with
# codes R003..R007 (presumably station identifiers).
ridership_df = pd.DataFrame(
    data=[[0, 0, 2, 5, 0],
          [1478, 3877, 3674, 2328, 2539],
          [1613, 4088, 3991, 6461, 2691],
          [1560, 3392, 3826, 4787, 2613],
          [1608, 4802, 3932, 4477, 2705],
          [1576, 3933, 3909, 4979, 2685],
          [95, 229, 255, 496, 201],
          [2, 0, 1, 27, 0],
          [1438, 3785, 3589, 4174, 2215],
          [1342, 4043, 4009, 4665, 3033]],
    index=['05-01-11', '05-02-11', '05-03-11', '05-04-11', '05-05-11',
           '05-06-11', '05-07-11', '05-08-11', '05-09-11', '05-10-11'],
    columns=['R003', 'R004', 'R005', 'R006', 'R007']
)

# Disabled demo: two ways of constructing a DataFrame.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
if False:
    df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
    print(df_1)

    df_2 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A', 'B', 'C'])
    print(df_2)

# Disabled demo: selecting a row by position, a row by label, a column,
# and a single cell by position.
if False:
    print(ridership_df.iloc[0])
    print(ridership_df.loc['05-05-11'])
    print(ridership_df['R003'])
    print(ridership_df.iloc[1, 3])

# Disabled demo: slicing several rows by position.
if False:
    print(ridership_df.iloc[1:4])

# Disabled demo: selecting several columns at once.
if False:
    print(ridership_df[['R003', 'R005']])

# Disabled demo: sum per column, sum per row, and sum of all values.
if False:
    df = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
    print(df.sum())
    print(df.sum(axis=1))
    print(df.values.sum())


def mean_riders_for_max_station(ridership):
    """Exercise placeholder: ignores ``ridership`` and returns (None, None).

    Returns:
        A 2-tuple ``(overall_mean, mean_for_max)``; both are currently None.

    NOTE(review): how the "max station" should be chosen is not specified
    anywhere in this file.
    """
    overall_mean = None
    mean_for_max = None
    return (overall_mean, mean_for_max)
import numpy as np

# 2-D integer array with 10 rows and 5 columns.
# Presumably rows are days and columns are stations; the array itself
# carries no labels.
ridership = np.array([
    [0, 0, 2, 5, 0],
    [1478, 3877, 3674, 2328, 2539],
    [1613, 4088, 3991, 6461, 2691],
    [1560, 3392, 3826, 4787, 2613],
    [1608, 4802, 3932, 4477, 2705],
    [1576, 3933, 3909, 4979, 2685],
    [95, 229, 255, 496, 201],
    [2, 0, 1, 27, 0],
    [1438, 3785, 3589, 4174, 2215],
    [1342, 4043, 4009, 4665, 3033]
])

# Disabled demo: a single element, a 2-D slice, and a whole row.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
if False:
    print(ridership[1, 3])
    print(ridership[1:3, 3:5])
    print(ridership[1, :])

# Disabled demo: element-wise addition of two rows and of two columns.
if False:
    print(ridership[0, :] + ridership[1, :])
    print(ridership[:, 0] + ridership[:, 1])

# Disabled demo: element-wise addition of two whole 2-D arrays.
if False:
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    b = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
    print(a + b)


def mean_riders_for_max_station(ridership):
    """Exercise placeholder: ignores ``ridership`` and returns (None, None).

    Returns:
        A 2-tuple ``(overall_mean, mean_for_max)``; both are currently None.

    NOTE(review): how the "max station" should be chosen is not specified
    anywhere in this file.
    """
    overall_mean = None
    mean_for_max = None
    return (overall_mean, mean_for_max)
import pandas as pd

# Disabled demo: Series.apply() calls a function on every element.
if False:
    s = pd.Series([1, 2, 3, 4, 5])

    def add_one(x):
        """Return ``x`` plus one."""
        return x + 1

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(s.apply(add_one))

# Sample "First Last" names.
names = pd.Series([
    'Andre Agassi',
    'Barry Bonds',
    'Christopher Columbus',
    'Daniel Defoe',
    'Emilio Estevez',
    'Fred Flintstone',
    'Greta Garbo',
    'Humbert Humbert',
    'Ivan Ilych',
    'James Joyce',
    'Keira Knightley',
    'Lois Lane',
    'Mike Myers',
    'Nick Nolte',
    'Ozzy Osbourne',
    'Pablo Picasso',
    'Quirinus Quirrell',
    'Rachael Ray',
    'Susan Sarandon',
    'Tina Turner',
    'Ugueth Urbina',
    'Vince Vaughn',
    'Woodrow Wilson',
    'Yoji Yamada',
    'Zinedine Zidane'
])


def _reverse_name(name):
    """Turn one 'First Last' string into 'Last, First'."""
    split_name = name.split(" ")
    first_name = split_name[0]
    last_name = split_name[1]
    return last_name + ', ' + first_name


def reverse_names(names):
    """Return a new Series with every 'First Last' entry as 'Last, First'.

    The previous body referred to an undefined ``name`` variable and so
    raised NameError on every call; the per-name logic now lives in
    ``_reverse_name`` and is applied to each element of ``names``.

    Args:
        names: pandas Series of strings holding at least two
            space-separated words each. Only the first two words are used.

    Returns:
        A Series with the same index as ``names``.

    Raises:
        IndexError: if an entry contains no space.
    """
    return names.apply(_reverse_name)
vector 123 * scalar 3 = 123123123, 369, or error?
these are reasonable answers
More vectorized operations
Math operations
add, subtract, multiply, divide, exponentiate
Logical operations
&, |, ~
Comparison operations
>, >=, <, <=, ==, !=
import numpy as np

# Fixes applied to this script: the subtraction lines used an en dash
# instead of the ASCII minus sign, and the second `if False` had no colon;
# both are syntax errors. Single-argument print(...) behaves the same on
# Python 2 and Python 3.

# Disabled demo: element-wise arithmetic between two arrays of equal length.
if False:
    a = np.array([1, 2, 3, 4])
    b = np.array([1, 2, 1, 2])

    print(a + b)
    print(a - b)
    print(a * b)
    print(a / b)
    print(a ** b)

# Disabled demo: arithmetic between an array and a scalar.
if False:
    a = np.array([1, 2, 3, 4])
    b = 2

    print(a + b)
    print(a - b)
    print(a * b)
    print(a / b)
    print(a ** b)

# Disabled demo: element-wise comparisons, each yielding a boolean array.
if False:
    a = np.array([1, 2, 3, 4, 5])
    b = np.array([5, 4, 3, 2, 1])

    print(a > b)
    print(a >= b)
    print(a < b)
    print(a <= b)
    print(a == b)
    print(a != b)
code snippet
import numpy as np

a = np.array([1, 2, 3, 4])
# `b` is a second name for the same array object, not a copy.
b = a
# += modifies that array in place, so the change is visible through `b` too.
a += np.array([1, 1, 1, 1])
# Prints [2 3 4 5]. Single-argument print(...) behaves the same on
# Python 2 and Python 3.
print(b)
+= operates in-place while + does not
# The module was imported as `pandas` while every use below says `pd`;
# the alias now matches the usage.
import pandas as pd

# Each disabled block adds two Series that carry the same index labels.
# `index[...]` was missing its `=` and would have looked up an undefined
# name `index`; it is now the `index=` keyword argument.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
# NOTE(review): the four blocks are identical in this file - presumably they
# were meant to use different index labels; confirm against the source notes.
if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)
import numpy as np

# First 20 country names, in alphabetical order.
countries = np.array([
    'Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
    'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas',
    'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
    'Belize', 'Benin', 'Bhutan', 'Bolivia',
    'Bosnia and Herzegovina'
])

# One value per country, in the same order as `countries`
# (the disabled loop below pairs them by index).
employment = np.array([
    55.70000076, 51.40000153, 50.5, 75.69999695,
    58.40000153, 40.09999847, 61.5, 57.09999847,
    60.90000153, 66.59999847, 60.40000153, 68.09999847,
    66.90000153, 53.40000153, 48.59999847, 56.79999924,
    71.59999847, 58.40000153, 70.40000153, 41.20000076
])

# Disabled demo: accessing single elements.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
if False:
    print(countries[0])
    print(countries[3])

# Disabled demo: slicing.
if False:
    print(countries[0:3])
    print(countries[:3])
    print(countries[17:])
    print(countries[:])

# Disabled demo: element types of various arrays.
if False:
    print(countries.dtype)
    print(employment.dtype)
    print(np.array([0, 1, 2, 3]).dtype)
    print(np.array([1.0, 1.5, 2.0, 2.5]).dtype)
    print(np.array([True, False, True]).dtype)
    print(np.array(['AL', 'AK', 'AZ', 'AR', 'CA']).dtype)

# Disabled demo: looping over one array, then over two arrays by index.
if False:
    for country in countries:
        print('Examining country {}'.format(country))

    for i in range(len(countries)):
        country = countries[i]
        country_employment = employment[i]
        print('Country {} has employment {}'.format(country,
                                                    country_employment))

# Disabled demo: built-in NumPy reductions.
if False:
    print(employment.mean())
    print(employment.std())
    print(employment.max())
    print(employment.sum())


def max_employment(countries, employment):
    """Exercise placeholder: ignores both arguments and returns (None, None).

    Returns:
        A 2-tuple ``(max_country, max_value)``; both are currently None.

    NOTE(review): presumably meant to return the country with the highest
    employment value together with that value; not implemented here.
    """
    max_country = None
    max_value = None
    return (max_country, max_value)