import os
import urllib

import webapp2
import jinja2

from apiclient.discovery import build
from optparse import OptionParser

JINJA_ENVIRONMENT = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
    extensions=['jinja2.ext.autoescape'])

# Replace the placeholder with your own API key; the handler refuses to call
# the API while the placeholder value is still in place.
DEVELOPER_KEY = "REPLACE_ME"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"


class MainHandler(webapp2.RequestHandler):
    """Run a fixed YouTube search and render the results via index.html."""

    def get(self):
        """Handle GET requests.

        Writes a setup reminder when DEVELOPER_KEY is still the placeholder.
        Otherwise searches for "Hello" (at most 5 results), sorts the hits
        into videos, channels and playlists as "title (id)" strings, and
        renders them with the index.html template.
        """
        if DEVELOPER_KEY == "REPLACE_ME":
            self.response.write(
                "You must set up a project and get an API key to run this "
                "project. Please visit <landing page> to do so.")
            return

        youtube = build(
            YOUTUBE_API_SERVICE_NAME,
            YOUTUBE_API_VERSION,
            developerKey=DEVELOPER_KEY)
        search_response = youtube.search().list(
            q="Hello",
            part="id,snippet",
            maxResults=5
        ).execute()

        videos = []
        channels = []
        playlists = []

        for search_result in search_response.get("items", []):
            kind = search_result["id"]["kind"]
            title = search_result["snippet"]["title"]
            if kind == "youtube#video":
                videos.append(
                    "%s (%s)" % (title, search_result["id"]["videoId"]))
            elif kind == "youtube#channel":
                channels.append(
                    "%s (%s)" % (title, search_result["id"]["channelId"]))
            elif kind == "youtube#playlist":
                playlists.append(
                    "%s (%s)" % (title, search_result["id"]["playlistId"]))

        template_values = {
            'videos': videos,
            'channels': channels,
            'playlists': playlists
        }

        self.response.headers['Content-type'] = 'text/plain'
        template = JINJA_ENVIRONMENT.get_template('index.html')
        self.response.write(template.render(template_values))


app = webapp2.WSGIApplication([
    ('/.*', MainHandler)
], debug=True)
Category: Python
Optimizer
An optimizer can be used to:
– find minimum values of functions
– build parameterized models based on data
– refine allocations to stocks in portfolios
f(x) = x^2 + x^3 + 5
f(x) = (x-1.5)^2 + 0.5
"""Minimize an objective function, using SciPy."""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as spo


def f(X):
    """Given a scalar X, return some value (a real number).

    Evaluates Y = (X - 1.5)**2 + 0.5 and prints the (X, Y) pair, so every
    call the minimizer makes is visible on stdout.
    """
    Y = (X - 1.5)**2 + 0.5
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("X = {}, Y = {}".format(X, Y))
    return Y


def test_run():
    """Minimize f starting from X = 2.0 with SLSQP and report the minimum."""
    Xguess = 2.0
    min_result = spo.minimize(f, Xguess, method='SLSQP',
                              options={'disp': True})
    print("Minima found at:")
    print("X = {}, Y = {}".format(min_result.x, min_result.fun))


if __name__ == "__main__":
    test_run()
Pandas Fillna()
Pandas Fillna() documentation
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html
DataFrame.fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs)
fillna(method='ffill')
"""Plot a histogram"""

import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data


def compute_daily_returns(df):
    """Return a frame shaped like df holding row-over-row fractional changes.

    Each row from the second onward is (row / previous row) - 1; the first
    row has no predecessor and is set to 0.
    """
    daily_returns = df.copy()
    # .values strips the index so the division is positional, not aligned
    # on labels.
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    # .iloc replaces the .ix indexer, which pandas has removed.
    daily_returns.iloc[0, :] = 0
    return daily_returns


def test_run():
    """Plot SPY prices for 2009-2012, then their daily returns."""
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)
    plot_data(df)

    # Assign the result: the original used "-" here, which referenced an
    # undefined name instead of binding daily_returns.
    daily_returns = compute_daily_returns(df)
    plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")


if __name__ == "__main__":
    test_run()
scatterplots in python
"""Scatterplot."""

import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data


def compute_daily_returns(df):
    """Return a frame shaped like df holding row-over-row fractional changes.

    Each row from the second onward is (row / previous row) - 1; the first
    row has no predecessor and is set to 0.
    """
    daily_returns = df.copy()
    # .values strips the index so the division is positional, not aligned
    # on labels.
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    # .iloc replaces the .ix indexer, which pandas has removed.
    daily_returns.iloc[0, :] = 0
    return daily_returns


def test_run():
    """Scatter-plot SPY daily returns against XOM daily returns."""
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY', 'XOM', 'GLD']
    df = get_data(symbols, dates)
    daily_returns = compute_daily_returns(df)

    daily_returns.plot(kind='scatter', x='SPY', y='XOM')
    plt.show()


if __name__ == "__main__":
    test_run()
Arithmetic operations
import numpy as np


def test_run():
    """Print a 2x5 integer array and the result of multiplying it by 2."""
    a = np.array([(1, 2, 3, 4, 5), (10, 20, 30, 40, 50)])
    # Single-argument print(...) runs on both Python 2 and Python 3; the
    # label and the array are joined with format() to keep the same text.
    print("Original array a:\n{}".format(a))
    print("\nMultiply a by 2:\n{}".format(2 * a))


if __name__ == "__main__":
    test_run()
Rolling statistics can signal a buying opportunity
Rolling standard deviation
def test_run():
    """Plot the SPY column for 2012 together with its rolling mean.

    The rolling mean uses a window of 20 rows and is drawn on the same axes
    as the raw SPY series. Relies on `pd` and `get_data` being in scope
    from the surrounding module.
    """
    dates = pd.date_range('2012-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)

    ax = df['SPY'].plot(title="SPY rolling mean", label='SPY')

    # Series.rolling(...).mean() replaces the module-level pd.rolling_mean
    # function, which pandas has removed.
    rm_SPY = df['SPY'].rolling(window=20).mean()
    rm_SPY.plot(label='Rolling mean', ax=ax)
Array attributes
import numpy as np


def test_run():
    """Print a random 5x4 array followed by its shape tuple."""
    a = np.random.random((5, 4))
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(a)
    print(a.shape)


if __name__ == "__main__":
    test_run()
import numpy as np


def test_run():
    """Print the total number of elements in a random 5x4 array."""
    a = np.random.random((5, 4))
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(a.size)


if __name__ == "__main__":
    test_run()
import numpy as np


def test_run():
    """Print a 5x4 array of random integers in [0, 10) from a fixed seed."""
    np.random.seed(693)  # fixed seed so every run prints the same numbers
    a = np.random.randint(0, 10, size=(5, 4))
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print("Array:\n{}".format(a))


if __name__ == "__main__":
    test_run()
Locate maximum value
import numpy as np


def get_max_index(a):
    """Return the index of the maximum value in the array a."""
    return a.argmax()


def test_run():
    """Print a sample 1-D array, its maximum, and the maximum's index."""
    a = np.array([9, 6, 2, 3, 12, 14, 7, 10], dtype=np.int32)
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print("Array: {}".format(a))
    print("Maximum value {}".format(a.max()))
    print("Index of max.: {}".format(get_max_index(a)))


if __name__ == "__main__":
    test_run()
Timing python operations
import time


def time_run():
    """Measure and report how long a single print call takes."""
    t1 = time.time()
    print("ML4T")
    t2 = time.time()
    # Spacing matches what the original comma-separated print produced.
    print("The time taken by print statement is  {}  seconds".format(t2 - t1))


if __name__ == "__main__":
    # The original guard called test_run(), which this script never defines.
    time_run()
Accessing array element
import numpy as np


def test_run():
    """Print a random 5x4 array and the element at row 3, column 2."""
    a = np.random.rand(5, 4)
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print("Array:\n{}".format(a))

    element = a[3, 2]
    print(element)


if __name__ == "__main__":
    test_run()
Indexing an array with another array
import numpy as np


def test_run():
    """Print the elements of a random 1-D array picked by an index array.

    The index array may repeat positions (index 1 appears twice), so the
    printed result has one value per index, not one per distinct position.
    """
    a = np.random.rand(5)
    indices = np.array([1, 1, 2, 3])
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(a[indices])


if __name__ == "__main__":
    test_run()
Replace a slice
# Overwrite rows 0-1, columns 0-1 of nd1 with the last two rows, columns 2-3
# of nd2. nd1 and nd2 are defined outside this snippet — presumably 2-D
# NumPy arrays; confirm against the surrounding lesson.
nd1[0:2,0:2]=nd2[-2:,2:4]
"""Creating NumPy arrays."""

import numpy as np


def test_run():
    """Print a 2x3 array built from a list of tuples."""
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(np.array([(2, 3, 4), (5, 6, 7)]))


if __name__ == "__main__":
    test_run()
"""Creating NumPy arrays."""

import numpy as np


def test_run():
    """Print a 1-D and a 3-D array created with np.empty.

    np.empty does not initialize its contents, so the printed values are
    arbitrary and differ between runs.
    """
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(np.empty(5))
    print(np.empty((5, 4, 3)))


if __name__ == "__main__":
    test_run()
import numpy as np


def test_run():
    """Print a 5x4 array of integer ones."""
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(np.ones((5, 4), dtype=np.int_))


if __name__ == "__main__":
    test_run()
plot on “equal footing”
The best way to normalize price data so that all prices start at 1.0
# Divide every entry by the corresponding entry of df1[0] so the data starts
# at 1.0.
# NOTE(review): this assumes df1[0] selects the first ROW (true for a 2-D
# NumPy array). On a pandas DataFrame, df1[0] looks up a column labelled 0;
# the first row would be df1.iloc[0] — confirm which type df1 is.
df1 = df1/df1[0]
import os
import pandas as pd
import matplotlib.pyplot as plt


def plot_selected(df, columns, start_index, end_index):
    """Plot the given columns of df for index labels in the given range.

    Rows are selected by label from start_index to end_index (both ends
    included, as with any .loc label slice) and drawn with plot_data.
    """
    plot_data(df.loc[start_index:end_index, columns], title="Selected data")


def symbol_to_path(symbol, base_dir="data"):
    """Return CSV file path given ticker symbol."""
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))


def get_data(symbols, dates):
    """Read 'Adj Close' data for the given symbols from CSV files.

    Returns a DataFrame indexed by dates with one column per symbol. SPY is
    always included (added first when missing), and rows with no SPY value
    are dropped.
    """
    df = pd.DataFrame(index=dates)
    # Work on a copy so the caller's list is not modified when SPY is added.
    symbols = list(symbols)
    if 'SPY' not in symbols:
        symbols.insert(0, 'SPY')

    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        # Name the column after the ticker so the joined columns don't clash.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':
            df = df.dropna(subset=["SPY"])

    return df


def plot_data(df, title="Stock prices"):
    """Plot df with the given title and Date/Price axis labels."""
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()


def test_run():
    """Load 2010 data and plot SPY and IBM for March 2010."""
    dates = pd.date_range('2010-01-01', '2010-12-31')
    symbols = ['GOOG', 'IBM', 'GLD']
    df = get_data(symbols, dates)

    plot_selected(df, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')


if __name__ == "__main__":
    test_run()
Pandas dataframe
Problems to solve
-date ranges
-multiple stocks
-align dates
-proper date order
Building a dataframe
'''Build a dataframe in pandas'''

import pandas as pd


def test_run():
    '''Print the date range from 2010-01-22 through 2010-01-26.'''
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(dates)


if __name__ == "__main__":
    test_run()
'''Build a dataframe in pandas''' import pandas as pd def test_run(): start_date='2010-01-22' end_date='2010-01-26' dates=pd.date_range(start_date,end_date) #Create an empty dataframe df1=pd.DataFrame(index=dates) #Read SPY data into temporary dataframe dfSPY = pd.read_csv("data/SPY.csv",index_col="Date",parse_dates=True) print dfSPY #Join the two dataframes using DataFrame.join() #df1=df1.join(dfSPY) #print df1 if __name__ == "__main__": test_run()
"""Utility functions""" import os import pandas as pd def symbol_to_path(symbol, base_dir="data"): """Return CSV file path given ticker symbol.""" return os.path.join(base_dir, "{}.csv".format(str(symbol))) def get_data(symbols, dates): """Read stock data (adjusted close) for given symbols from csv files""" df = pd.DataFrame(index=dates) if 'SPY' not in symbols: symbols.insert(0, 'SPY') for symbol in symbols: return df def test_run(): # Define a data range dates = pd.date_range('2010-01-22','2010-01-26') # Choose stock symbols to read symbols = ['GOOG', 'IBM', 'GLD'] # Get stock data df = get_data(symbols, dates) print df if __name__ == "__main__": test_run()
Plotting stock price data
import pandas as pd
import matplotlib.pyplot as plt


def test_run():
    """Print and plot the 'Adj Close' column of the AAPL data."""
    # "AAPL" is the ticker used by the other examples here; the original
    # path spelled it "APPL".
    df = pd.read_csv("data/AAPL.csv")
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(df['Adj Close'])
    df['Adj Close'].plot()
    # pyplot is imported as plt; the original called the undefined "plot".
    plt.show()


if __name__ == "__main__":
    test_run()
Here we go
import pandas as pd
import matplotlib.pyplot as plt


def test_run():
    """Plot the 'High' column of the IBM data."""
    df = pd.read_csv("data/IBM.csv")
    df['High'].plot()
    # pyplot is imported as plt; the original called the undefined "plot".
    plt.show()


if __name__ == "__main__":
    test_run()
Plot two columns; you can observe two lines on the chart.
import pandas as pd
import matplotlib.pyplot as plt


def test_run():
    """Plot the 'Close' and 'Adj Close' columns of the AAPL data together."""
    # "AAPL" is the ticker used by the other examples here; the original
    # path spelled it "APPL".
    df = pd.read_csv("data/AAPL.csv")
    df[['Close', 'Adj Close']].plot()
    # pyplot is imported as plt; the original called the undefined "plot".
    plt.show()


if __name__ == "__main__":
    test_run()
unsupported operand type(s) for +: 'int' and 'str'
Compute mean volume
df = pd.read_csv("data/{}.csv".format(symbol)) # read in data s = sum(df) l = len(df) print(s/l)
unsupported operand type(s) for +: 'int' and 'str'
A TypeError is raised.
We must calculate the DataFrame mean instead.
import pandas as pd def get_mean_volume(sympol): df = pd.read_csv("data/{}.csv".format(symbol)) print(df.mean()) def test_run(): for symbol in ['AAPL', 'IBM']: print "Mean Volume" print symbol, get_mean_volume(symbol) if __name__ == "__main__": # if run standalone test_run()
Mean Volume AAPL Open 1.363176e+02 High 1.380075e+02 Low 1.344201e+02 Close 1.362885e+02 Volume 2.149143e+07 Adj Close 1.282174e+02 dtype: float64 None Mean Volume IBM Open 1.109328e+02 High 1.121182e+02 Low 1.098853e+02 Close 1.110325e+02 Volume 7.103571e+06 Adj Close 1.022113e+02 dtype: float64 None
Here is a solution
import pandas as pd def get_mean_volume(sympol): df = pd.read_csv("data/{}.csv".format(symbol)) return df['Volume'].mean() def test_run(): for symbol in ['AAPL', 'IBM']: print "Mean Volume" print symbol, get_mean_volume(symbol) if __name__ == "__main__": # if run standalone test_run()
Mean Volume AAPL 21491431.3386 Mean Volume IBM 7103570.80315