python api sample2

import os
import urllib
import webapp2
import jinja2

from apiclient.discovery import build
from optparse import OptionParser

# Jinja2 environment that loads templates from the directory containing this
# file, with the autoescape extension enabled.
JINJA_ENVIRONMENT = jinja2.Environment(
	extensions=['jinja2.ext.autoescape'],
	loader=jinja2.FileSystemLoader(os.path.dirname(__file__)))

# API key passed to the YouTube client. While it holds the placeholder value,
# MainHandler.get() writes setup instructions instead of calling the API.
DEVELOPER_KEY = "REPLACE_ME"
# Service name and version handed to apiclient.discovery.build().
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

class MainHandler(webapp2.RequestHandler):
	"""Request handler that lists YouTube search results for a fixed query."""

	def get(self):
		"""Search YouTube for "Hello" and render the results with index.html.

		If DEVELOPER_KEY still holds its placeholder value, only a setup
		message is written and no API call is made.
		"""
		if DEVELOPER_KEY == "REPLACE_ME":
			self.response.write("""You must set up a project and get an API key
									to run this project. Please visit 
									<landing page> to do so.""")
			return

		youtube = build(
			YOUTUBE_API_SERVICE_NAME,
			YOUTUBE_API_VERSION,
			developerKey=DEVELOPER_KEY)
		search_response = youtube.search().list(
			q="Hello",
			part="id,snippet",
			maxResults=5
			).execute()

		videos = []
		channels = []
		playlists = []

		# Sort each result into a list by its kind, formatted as "title (id)".
		for search_result in search_response.get("items", []):
			if search_result["id"]["kind"] == "youtube#video":
				videos.append("%s (%s)" % (search_result["snippet"]["title"],
					search_result["id"]["videoId"]))
			elif search_result["id"]["kind"] == "youtube#channel":
				channels.append("%s (%s)" % (search_result["snippet"]["title"],
					search_result["id"]["channelId"]))
			elif search_result["id"]["kind"] == "youtube#playlist":
				playlists.append("%s (%s)" % (search_result["snippet"]["title"],
					search_result["id"]["playlistId"]))

		template_values = {
			'videos': videos,
			'channels': channels,
			'playlists': playlists
		}

		self.response.headers['Content-type'] = 'text/plain'
		template = JINJA_ENVIRONMENT.get_template('index.html')
		self.response.write(template.render(template_values))


# WSGI application object: every path is routed to MainHandler.
app = webapp2.WSGIApplication([
	('/.*', MainHandler)
	], debug=True)

Optimizer

An optimizer can be used to:
– find minimum values of functions
– build parameterized models based on data
– refine allocations to stocks in portfolios
f(x) = x^2 + x^3 + s
f(x) = (x-1.5)^2 + 0.5

"""Minimize an objective function, using SciPy."""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as spo

def f(X):
	"""Given a scalar X, return some value (a real number).

	Evaluates (X - 1.5)**2 + 0.5 and prints every evaluation.
	"""
	Y = (X - 1.5)**2 + 0.5
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print("X = {}, Y = {}".format(X, Y))
	return Y

def test_run():
	"""Minimize f with SLSQP from an initial guess of 2.0 and print the result."""
	Xguess = 2.0
	min_result = spo.minimize(f, Xguess, method='SLSQP', options={'disp': True})
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print("Minima found at:")
	print("X = {}, Y = {}".format(min_result.x, min_result.fun))

if __name__ == "__main__":
	test_run()

Pandas Fillna()

Pandas Fillna() documentation
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html
DataFrame.fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs)

fillna(method='ffill')
"""Plot a histogram"""

import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data

def compute_daily_returns(df):
	"""Compute and return the daily return values of df.

	Every row after the first becomes (row / previous row) - 1. The first
	row is set to 0 because it has no previous row.
	"""
	daily_returns = df.copy()
	# .values drops the index so rows are paired by position instead of
	# being aligned on index labels.
	daily_returns[1:] = (df[1:] / df[:-1].values) - 1
	# .iloc replaces .ix, which pandas deprecated and later removed.
	daily_returns.iloc[0, :] = 0
	return daily_returns

def test_run():
	"""Plot SPY prices for 2009-2012, then plot their daily returns."""
	dates = pd.date_range('2009-01-01','2012-12-31')
	symbols = ['SPY']
	df = get_data(symbols, dates)
	plot_data(df)

	# Assignment, not subtraction: daily_returns must be bound before it is
	# plotted on the next line.
	daily_returns = compute_daily_returns(df)
	plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

if __name__ == "__main__":
	test_run()

scatterplots in python

"""Scatterplot."""

import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data

def compute_daily_returns(df):
	"""Return a frame of daily returns with the same shape as df.

	Rows after the first hold (row / previous row) - 1; the first row is 0.
	"""
	daily_returns = df.copy()
	# Dividing by the raw .values pairs each row with the one before it by
	# position rather than by index label.
	daily_returns[1:] = (df[1:] / df[:-1].values) - 1
	# .iloc replaces .ix, which pandas deprecated and later removed.
	daily_returns.iloc[0, :] = 0
	return daily_returns

def test_run():
	"""Scatter-plot XOM daily returns against SPY daily returns for 2009-2012."""
	tickers = ['SPY', 'XOM', 'GLD']
	date_range = pd.date_range('2009-01-01', '2012-12-31')
	prices = get_data(tickers, date_range)

	returns = compute_daily_returns(prices)

	# One point per row: SPY return on the x axis, XOM return on the y axis.
	returns.plot(kind='scatter',x='SPY',y='XOM')
	plt.show()

if __name__ == "__main__":
	test_run()

Arithmetic operations

import numpy as np

def test_run():
	"""Print a 2x5 integer array and the result of multiplying it by 2."""
	a = np.array([(1, 2, 3, 4, 5),(10, 20, 30, 40, 50)])
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print("Original array a:\n{}".format(a))

	print("\nMultiply a by 2:\n{}".format(2 * a))

if __name__ == "__main__":
	test_run()

Rolling statistics can signal a buying opportunity
Rolling standard deviation

def test_run():
	"""Plot SPY prices for 2012 together with a rolling mean over a 20-row window."""
	dates = pd.date_range('2012-01-01','2012-12-31')
	symbols = ['SPY']
	df = get_data(symbols, dates)

	ax = df['SPY'].plot(title="SPY rolling mean", label='SPY')

	# Series.rolling(...).mean() replaces pd.rolling_mean, which pandas removed.
	rm_SPY = df['SPY'].rolling(window=20).mean()

	# Draw on the same axes so both lines share one chart.
	rm_SPY.plot(label='Rolling mean', ax=ax)

Array attributes

import numpy as np

def test_run():
	"""Print a random 5x4 array followed by its shape."""
	a = np.random.random((5, 4))
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(a)
	print(a.shape)

if __name__ == "__main__":
	test_run()
import numpy as np

def test_run():
	"""Print the number of elements in a random 5x4 array."""
	a = np.random.random((5, 4))
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(a.size)

if __name__ == "__main__":
	test_run()
import numpy as np

def test_run():
	"""Print a 5x4 array of random integers in [0, 10) from a fixed seed."""
	# Seeding makes the generated array the same on every run.
	np.random.seed(693)
	a = np.random.randint(0, 10, size=(5, 4))
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print("Array:\n{}".format(a))

if __name__ == "__main__":
	test_run()

Locate maximum value

import numpy as np

def get_max_index(a):
	"""Return the index of the maximum value in a, as reported by a.argmax()."""
	max_index = a.argmax()
	return max_index

def test_run():
	"""Print an integer array, its maximum value and the index of that maximum."""
	a = np.array([9, 6, 2, 3, 12, 14, 7, 10], dtype=np.int32)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print("Array: {}".format(a))

	print("Maximum value {}".format(a.max()))
	print("Index of max.: {}".format(get_max_index(a)))


if __name__ == "__main__":
	test_run()

Timing python operations

import time

def time_run():
	"""Measure and print how long a single print call takes."""
	t1 = time.time()
	print("ML4T")
	t2 = time.time()
	# The doubled spaces reproduce the output of the original comma-separated
	# Python 2 print statement.
	print("The time taken by print statement is  {}  seconds".format(t2 - t1))

if __name__ == "__main__":
	# This snippet defines time_run, so that is the function to call here.
	time_run()

Accessing array element

import numpy as np

def test_run():
	"""Print a random 5x4 array and the element at row 3, column 2."""
	a = np.random.rand(5, 4)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print("Array:\n{}".format(a))

	element = a[3, 2]
	print(element)

if __name__ == "__main__":
	test_run()

Indexing an array with another array

import numpy as np

def test_run():
	"""Print the elements of a random 1-D array selected by an index array."""
	a = np.random.rand(5)
	# An index may repeat: position 1 is selected twice here.
	indices = np.array([1,1,2,3])
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(a[indices])

if __name__ == "__main__":
	test_run()

Replace a slice

nd1[0:2,0:2]=nd2[-2:,2:4]

"""Creating NumPy arrays."""
import numpy as np

def test_run():
	"""Print a 2x3 array built from a list of tuples."""
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(np.array([(2, 3, 4),(5, 6, 7)]))

if __name__ == "__main__":
	test_run()
"""Creating NumPy arrays."""
import numpy as np

def test_run():
	"""Print an empty 1-D array of length 5 and an empty 5x4x3 array."""
	# np.empty does not initialise its contents, so the printed values are
	# whatever happened to be in memory.
	print(np.empty(5))
	print(np.empty((5,4,3)))

if __name__ == "__main__":
	test_run()
import numpy as np

def test_run():
	"""Print a 5x4 array of integer ones."""
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(np.ones((5, 4), dtype=np.int_))

if __name__ == "__main__":
	test_run()

plot on “equal footing”

The best way to normalize price data so that all prices start at 1.0
df1 = df1/df1[0]

import os
import pandas as pd
import matplotlib.pyplot as plt

def plot_selected(df, columns, start_index, end_index):
	"""Plot the given columns of df for index labels start_index through end_index."""
	# .loc slices by label and includes both endpoints.
	plot_data(df.loc[start_index:end_index, columns], title="Selected data")

def symbol_to_path(symbol, base_dir="data"):
	"""Build the path of the CSV file for a ticker symbol inside base_dir."""
	filename = str(symbol) + ".csv"
	return os.path.join(base_dir, filename)

def get_data(symbols, dates):
	"""Read adjusted close prices for the given symbols from CSV files.

	Returns a DataFrame indexed by dates with one column per symbol. SPY is
	always included, and rows where SPY has no value are dropped.
	"""
	df = pd.DataFrame(index=dates)
	if 'SPY' not in symbols:
		# Build a new list so the caller's list is left untouched.
		symbols = ['SPY'] + list(symbols)

	for symbol in symbols:
		df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
			parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])
		# Name the price column after the symbol so joined columns stay distinct.
		df_temp = df_temp.rename(columns={'Adj Close': symbol})
		df = df.join(df_temp)
		if symbol == 'SPY':
			df = df.dropna(subset=["SPY"])

	# Return only after every symbol has been joined.
	return df

def plot_data(df, title="Stock prices"):
	"""Plot df with the given title and "Date" / "Price" axis labels, then show it."""
	axes = df.plot(title=title, fontsize=12)
	axes.set_xlabel("Date")
	axes.set_ylabel("Price")
	plt.show()

def test_run():
	"""Load 2010 prices for GOOG, IBM and GLD, then plot SPY and IBM for March 2010."""
	tickers = ['GOOG', 'IBM', 'GLD']
	date_range = pd.date_range('2010-01-01', '2010-12-31')

	prices = get_data(tickers, date_range)

	plot_selected(prices, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')

if __name__ == "__main__":
	test_run()

Pandas dataframe

Problems to solve
-data ranges
-multiple stocks
-align dates
-proper date order

Building a dataframe

'''Build a dataframe in pandas'''
import pandas as pd

def test_run():
	"""Print the range of dates from 2010-01-22 through 2010-01-26."""
	start_date='2010-01-22'
	end_date='2010-01-26'
	dates=pd.date_range(start_date,end_date)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(dates)

if __name__ == "__main__":
	test_run()
'''Build a dataframe in pandas'''
import pandas as pd

def test_run():
	"""Create an empty date-indexed DataFrame and print the SPY data read from CSV."""
	start_date='2010-01-22'
	end_date='2010-01-26'
	dates=pd.date_range(start_date,end_date)

	#Create an empty dataframe
	df1=pd.DataFrame(index=dates)

	#Read SPY data into temporary dataframe
	dfSPY = pd.read_csv("data/SPY.csv",index_col="Date",parse_dates=True)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(dfSPY)

	#Join the two dataframes using DataFrame.join()
	#df1=df1.join(dfSPY)
	#print df1

if __name__ == "__main__":
	test_run()
"""Utility functions"""

import os
import pandas as pd

def symbol_to_path(symbol, base_dir="data"):
	"""Return the path base_dir/<symbol>.csv for the given ticker symbol."""
	csv_name = "%s.csv" % (str(symbol),)
	return os.path.join(base_dir, csv_name)

def get_data(symbols, dates):
	"""Read stock data (adjusted close) for given symbols from csv files

	Returns a DataFrame indexed by dates with one column per symbol. SPY is
	always included, and rows where SPY has no value are dropped.
	"""
	df = pd.DataFrame(index=dates)
	if 'SPY' not in symbols:
		# Build a new list so the caller's list is left untouched.
		symbols = ['SPY'] + list(symbols)

	for symbol in symbols:
		df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
			parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])
		# Name the price column after the symbol so joined columns stay distinct.
		df_temp = df_temp.rename(columns={'Adj Close': symbol})
		df = df.join(df_temp)
		if symbol == 'SPY':
			df = df.dropna(subset=["SPY"])

	return df

def test_run():
	"""Load GOOG, IBM and GLD data for 2010-01-22 to 2010-01-26 and print it."""
	# Define a data range
	dates = pd.date_range('2010-01-22','2010-01-26')

	# Choose stock symbols to read
	symbols = ['GOOG', 'IBM', 'GLD']

	# Get stock data
	df = get_data(symbols, dates)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(df)

if __name__ == "__main__":
	test_run()

Plotting stock price data

import pandas as pd
import matplotlib.pyplot as plt

def test_run():
	"""Print and plot the adjusted close prices read from the AAPL CSV file."""
	# The ticker is AAPL, matching the symbol used by the other snippets.
	df = pd.read_csv("data/AAPL.csv")
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(df['Adj Close'])
	df['Adj Close'].plot()
	# pyplot is imported as plt; there is no name `plot` in scope.
	plt.show()

if __name__ == "__main__":
	test_run()

Here we go

import pandas as pd
import matplotlib.pyplot as plt

def test_run():
	"""Plot the High column read from the IBM CSV file."""
	df = pd.read_csv("data/IBM.csv")
	df['High'].plot()
	# pyplot is imported as plt; there is no name `plot` in scope.
	plt.show()

if __name__ == "__main__":
	test_run()

Plot two columns; you can observe two lines on the same chart.

import pandas as pd
import matplotlib.pyplot as plt

def test_run():
	"""Plot the Close and Adj Close columns of the AAPL CSV file on one chart."""
	# The ticker is AAPL, matching the symbol used by the other snippets.
	df = pd.read_csv("data/AAPL.csv")
	df[['Close','Adj Close']].plot()
	# pyplot is imported as plt; there is no name `plot` in scope.
	plt.show()

if __name__ == "__main__":
	test_run()

unsupported operand type(s) for +: ‘int’ and ‘str’

Compute mean volume

    df = pd.read_csv("data/{}.csv".format(symbol))  # read in data
    s = sum(df)
    l = len(df)
    print(s/l)
unsupported operand type(s) for +: 'int' and 'str'

A TypeError is raised.

Instead, we must calculate the mean with the DataFrame's own mean() method.

import pandas as pd

def get_mean_volume(symbol):
	"""Print the mean of every numeric column for the given symbol.

	Nothing is returned, so the caller receives None.
	"""
	df = pd.read_csv("data/{}.csv".format(symbol))
	# numeric_only=True skips text columns, which newer pandas refuses to
	# average.
	print(df.mean(numeric_only=True))

def test_run():
	"""Print a "Mean Volume" heading and the get_mean_volume result for AAPL and IBM."""
	for symbol in ['AAPL', 'IBM']:
		print "Mean Volume"
		# Python 2 print statement: items are written left to right, so
		# `symbol` is written before get_mean_volume(symbol) is called.
		print symbol, get_mean_volume(symbol)

if __name__ == "__main__": # if run standalone
	test_run()
Mean Volume
AAPL Open         1.363176e+02
High         1.380075e+02
Low          1.344201e+02
Close        1.362885e+02
Volume       2.149143e+07
Adj Close    1.282174e+02
dtype: float64
None
Mean Volume
IBM Open         1.109328e+02
High         1.121182e+02
Low          1.098853e+02
Close        1.110325e+02
Volume       7.103571e+06
Adj Close    1.022113e+02
dtype: float64
None

Here is a solution

import pandas as pd

def get_mean_volume(symbol):
	"""Return the mean value of the Volume column for the given symbol."""
	df = pd.read_csv("data/{}.csv".format(symbol))
	return df['Volume'].mean()

def test_run():
	"""Print the mean trading volume of AAPL and IBM, one symbol per line."""
	for symbol in ['AAPL', 'IBM']:
		# Single-argument print(...) behaves the same on Python 2 and Python 3.
		print("Mean Volume")
		print("{} {}".format(symbol, get_mean_volume(symbol)))

if __name__ == "__main__": # if run standalone
	test_run()
Mean Volume
AAPL 21491431.3386
Mean Volume
IBM 7103570.80315