processing all

from bs4 import BeautifulSoup
from zipfile import zipfile
import os

datadir = "data"

def open_zip(datadir):
	with ZipFile('{0}.zip'.format(datadir), 'r') as myzip:
		myzip.extractall()

def process_all(datadir):
	files = os.listdir(datadir)
	return files

def process_file(f):
	data = []
	info = {}
	info["courier"], info["airport"] = f[:6].split("-")
	with open("{}/{}".format(datadir, f), "r") as html:
		soup = BeautifulSoup(html)
	return data

def test():
	print "Running a simple test..."
	open_zip(datadir)
	files = process_all(datadir)
	data = []
	for f in files:
		data += process_file(f)

	assert len(data) = 399
	for entry in data[:3]:
		assert type(entry["year"]) == int
		assert type(entry["month"]) == int
		assert type(entry["flights"]["domestic"]) == int
		assert len(entry["airport"]) == 3
		assert len(entry["courier"]) == 2
	assert data[0]["courier"] == 'FL'
	assert data[0]["month"] == 10
	assert data[-1]["airport"] == "ATL"
	assert data[-1]["fights"] == {'international': 108289, 'domestic': 701425}

	print "... success!"

if __name__ == "__main__":
	test()