"""Collect flight data from a directory of per-carrier/airport HTML files.

The script extracts ``<datadir>.zip``, lists the extracted files, runs
``process_file`` on each one, and checks the combined result in ``test``.
"""

import os
from zipfile import ZipFile

from bs4 import BeautifulSoup

# Name of the data directory; the archive is expected at "<datadir>.zip".
datadir = "data"


def open_zip(datadir):
    """Extract ``<datadir>.zip`` into the current working directory."""
    with ZipFile('{0}.zip'.format(datadir), 'r') as myzip:
        myzip.extractall()


def process_all(datadir):
    """Return the names of all entries in ``datadir``."""
    return os.listdir(datadir)


def process_file(f):
    """Parse one HTML file from the data directory and return its entries.

    ``f`` is a file name inside the module-level ``datadir``. Its first six
    characters are split on "-" into a courier code and an airport code
    (for example "FL-ATL"); a name that does not yield exactly two parts
    raises ``ValueError``.

    Returns a list of entries. The extraction step is not implemented yet,
    so the list is currently always empty.
    """
    data = []
    info = {}
    info["courier"], info["airport"] = f[:6].split("-")

    with open("{}/{}".format(datadir, f), "r") as html:
        # NOTE(review): no parser is named, so bs4 picks whichever one is
        # installed; consider pinning one for reproducible results.
        soup = BeautifulSoup(html)
        # TODO: walk ``soup`` and append one dict per table row to ``data``.
        # Judging by the assertions in test(), each entry presumably holds
        # "courier", "airport", "year", "month" and a "flights" dict with
        # "domestic" and "international" counts; confirm against the HTML.

    return data


def test():
    """Extract the archive, process every file, and check the result."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Running a simple test...")
    open_zip(datadir)
    files = process_all(datadir)
    data = []
    for f in files:
        data += process_file(f)

    assert len(data) == 399
    for entry in data[:3]:
        assert isinstance(entry["year"], int)
        assert isinstance(entry["month"], int)
        assert isinstance(entry["flights"]["domestic"], int)
        assert len(entry["airport"]) == 3
        assert len(entry["courier"]) == 2
    assert data[0]["courier"] == 'FL'
    assert data[0]["month"] == 10
    assert data[-1]["airport"] == "ATL"
    assert data[-1]["flights"] == {'international': 108289, 'domestic': 701425}

    print("... success!")


if __name__ == "__main__":
    test()