def mapper():
    """Map stage of the word count: emit ``word<TAB>1`` for every word on stdin.

    Each input line is split on whitespace. Every token has ASCII punctuation
    (``string.punctuation``) removed and is lower-cased before being written
    to stdout as ``<word>\\t1``, one record per line. Tokens that consist
    solely of punctuation are skipped, because an empty key would produce a
    record the reduce stage cannot parse.
    """
    # Imported locally so the function works even though the surrounding
    # script does not import these modules before calling mapper().
    import string
    import sys

    punctuation = frozenset(string.punctuation)
    for line in sys.stdin:
        # split() without arguments splits on runs of whitespace and never
        # yields empty tokens, so no explicit strip() is needed.
        for token in line.split():
            word = "".join(ch for ch in token if ch not in punctuation).lower()
            if not word:
                continue
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("{0}\t{1}".format(word, 1))
# Run the map stage only when this file is executed as a script, so that
# importing the module does not start consuming stdin.
if __name__ == "__main__":
    mapper()
# Reduce stage -> reducer
import sys
def reducer():
    """Reduce stage of the word count: sum the counts of consecutive equal keys.

    Reads ``key<TAB>count`` records from stdin. Records with the same key are
    expected to be adjacent (the pipeline sorts the mapper output first); a
    running total is kept for the current key and written to stdout as
    ``<key>\\t<total>`` whenever the key changes, and once more after the last
    record. Lines that do not split into exactly two tab-separated fields are
    ignored.

    Raises:
        ValueError: if the count field of a record is not an integer.
    """
    word_count = 0
    old_key = None
    for line in sys.stdin:
        data = line.strip().split("\t")
        if len(data) != 2:
            # Malformed record: skip it instead of aborting the whole job.
            continue
        this_key, count = data
        if old_key is not None and old_key != this_key:
            # Key changed: flush the finished key and start a new total.
            print("{0}\t{1}".format(old_key, word_count))
            word_count = 0
        old_key = this_key
        # Word counts are whole numbers, so accumulate as int to print "3"
        # rather than "3.0".
        word_count += int(count)
    # Flush the final key; old_key stays None when no valid record was read.
    if old_key is not None:
        print("{0}\t{1}".format(old_key, word_count))
# Driver script (bash) that runs the full pipeline:
#   #!/bin/bash
#   cat ../../data/aliceInWorderland.txt | python word_count_mapper.py | sort | python word_count_reducer.py