import re

# Load the word list - you may need to edit the path in case
# you saved the file wordlist.txt somewhere else.
# Despite its name, listfilelines is one single string holding the whole
# file (not a list of lines). The "with" block closes the file once read.
with open('wordlist.txt', 'r') as wordlist_file:
    listfilelines = wordlist_file.read()


#INTRODUCTION:
# specify a regular expression pattern
# here \b is the start/end of a word
#       d means the letter d
#       .* means any number of characters
#       c means the letter c
#      \b is the start/end of a word
# re.findall returns all non-overlapping matches of the pattern as a list.
result = re.findall(r'\bd.*c\b', listfilelines)
# print(...) with one argument gives the same output on Python 2 and 3,
# whereas the bare "print result" statement is a syntax error on Python 3.
print(result)

#Other examples:
# All words containing the letters "wind"
#result = re.findall(r'\b.*wind.*\b',listfilelines)
#print result
# All words in capitals which start with a capital A
#result = re.findall(r'\bA[A-Z]*\b',listfilelines)
#print result
# All words which begin with the letters "ku"
#result = re.findall(r'\bku.*\b',listfilelines)
#print result


#Note the use of \b to indicate the start and end of each word.


#EXERCISE 1
#1a A list of words which start with the characters 'zy'.
#hint: It will be similar to the last example (for words starting with 'ku')
# 'zy' right after a word boundary, then any further word characters.
# \w* (rather than .*) stops at the end of the word, so a match can never
# spill over into the following words on the same line.
result_exercise1a = re.findall(r'\bzy\w*\b', listfilelines)
print(result_exercise1a)

#1b. A list of words all exactly 10 letters long.
#hint: use the {n} repetition syntax, e.g. x{2} matches exactly two x's in a row
# Exactly ten letters between two word boundaries. The boundaries on both
# sides keep the pattern from matching ten letters inside a longer word.
result_exercise1b = re.findall(r'\b[a-zA-Z]{10}\b', listfilelines)
print(result_exercise1b)

#1c. A list of words which have three vowels (a,e,i,o,u) in a row
# [aeiou]{3} is three vowels in a row; the \w* on either side picks up the
# rest of the word so that the whole word is returned, not just the vowels.
result_exercise1c = re.findall(r'\b\w*[aeiou]{3}\w*\b', listfilelines)
print(result_exercise1c)


#EXERCISE 2
#Write a function (an empty function called checkaddress
#is already ready to use) which takes a string as input,
#and tells you whether that string is an email address. A
#valid email address should contain the character @, and
#not have any spaces.
#
#Hint: use the match function, as follows:
#
#if re.match('yes',string):
#
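#which would tell you if the string started with the characters 'yes'
#(re.match only looks at the beginning of the string; use re.search
#to look for a pattern anywhere in the string)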

def checkaddress(address):
    """Tell whether the string address looks like an email address.

    Following the exercise text, an address is accepted when it contains
    exactly one '@', has at least one character on each side of it, and
    contains no whitespace anywhere.

    Returns True if address passes these checks, False otherwise.
    """
    # [^@\s]+ is one or more characters that are neither '@' nor whitespace.
    # re.match anchors the pattern at the start of the string and \Z anchors
    # it at the very end, so the whole string has to fit the pattern.
    return re.match(r'[^@\s]+@[^@\s]+\Z', address) is not None

#test your function!
# Print the results so the outcome of each check is visible when the
# script runs. Once checkaddress is implemented, the first call should
# report an invalid address (two '@' characters) and the second a valid one.
print(checkaddress('not@a@valid.email.address'))
print(checkaddress('valid@address.com'))


#EXERCISE 3
#Write some code that takes the following string and returns only the phone numbers. A phone number should be considered to start with +256.

# Sample text for the exercises below. It contains two phone numbers
# starting with +256, one of them written with a space in the middle.
s = "The Faculty of Computing and IT front desk can be contacted on +25641 540628 or +25641540620"

#Hint: re.findall(p,s) gives all the matches for pattern p in the string s.
#Don't forget to deal with phone numbers with spaces in!


#EXERCISE 4
#Write a function which takes a string and changes all the occurrences of numbers to their text equivalent (e.g. a 1 into a 'one', a 2 into a 'two'). Hint: You'll need to use the compile method too. The substitution of 1 into one has already been done.

def makenumbers(s):
    """Replace every digit in the string s with its English name.

    Each digit 0-9 becomes its word followed by a single space (so '1'
    becomes 'one ' and '12' becomes 'one two '). All other characters are
    left unchanged.

    Returns the new string.
    """
    digit_words = {
        '0': 'zero',
        '1': 'one',
        '2': 'two',
        '3': 'three',
        '4': 'four',
        '5': 'five',
        '6': 'six',
        '7': 'seven',
        '8': 'eight',
        '9': 'nine',
    }
    # One compiled pattern covers all ten digits. [0-9] is used instead of
    # \d so that every possible match is guaranteed to be a key of
    # digit_words.
    p = re.compile('[0-9]')
    # sub accepts a function: it is called with each match object and its
    # return value is used as the replacement text.
    return p.sub(lambda m: digit_words[m.group(0)] + ' ', s)

#test your function!
# print(...) with one argument gives the same output on Python 2 and 3.
print(makenumbers(s))


#EXERCISE 5
#The file enron.txt contains some personal emails between Enron executives, shortly before many of them were convicted for fraud and the company was bankrupted.
#
#Use regular expressions to obtain: 
#
#1. A list of the subject lines of the emails
#2. A list of the unique email addresses. (Hint: search Google for 'finding unique strings in a list python'.)
#
#Open it using
# Read the whole file into one string; the "with" block closes the file
# as soon as it has been read.
with open('enron.txt', 'r') as enron_file:
    enron = enron_file.read()


#EXERCISE 6
#Define another single function clean(input_string, bad_words) 
#which removes any of the words in bad_words from input_string. 
#You could use the substitute ('sub') function you used 
#in the regular expressions exercises.
#
#Now run this from the shell:
#
#import matchingwords
#
#print matchingwords.clean('This erfkh sentence needs some xffvn cleaning up',['erfkh', 'xffvn'])
#
#and verify that you get the cleaned-up sentence as the output:
#
#'This sentence needs some cleaning up'

#define 'clean' function here.