""" Program that grabs the specific lecture URLs from the CS51A course home page """ # David Kauchak from urllib.request import urlopen from url_extractor import write_list_to_file COURSE_PAGE = "http://www.cs.pomona.edu/~dkauchak/classes/cs51a/" def get_note_urls_improved(page): """ Get the lecture URLs available from the course web page and return them in a list """ web_page = urlopen(page) # read in all the text at once and decode it page_text = web_page.read().decode('ISO-8859-1') urls = [] # all of the urls for lectures, start with "lectures/" search_line = "lectures/" begin_index = page_text.find(search_line) while begin_index != -1: end_index = page_text.find('"', begin_index) urls.append(page + page_text[begin_index:end_index]) begin_index = page_text.find(search_line, end_index) return urls def get_note_files_only(page): """ Get the lecture URLs available from the course web page and return them in a list """ web_page = urlopen(page) # read in all the text at once and decode it page_text = web_page.read().decode('ISO-8859-1') urls = [] # all of the urls for lectures, start with "lectures/" search_line = "lectures/" begin_index = page_text.find(search_line) while begin_index != -1: end_index = page_text.find('"', begin_index) file_begin = begin_index + len(search_line) urls.append(page_text[file_begin:end_index]) begin_index = page_text.find(search_line, end_index) return urls def write_lecture_improved(outfile): # get the lecture urls lecture_urls = get_note_urls_improved(COURSE_PAGE) # print them out to the file write_list_to_file(lecture_urls, outfile)