"""
Program that grabs the specific lecture URLs from
the CS51A course home page
"""

# David Kauchak

from urllib.request import urlopen
from url_extractor import write_list_to_file

COURSE_PAGE = "http://www.cs.pomona.edu/~dkauchak/classes/cs51a/"

def get_note_urls_improved(page):
    """
    Get the lecture URLs available from the course web page and
    return them in a list

    page is the URL of the course page.  Every piece of text in the
    page that starts with "lectures/" and runs up to (not including)
    the next double quote is treated as a relative lecture link; the
    returned list holds page concatenated with each such link, in the
    order they appear.  If the last link has no closing quote, it is
    taken to run to the end of the page text.
    """
    # the with-block closes the connection even if read/decode fails
    with urlopen(page) as web_page:
        # read in all the text at once and decode it
        page_text = web_page.read().decode('ISO-8859-1')

    urls = []

    # all of the urls for lectures, start with "lectures/"
    search_line = "lectures/"

    begin_index = page_text.find(search_line)

    while begin_index != -1:
        end_index = page_text.find('"', begin_index)

        if end_index == -1:
            # no closing quote anywhere after this link: slicing with -1
            # would silently drop the last character, so take the rest
            # of the text and stop (nothing can follow it)
            urls.append(page + page_text[begin_index:])
            break

        urls.append(page + page_text[begin_index:end_index])

        # continue looking after the closing quote of this link
        begin_index = page_text.find(search_line, end_index)

    return urls

def get_note_files_only(page):
    """
    Get the lecture file names available from the course web page and
    return them in a list

    page is the URL of the course page.  Every piece of text in the
    page that starts with "lectures/" and runs up to (not including)
    the next double quote is treated as a lecture link; only the part
    after the "lectures/" prefix is returned, in the order the links
    appear.  If the last link has no closing quote, it is taken to run
    to the end of the page text.
    """
    # the with-block closes the connection even if read/decode fails
    with urlopen(page) as web_page:
        # read in all the text at once and decode it
        page_text = web_page.read().decode('ISO-8859-1')

    urls = []

    # all of the urls for lectures, start with "lectures/"
    search_line = "lectures/"

    begin_index = page_text.find(search_line)

    while begin_index != -1:
        # skip over the "lectures/" prefix so only the file name is kept
        file_begin = begin_index + len(search_line)
        end_index = page_text.find('"', begin_index)

        if end_index == -1:
            # no closing quote anywhere after this link: slicing with -1
            # would silently drop the last character, so take the rest
            # of the text and stop (nothing can follow it)
            urls.append(page_text[file_begin:])
            break

        urls.append(page_text[file_begin:end_index])

        # continue looking after the closing quote of this link
        begin_index = page_text.find(search_line, end_index)

    return urls

def write_lecture_improved(outfile):
    """
    Collect the lecture URLs found on COURSE_PAGE and write them out

    outfile is passed through unchanged to write_list_to_file, along
    with the list of URLs.
    """
    # fetch the urls and hand them straight to the list writer
    write_list_to_file(get_note_urls_improved(COURSE_PAGE), outfile)