Python_21_Udacity_Evans_Intro to CS_4_Responding to queries

总目录


课程页面:https://www.udacity.com/course/intro-to-computer-science--cs101
授课教师:Dave Evans https://www.cs.virginia.edu/~evans/
如下内容包含课程笔记和自己的扩展折腾

Add to index

def add_to_index(index, keyword, url):
    """Record that ``url`` is associated with ``keyword`` in ``index``.

    ``index`` is a list of ``[keyword, [url, ...]]`` entries and is
    modified in place.  If ``keyword`` already has an entry, ``url`` is
    appended to that entry's url list; otherwise a new entry is added.

    Returns the same ``index`` list that was passed in.
    """
    for entry in index:
        if entry[0] == keyword:
            # Mutate the stored list; ``entry[1] + [url]`` would only build
            # a temporary list and throw it away.
            entry[1].append(url)
            # Return right away so the "new keyword" append below is skipped.
            return index
    index.append([keyword, [url]])
    return index

# -*- coding: utf-8 -*-

# Define a procedure, add_to_index,
# that takes 3 inputs:

# - an index: [[<keyword>,[<url>,...]],...]
# - a keyword: String
# - a url: String

# If the keyword is already
# in the index, add the url
# to the list of urls associated
# with that keyword.

# If the keyword is not in the index,
# add an entry to the index: [keyword,[url]]

index = []

def add_to_index(index, keyword, url):
    """Add ``url`` to the url list stored for ``keyword`` in ``index``.

    ``index`` is a list of ``[keyword, [url, ...]]`` entries and is
    updated in place.  A keyword that is not present yet gets a new
    ``[keyword, [url]]`` entry appended at the end.

    Returns the same ``index`` list that was passed in.
    """
    for entry in index:
        # Compare against the keyword slot only; a membership test on the
        # whole entry would also look at the url list.
        if entry[0] == keyword:
            entry[1].append(url)
            return index
    index.append([keyword, [url]])
    return index



add_to_index(index,'udacity','http://udacity.com')
add_to_index(index,'computing','http://acm.org')
add_to_index(index,'udacity','http://npr.org')
print index
#>>> [['udacity', ['http://udacity.com', 'http://npr.org']],
#>>> ['computing', ['http://acm.org']]]

Lookup

# -*- coding: utf-8 -*-

# Define a procedure, lookup,
# that takes two inputs:

# - an index
# - keyword

# The procedure should return a list
# of the urls associated
# with the keyword. If the keyword
# is not in the index, the procedure
# should return an empty list.

index = [['udacity', ['http://udacity.com', 'http://npr.org']],
         ['computing', ['http://acm.org']]]

def lookup(index, keyword):
    """Return the url list stored for ``keyword`` in ``index``.

    ``index`` is a list of ``[keyword, urls]`` entries.  The url list of
    the first entry whose keyword matches is returned (the stored list
    itself, not a copy); if no entry matches, a new empty list is returned.
    """
    matching_url_lists = (entry[1] for entry in index if entry[0] == keyword)
    return next(matching_url_lists, [])

print lookup(index,'udacity')
#>>> ['http://udacity.com','http://npr.org']

Add page to index

# -*- coding: utf-8 -*-

# Define a procedure, add_page_to_index,
# that takes three inputs:

#   - index
#   - url (String)
#   - content (String)

# It should update the index to include
# all of the word occurrences found in the
# page content by adding the url to the
# word's associated url list.

index = []


def add_to_index(index, keyword, url):
    """Append ``url`` to the url list of ``keyword`` in ``index`` (in place).

    ``index`` is a list of ``[keyword, [url, ...]]`` entries.  When no
    entry for ``keyword`` exists, a new ``[keyword, [url]]`` entry is
    appended.  Nothing is returned.
    """
    existing = next((entry for entry in index if entry[0] == keyword), None)
    if existing is None:
        index.append([keyword, [url]])
    else:
        existing[1].append(url)

def add_page_to_index(index, url, content):
    """Index every whitespace-separated word of ``content`` under ``url``.

    Each word produced by ``content.split()`` is passed, in order, to
    ``add_to_index(index, word, url)``.  Nothing is returned.
    """
    for word in content.split():
        add_to_index(index, word, url)

add_page_to_index(index,'fake.text',"This is a test")
print index
#>>> [['This', ['fake.text']], ['is', ['fake.text']], ['a', ['fake.text']],
#>>> ['test',['fake.text']]]

Better Splitting

# 1 Gold Star

# The built-in .split() procedure works
# okay, but fails to find all the words on a page
# because it only uses whitespace to split the
# string. To do better, we should also use punctuation
# marks to split the page into words.

# Define a procedure, split_string, that takes two
# inputs: the string to split and a string containing
# all of the characters considered separators. The
# procedure should return a list of strings that break
# the source string up by the characters in the
# splitlist.

# 这个是我的算法,Udacity给的算法和我的思路些许不一样
def split_string(source, splitlist):
    """Split ``source`` at every character that occurs in ``splitlist``.

    Returns the list of non-empty pieces found between separator
    characters, in order.  Consecutive separators produce no empty pieces.
    """
    pieces = []
    current = []  # characters of the piece being collected
    for ch in source:
        if ch in splitlist:
            # A separator closes the current piece, if there is one.
            if current:
                pieces.append("".join(current))
                current = []
        else:
            current.append(ch)
    # Flush the trailing piece when the source does not end on a separator.
    if current:
        pieces.append("".join(current))
    return pieces

"""
def split_string(source, splitlist):
    output = []
    atsplit = True
    for char in source:
        print char
        if char in splitlist:
            atsplit = True
        else:
            if atsplit:
                output.append(char)
                atsplit = False
                print output
            else:
                output[-1] = output[-1] + char
                print output
    return output
"""
out = split_string("This is a test-of the,string separation-code!"," ,!-")
print out
#>>> ['This', 'is', 'a', 'test', 'of', 'the', 'string', 'separation', 'code']

out = split_string("After  the flood   ...  all the colors came out.", " .")
print out
#>>> ['After', 'the', 'flood', 'all', 'the', 'colors', 'came', 'out']

out = split_string("First Name,Last Name,Street Address,City,State,Zip Code",",")
print out
#>>>['First Name', 'Last Name', 'Street Address', 'City', 'State', 'Zip Code']

Improving the Index

# -*- coding: utf-8 -*-

# The current index includes a url in the list of urls
# for a keyword multiple times if the keyword appears
# on that page more than once.

# It might be better to only include the same url
# once in the url list for a keyword, even if it appears
# many times.

# Modify add_to_index so that a given url is only
# included once in the url list for a keyword,
# no matter how many times that keyword appears.

def add_to_index(index, keyword, url):
    """Add ``url`` under ``keyword`` in ``index`` unless it is already there.

    ``index`` is a list of ``[keyword, [url, ...]]`` entries and is
    modified in place.  A url is stored at most once per keyword; an
    unknown keyword gets a new ``[keyword, [url]]`` entry.  Nothing is
    returned.
    """
    for entry in index:
        if entry[0] != keyword:
            continue
        known_urls = entry[1]
        if url not in known_urls:
            known_urls.append(url)
        break
    else:
        # The loop ran to completion: the keyword is not in the index yet.
        index.append([keyword, [url]])


def get_page(url):
    """Return the canned page text for one of the four known test urls.

    Any other url (or any error while matching) yields an empty string.
    """
    try:
        if url == "http://www.udacity.com/cs101x/index.html":
            return '''  This is a test page for learning to crawl!

It is a good idea to learn to crawl before you try to walk or fly.

'''
        elif url == "http://www.udacity.com/cs101x/crawling.html":
            return ''' I have not learned to crawl yet, but I am quite good at kicking. '''
        elif url == "http://www.udacity.com/cs101x/walking.html":
            return ''' I cant get enough crawling'''
        elif url == "http://www.udacity.com/cs101x/flying.html":
            return ''' The magic words are Squeamish Ossifrage!'''
    except Exception:
        # Best effort: a failed lookup is reported as an empty page.
        return ""
    return ""


def union(a, b):
    """Append to ``a`` (in place) each element of ``b`` not already in ``a``."""
    for e in b:
        if e not in a:
            a.append(e)


# NOTE(review): the rest of this listing was lost when the page was
# extracted (presumably an HTML-like string literal swallowed the text that
# followed it).  Only the fragment below survives; it is kept verbatim:
#   def get_next_target(page): start_link = page.find('>> ['http://www.udacity.com/cs101x/index.html']

Counting Clicks



# 2 Gold Stars

# One way search engines rank pages
# is to count the number of times a
# searcher clicks on a returned link.
# This indicates that the person doing
# the query thought this was a useful
# link for the query, so it should be
# higher in the rankings next time.

# (In Unit 6, we will look at a different
# way of ranking pages that does not depend
# on user clicks.)

# ___Modify the index___ such that for each url in a
# list for a keyword, there is also a number
# that counts the number of times a user
# clicks on that link for this keyword.

# The result of lookup(index,keyword) should
# now be a list of url entries, where each url
# entry is a list of a url and a number
# indicating the number of times that url
# was clicked for this query keyword.

# You should define ___a new procedure___ to simulate
# user clicks for a given link:

# ___record_user_click(index,word,url)___

# that modifies the entry in the index for
# the input word by increasing the count associated
# with the url by 1.

# You also will have to ____modify add_to_index____
# in order to correctly create the new data
# structure, and to prevent the repetition of
# entries as in homework 4-5.


def record_user_click(index, keyword, url):
    """Increase by one the click count stored for ``url`` under ``keyword``.

    ``index`` is a list of ``[keyword, [[url, count], ...]]`` entries.  The
    first matching ``[url, count]`` pair is updated in place; if the keyword
    or the url is not present, the index is left untouched.  Nothing is
    returned.
    """
    url_lists = (entry[1] for entry in index if entry[0] == keyword)
    for url_entries in url_lists:
        for click_record in url_entries:
            if click_record[0] != url:
                continue
            click_record[1] += 1
            return


def add_to_index(index, keyword, url):
    """Register ``url`` under ``keyword`` with a click count starting at 0.

    ``index`` is a list of ``[keyword, [[url, count], ...]]`` entries and
    is modified in place.  A url already listed for the keyword is left
    alone (its count is kept); a new url is appended as ``[url, 0]``; an
    unknown keyword gets a new ``[keyword, [[url, 0]]]`` entry.  Nothing
    is returned.
    """
    for entry in index:
        if entry[0] == keyword:
            for item in entry[1]:
                if item[0] == url:
                    # Already recorded for this keyword: keep its count.
                    return
            entry[1].append([url, 0])
            # Stop here; falling through would add a second entry for the
            # same keyword at the end of the index.
            return
    # not found, add new keyword to index
    index.append([keyword, [[url, 0]]])


def get_page(url):
    """Return the canned page text for one of the four known test urls.

    Any other url (or any error while matching) yields an empty string.
    """
    try:
        if url == "http://www.udacity.com/cs101x/index.html":
            return '''  This is a test page for learning to crawl!

It is a good idea to learn to crawl before you try to walk or fly.

'''
        elif url == "http://www.udacity.com/cs101x/crawling.html":
            return ''' I have not learned to crawl yet, but I am quite good at kicking. '''
        elif url == "http://www.udacity.com/cs101x/walking.html":
            return ''' I cant get enough crawling!'''
        elif url == "http://www.udacity.com/cs101x/flying.html":
            return 'The magic words are Squeamish Ossifrage!'
    except Exception:
        # Best effort: a failed lookup is reported as an empty page.
        return ""
    return ""


def union(a, b):
    """Append to ``a`` (in place) each element of ``b`` not already in ``a``."""
    for e in b:
        if e not in a:
            a.append(e)


# NOTE(review): the rest of this listing was lost when the page was
# extracted (presumably an HTML-like string literal swallowed the text that
# followed it).  The surviving fragment, including the demo calls and their
# expected output, is kept verbatim below.  It is left commented out because
# the code that built ``index`` for this demo is not visible here.
#   def get_next_target(page): start_link = page.find('>> [['http://www.udacity.com/cs101x/index.html', 0],
#   #>>> ['http://www.udacity.com/cs101x/crawling.html', 0]]
#   record_user_click(index, 'good', 'http://www.udacity.com/cs101x/crawling.html')
#   print lookup(index, 'good')
#   #>>> [['http://www.udacity.com/cs101x/index.html', 0],
#   #>>> ['http://www.udacity.com/cs101x/crawling.html', 1]]

Word Count

# -*- coding: utf-8 -*-

# Write a procedure, count_words, which takes as input a string
# and returns the number of words in the string. You may consider words
# as strings of characters separated by spaces.

def count_words(strings):
    """Return how many words the string ``strings`` contains.

    A word is a maximal run of characters that are not the space
    character ``" "``; other whitespace such as tabs does not separate
    words.
    """
    # Splitting on a single space yields empty pieces for repeated,
    # leading or trailing spaces; only the non-empty pieces are words.
    return sum(1 for piece in strings.split(" ") if piece)


passage =("The number of orderings of the 52 cards in a deck of cards "
"is so great that if every one of the almost 7 billion people alive "
"today dealt one ordering of the cards per second, it would take "
"2.5 * 10**40 times the age of the universe to order the cards in every "
"possible way.")

#print passage
#>>>The number of orderings of the 52 cards in a deck of cards is so great that if every one of the almost 7 billion people alive today dealt one ordering of the cards per second, it would take 2.5 * 10**40 times the age of the universe to order the cards in every possible way.

print count_words(passage)
#>>>56

speed_fraction

有关这个练习当中我出错的地方,写了个笔记。

下面是正确的代码:

# -*- coding: utf-8 -*-

# Write a procedure, speed_fraction, which takes as its inputs the result of
# a traceroute (in ms) and distance (in km) between two points. It should
# return the speed the data travels as a decimal fraction of the speed of
# light.

speed_of_light = 300000.  # km per second


def speed_fraction(traceroute, distance):
    """Return the data's travel speed as a fraction of the speed of light.

    ``traceroute`` is a time in milliseconds, of which half is taken as
    the one-way travel time; ``distance`` is in kilometres.
    """
    # Half of the traceroute time, converted from milliseconds to seconds.
    one_way_seconds = traceroute * 1.0 / 2 / 1000.
    return distance / one_way_seconds / speed_of_light

print speed_fraction(50,5000)
#>>> 0.666666666667

print speed_fraction(50,10000)
#>>> 1.33333333333  # Any thoughts about this answer, or these inputs?

print speed_fraction(75,4500)
#>>> 0.4

Converting Seconds

# -*- coding: utf-8 -*-

# Write a procedure, convert_seconds, which takes as input a non-negative
# number of seconds and returns a string of the form
# '<integer> hours, <integer> minutes, <number> seconds' but
# where if <integer> is 1 for the number of hours or minutes,
# then it should be hour/minute. Further, <number> may be an integer
# or decimal, and if it is 1, then it should be followed by second.
# You might need to use int() to turn a decimal into an integer depending
# on how you code this. int(3.0) gives 3
#
# Note that English uses the plural when talking about 0 items, so
# it should be "0 minutes".
#

#算法: 余数
# 1 hour = 3600 seconds
# 1 minute = 60 seconds


def convert_seconds(time):
    """Format ``time`` (a number of seconds) as hours, minutes and seconds.

    The result looks like ``'2 hours, 1 minute, 5 seconds'``: each unit
    name is singular when its amount equals 1 and plural otherwise
    (including 0).  Hours and minutes are whole numbers; the seconds part
    keeps whatever remains of ``time``.
    """
    hours = int(time // 3600)
    remaining = time - hours * 3600
    minutes = int(remaining // 60)
    seconds = remaining - minutes * 60

    parts = []
    for amount, unit in ((hours, "hour"), (minutes, "minute"), (seconds, "second")):
        if amount == 1:
            parts.append("1 " + unit)
        else:
            parts.append("%s %ss" % (amount, unit))
    return ", ".join(parts)





print convert_seconds(3661)
#>>> 1 hour, 1 minute, 1 second

print convert_seconds(7325)
#>>> 2 hours, 2 minutes, 5 seconds

print convert_seconds(7261.7)
#>>> 2 hours, 1 minute, 1.7 seconds

Download Calculator

# -*- coding: utf-8 -*-

# Write a procedure download_time which takes as inputs a file size, the
# units that file size is given in, bandwidth and the units for
# bandwidth (excluding per second) and returns the time taken to download
# the file.
# Your answer should be a string in the form
# "<integer> hours, <integer> minutes, <number> seconds"

# Some information you might find useful is the number of bits
# in kilobits (kb), kilobytes (kB), megabits (Mb), megabytes (MB),
# gigabits (Gb), gigabytes (GB) and terabits (Tb), terabytes (TB).

#print 2 ** 10      # one kilobit, kb
#print 2 ** 10 * 8  # one kilobyte, kB

#print 2 ** 20      # one megabit, Mb
#print 2 ** 20 * 8  # one megabyte, MB

#print 2 ** 30      # one gigabit, Gb
#print 2 ** 30 * 8  # one gigabyte, GB

#print 2 ** 40      # one terabit, Tb
#print 2 ** 40 * 8  # one terabyte, TB

# Often bandwidth is given in megabits (Mb) per second whereas file size
# is given in megabytes (MB).

# Number of bits in one of each supported unit (lowercase b = bits,
# uppercase B = bytes).
_BITS_PER_UNIT = {
    "kb": 2 ** 10,
    "kB": 2 ** 10 * 8,
    "Mb": 2 ** 20,
    "MB": 2 ** 20 * 8,
    "Gb": 2 ** 30,
    "GB": 2 ** 30 * 8,
    "Tb": 2 ** 40,
    "TB": 2 ** 40 * 8,
}


def download_time(file_num, file_unit, bandwidth_num, bandwidth_unit):
    """Return how long a download takes, formatted by ``convert_seconds``.

    ``file_num`` / ``file_unit`` give the file size and
    ``bandwidth_num`` / ``bandwidth_unit`` the bandwidth per second.  Units
    are one of ``kb, kB, Mb, MB, Gb, GB, Tb, TB``.

    Raises ``ValueError`` when either unit is not one of those names.
    """
    for unit in (file_unit, bandwidth_unit):
        if unit not in _BITS_PER_UNIT:
            raise ValueError("unknown unit: %r" % (unit,))
    total_bit_file = _BITS_PER_UNIT[file_unit] * file_num
    total_bit_bandwidth = _BITS_PER_UNIT[bandwidth_unit] * bandwidth_num
    # The 1.0 factor forces true division on Python 2 for integer inputs.
    dl_seconds = total_bit_file * 1.0 / total_bit_bandwidth
    return convert_seconds(dl_seconds)


def convert_seconds(time):
    """Format ``time`` (a number of seconds) as hours, minutes and seconds.

    The result looks like ``'2 hours, 1 minute, 5 seconds'``: each unit
    name is singular when its amount equals 1 and plural otherwise
    (including 0).  Hours and minutes are whole numbers; the seconds part
    keeps whatever remains of ``time``.
    """
    def labelled(amount, unit):
        # "1 hour" for exactly one, "<amount> hours" for everything else.
        if amount == 1:
            return "1 " + unit
        return "%s %ss" % (amount, unit)

    whole_hours = int(time // 3600)
    after_hours = time - whole_hours * 3600
    whole_minutes = int(after_hours // 60)
    leftover_seconds = after_hours - whole_minutes * 60
    return ", ".join([
        labelled(whole_hours, "hour"),
        labelled(whole_minutes, "minute"),
        labelled(leftover_seconds, "second"),
    ])





print download_time(1024,'kB', 1, 'MB')
#>>> 0 hours, 0 minutes, 1 second

print download_time(1024,'kB', 1, 'Mb')
#>>> 0 hours, 0 minutes, 8 seconds  # 8.0 seconds is also acceptable

print download_time(13,'GB', 5.6, 'MB')
#>>> 0 hours, 39 minutes, 37.1428571429 seconds

print download_time(13,'GB', 5.6, 'Mb')
#>>> 5 hours, 16 minutes, 57.1428571429 seconds

print download_time(10,'MB', 2, 'kB')
#>>> 1 hour, 25 minutes, 20 seconds  # 20.0 seconds is also acceptable

print download_time(10,'MB', 2, 'kb')
#>>> 11 hours, 22 minutes, 40 seconds  # 40.0 seconds is also acceptable

你可能感兴趣的:(Python_21_Udacity_Evans_Intro to CS_4_Responding to queries)