Thiết kế website giá rẻ

Question

I have been working on PageRank for the CS50 AI course, my code:

import os
import random
import re
import sys

from numpy.random import choice

DAMPING = 0.85
SAMPLES = 10000


def main():
    if len(sys.argv) != 2:
        sys.exit("Usage: python pagerank.py corpus")
    corpus = crawl(sys.argv[1])
    k = sample_pagerank(corpus, DAMPING, SAMPLES)
    print(f"PageRank Results from Sampling (n = {SAMPLES})")
    for page in sorted(k):
        print(f"  {page}: {k[page]:.4f}")
    k = iterate_pagerank(corpus, DAMPING)
    print(f"PageRank Results from Iteration")
    for page in sorted(k):
        print(f"  {page}: {k[page]:.4f}")


def crawl(directory):
    """
    Parse a directory of HTML pages and check for links to other pages.
    Return a dictionary where each key is a page, and values are
    a list of all other pages in the corpus that are linked to by the page.
    """
    pages = dict()

    # Extract all links from HTML files
    for filename in os.listdir(directory):
        if not filename.endswith(".html"):
            continue
        with open(os.path.join(directory, filename)) as f:
            contents = f.read()
            links = re.findall(r"<as+(?:[^>]*?)href="([^"]*)"", contents)
            pages[filename] = set(links) - {filename}

    # Only include links to other pages in the corpus
    for filename in pages:
        pages[filename] = set(
            link for link in pages[filename]
            if link in pages
        )

    return pages


def transition_model(corpus, page, damping_factor):
    """
    Return a probability distribution over which page to visit next,
    given a current page.

    With probability `damping_factor`, choose a link at random
    linked to by `page`. With probability `1 - damping_factor`, choose
    a link at random chosen from all pages in the corpus.
    """
    
    l = len(corpus[page]) # number of links for that page
    k = dict()
    if l == 0:
        for i in corpus.keys():
            k[i] = 1/len(corpus)
    else:
        for i in corpus[page]:
            k[i] = damping_factor / l + (1 - damping_factor) / (l + 1)
        k[page] = (1 - damping_factor) / (l + 1)

    return k


def sample_pagerank(corpus, damping_factor, n):
    """
    Return PageRank values for each page by sampling `n` pages
    according to transition model, starting with a page at random.

    Return a dictionary where keys are page names, and values are
    their estimated PageRank value (a value between 0 and 1). All
    PageRank values should sum to 1.
    """
    k = dict()
    counts = dict()
    pages = list(corpus.keys())
    page = random.choice(pages)
    for i in range(0, n - 1):
        probs = transition_model(corpus, page, damping_factor)
        psub = list(probs.keys())
        probs = list(probs.values())
        if page in counts:
            counts[page] += 1
        else:
            counts[page] = 1
        
        page = choice(psub, p=probs)
    for i in counts.keys():
        k[i] = counts[i] / n
    return k


def iterate_pagerank(corpus, damping_factor):
    """
    Return PageRank values for each page by iteratively updating
    PageRank values until convergence.

    Return a dictionary where keys are page names, and values are
    their estimated PageRank value (a value between 0 and 1). All
    PageRank values should sum to 1.
    """
    # if any page has no links give it a link to every page
    for i in corpus:
        if len(corpus[i]) == 0:
            corpus[i] = set(corpus.keys())
    N = len(corpus)
    # initialize ranks
    k = {i: 1 / N for i in corpus}
    
    def pagerank(page):
        # start the algorithm
        r = (1 - damping_factor) / N 
        rt = 0
        # second part of algorithm
        for i in corpus:
            if page in corpus[i]:
                rt += k[i] / len(corpus[i])
        rt = damping_factor * rt
        return r + rt
    
    while True:
        # new ranks
        kn = {i: pagerank(i) for i in corpus}
        # maximum changes to ranks
        c = max(abs(kn[i] - k[i]) for i in corpus)
        k = kn
        # do this until changes becomes less that 0.001
        if c < 0.001:
            break
    return k
    
if __name__ == "__main__":
    main()

When I check it using check50 it gives this error relating to the iterate_pagerank function:
check50 error
Saying that because pagerank 1 was 0.2282 and instead should be in the range [0.05996, 0.15996].

Because check50 gave the corpus and the expected range, I tried replacing line 15 with:

corpus = {'1': {'2'}, '2': {'3', '1'}, '3': {'2', '4'}, '4': {'2'}, '5': {'6'}, '6': {'7', '5'}, '7': {'6', '8'}, '8': {'6'}}

My result:

PageRank Results from Iteration
  1: 0.1101
  2: 0.2144
  3: 0.1101
  4: 0.0653
  5: 0.1101
  6: 0.2144
  7: 0.1101
  8: 0.0653

But the rank for page 1 is 0.1101, and that is between the range 0.05996 to 0.15996.

Is check50 wrong or am I missing something?

Thiết kế website giá rẻ

Danh mục

Check50 seems to be returning a wrong result in PageRank