"""Print the ten most common bigrams in the first paragraph of a web page.

The page is downloaded, its first ``<p>`` element is reduced to lowercase
words, and every pair of adjacent words is counted.
"""

import collections

POE_URL = (
    'http://www.geog.leeds.ac.uk/courses/computing/practice/python/core/'
    'n-grams/examples/poe.html'
)

# Commas, exclamation marks and full stops are deleted outright; the ASCII
# hyphen and the en dash become spaces so the words on either side of them
# stay separate tokens.
_CLEANUP_TABLE = str.maketrans("-\u2013", "  ", ",!.")


def fetch_first_paragraph(url: str, timeout: float = 10) -> str:
    """Download *url* and return the text of its first ``<p>`` element.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text content of the first paragraph on the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``<p>`` element.
    """
    # Imported here rather than at module level so the text-processing
    # helpers below can be imported without the scraping dependencies.
    import bs4
    import requests

    # Without a timeout requests will wait indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of counting the words of an error page.
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    paragraph = soup.find('p')
    if paragraph is None:
        raise ValueError("no <p> element found at {}".format(url))
    return paragraph.text


def tokenize(text: str) -> list:
    """Return the lowercase words of *text* as a list of strings.

    Commas, exclamation marks and full stops are removed; hyphens and en
    dashes are treated as word separators.
    """
    # split() with no argument splits on any run of whitespace and drops
    # leading/trailing whitespace, so no empty-string tokens are produced.
    return text.translate(_CLEANUP_TABLE).lower().split()


def count_bigrams(words) -> collections.Counter:
    """Count each pair of adjacent items in *words*.

    Args:
        words: Iterable of word strings, in reading order.

    Returns:
        A Counter keyed by ``"first second"`` strings. It is empty when
        *words* has fewer than two items.
    """
    words = list(words)
    return collections.Counter(
        " ".join(pair) for pair in zip(words, words[1:])
    )


def main() -> None:
    """Fetch the page at POE_URL and print its ten most common bigrams."""
    text = fetch_first_paragraph(POE_URL)
    bigrams = count_bigrams(tokenize(text))
    print(bigrams.most_common(10))


if __name__ == "__main__":
    main()