Lecture 23 - Files

Lecture Date: Friday, March 18

We’re going to keep looking at files today! We’re going to work with a couple of large data sets and see if we can write some interesting programs.

Download these data sets and put them into your PyCharm project directory:

Here are some other datasets! http://introcs.cs.princeton.edu/java/data/

If we get to it today, we’ll look at reading datasets directly from the web, like our weather data: http://www.wunderground.com/history/airport/KCHO/2015/10/15/DailyHistory.html?format=1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def load_spelling_file(filename):
    """Load a misspelling/correction CSV into two lookup dictionaries.

    Each usable line has the form ``misspelled,correct``; only the first
    two comma-separated fields of a line are read.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(correct_spelling, misspelling)`` tuple.  ``correct_spelling``
        maps each correct word to a misspelling of it (the last one listed
        in the file wins), and ``misspelling`` maps each misspelled word to
        its correction.
    """
    correct_spelling = {}
    misspelling = {}

    # The context manager closes the file even if reading fails part-way.
    with open(filename, "r") as datafile:
        for line in datafile:
            fields = line.split(",")
            # Blank or comma-less lines carry no word pair; skip them
            # rather than failing with IndexError.
            if len(fields) < 2:
                continue
            wrong = fields[0].strip()
            right = fields[1].strip()
            correct_spelling[right] = wrong
            misspelling[wrong] = right

    return correct_spelling, misspelling

# Interactive lookup: keep asking for a word and report whether it is a known
# correct spelling, a known misspelling, or not in the data file at all.
correct_spelling, misspelling = load_spelling_file("misspellings.csv")
while True:
    word = input("Please enter a word (END to quit): ")
    if word == "END":
        break
    if word in correct_spelling:
        # sep="" keeps the quotes tight around the word; the default
        # separator would print "' word '" with stray spaces.
        print("The word '", word, "' is spelled correctly!", sep="")
    elif word in misspelling:
        print("I think that '", word, "' is spelled ", misspelling[word], sep="")
    else:
        print("I don't know that word.  Sorry!")
1
2
3
4
5
6
7
8
9
import urllib.request

# Ask the user for a URL, fetch it, and print the page one line at a time.
link = input('Web page: ')

# The response object is a context manager; the with-block closes the
# connection once the page is printed or if an error interrupts the read.
with urllib.request.urlopen(link) as stream:
    for line in stream:
        # Iterating the response yields bytes, so decode before printing.
        # NOTE(review): assumes the page is UTF-8 encoded; other encodings
        # may raise UnicodeDecodeError.
        decoded = line.decode("UTF-8")
        print(decoded.strip())
1
2
3
4
5
6
7
8
9
10
import urllib.request
# Fetch the Weather Underground daily-history page for airport KCHO on a
# fixed date and print each line as a list of comma-separated fields.
year = "2012"
month = "03"
day = "17"
url = "http://www.wunderground.com/history/airport/KCHO/" + year + "/" + month +"/" + day + "/DailyHistory.html?format=1"

# The with-block closes the HTTP response when the loop finishes or fails.
with urllib.request.urlopen(url) as stream:
    for line in stream:
        # Each line arrives as bytes: decode, trim the newline, then split
        # on commas to get the individual fields.
        decoded = line.decode("UTF-8").strip().split(",")
        print(decoded)