From 78961bedca4839fe727bea578092ba06857f37aa Mon Sep 17 00:00:00 2001 From: ipl31 Date: Sun, 11 Nov 2012 13:20:03 -0800 Subject: [PATCH 1/2] improve parsing of symbols.txt to handle blank lines and spaces after symbols. using strformating instead of concatenation Former-commit-id: e937ca7f6063ca461ff7ceba8d719a0240d0fabe Former-commit-id: 7cc5714f5e8371ec5a9e2c1a8e4cf443b5c09b69 --- Tools/YahooDataPull.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/Tools/YahooDataPull.py b/Tools/YahooDataPull.py index 6bba4f765..b028fe864 100644 --- a/Tools/YahooDataPull.py +++ b/Tools/YahooDataPull.py @@ -5,31 +5,30 @@ import urllib2 import urllib import datetime -import sys import os def get_data(data_path, ls_symbols): - #Create path if it doesn't exist + # Create path if it doesn't exist if not (os.access(data_path, os.F_OK)): os.makedirs(data_path) - #utils.clean_paths(data_path) + # utils.clean_paths(data_path) _now =datetime.datetime.now(); - miss_ctr=0; #Counts how many symbols we could get + miss_ctr=0; #Counts how many symbols we could not get for symbol in ls_symbols: - symbol_name = symbol if symbol[0] == '$': symbol = '^' + symbol[1:] symbol_data=list() - #print "Getting " + str (symbol_name) + # print "Getting {0}".format(symbol) try: - params= urllib.urlencode ({'a':1, 'b':1, 'c':2000, 'd':_now.month, 'e':_now.day, 'f':_now.year, 's': str(symbol)}) - url_get= urllib2.urlopen("http://ichart.finance.yahoo.com/table.csv?%s" % params) + params= urllib.urlencode ({'a':1, 'b':1, 'c':2000, 'd':_now.month, 'e':_now.day, 'f':_now.year, 's': symbol}) + url = "http://ichart.finance.yahoo.com/table.csv?%s" % params + url_get= urllib2.urlopen(url) header= url_get.readline() symbol_data.append (url_get.readline()) @@ -38,7 +37,7 @@ def get_data(data_path, ls_symbols): symbol_data.pop(-1) #The last element is going to be the string of length zero. We don't want to write that to file. #now writing data to file - f= open (data_path + symbol_name + ".csv", 'w') + f= open (data_path + symbol + ".csv", 'w') #Writing the header f.write (header) @@ -49,20 +48,22 @@ def get_data(data_path, ls_symbols): f.close(); except urllib2.HTTPError: - miss_ctr= miss_ctr+1 - print "Unable to fetch data for stock: " + str (symbol_name) + miss_ctr += 1 + print "Unable to fetch data for stock: {0}".format(symbol) except urllib2.URLError: - print "URL Error for stock: " + str (symbol_name) + miss_ctr += 1 + print "URL Error for stock: {0}".format(symbol) - print "All done. Got " + str (len(ls_symbols) - miss_ctr) + " stocks. Could not get " + str (miss_ctr) + " stocks." + print "All done. Got {0} stocks. Could not get {1}".format(len(ls_symbols) - miss_ctr, miss_ctr) def read_symbols(s_symbols_file): ls_symbols=[] file = open(s_symbols_file, 'r') - for f in file.readlines(): - j = f[:-1] - ls_symbols.append(j) + for line in file.readlines(): + str_line = str(line) + if str_line.strip(): + ls_symbols.append(str_line.strip()) file.close() return ls_symbols From 848d7c9f96627c1d8015421282ade6d4116f6dd9 Mon Sep 17 00:00:00 2001 From: ipl31 Date: Sun, 11 Nov 2012 14:11:09 -0800 Subject: [PATCH 2/2] Adding symbol_name back in to preserve Index symbol names due to possible conversion of $ to ^ Former-commit-id: c5c1d4705c032ba9e32d0c3274751df1b3752641 Former-commit-id: ca3a2222c0060179348bbdeeb97b62e798782304 --- Tools/YahooDataPull.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Tools/YahooDataPull.py b/Tools/YahooDataPull.py index b028fe864..eb5b2e922 100644 --- a/Tools/YahooDataPull.py +++ b/Tools/YahooDataPull.py @@ -19,6 +19,9 @@ def get_data(data_path, ls_symbols): _now =datetime.datetime.now(); miss_ctr=0; #Counts how many symbols we could not get for symbol in ls_symbols: + # Preserve original symbol since it might + # get manipulated if it starts with a "$" + symbol_name = symbol if symbol[0] == '$': symbol = '^' + symbol[1:] @@ -37,7 +40,7 @@ def get_data(data_path, ls_symbols): symbol_data.pop(-1) #The last element is going to be the string of length zero. We don't want to write that to file. #now writing data to file - f= open (data_path + symbol + ".csv", 'w') + f= open (data_path + symbol_name + ".csv", 'w') #Writing the header f.write (header) @@ -49,10 +52,10 @@ def get_data(data_path, ls_symbols): except urllib2.HTTPError: miss_ctr += 1 - print "Unable to fetch data for stock: {0}".format(symbol) + print "Unable to fetch data for stock: {0} at {1}".format(symbol_name, url) except urllib2.URLError: miss_ctr += 1 - print "URL Error for stock: {0}".format(symbol) + print "URL Error for stock: {0} at {1}".format(symbol_name, url) print "All done. Got {0} stocks. Could not get {1}".format(len(ls_symbols) - miss_ctr, miss_ctr)