sqlite - How to avoid memory error in python when retrieving from database -
when try run code large amount of data, memoryerror in all_rows = [[x[0], x[1]] x in cur]
. have 200m rows. how can avoid it?
binwidth = 1 latitudes = [] userids = [] info = [] densities = [] lite.connect(databasepath) con: cur = con.execute('select latitude, userid dynamicmessage latitude>45') print "executed" all_rows = [[x[0], x[1]] x in cur] all_rows = sorted(all_rows, key=itemgetter(0)) print "sorted" x in all_rows: latitudes.append(x[0]) userids.append(x[1]) min_lat = -100 max_lat = 100 binwidth = 1 bin_range = np.arange(min_lat,max_lat,binwidth) binned_latitudes = np.digitize(latitudes,bin_range) all_in_bins = zip(binned_latitudes,userids) unique_in_bins = list(set(all_in_bins)) all_in_bins.sort() unique_in_bins.sort() bin_count_all = [] bin, group in groupby(all_in_bins, lambda x: x[0]): bin_count_all += [(bin, len([k k in group]))] bin_count_unique = [] bin, group in groupby(unique_in_bins, lambda x: x[0]): bin_count_unique += [(bin, len([ k k in group]))] bin_density = [(bin_range[b-1],a*1.0/u) ((b,a),(_,u)) in zip(bin_count_all, bin_count_unique)] bin_density = np.array(bin_density).transpose() # all_in_bins , unique_in_bins contain data # corresponding sql / pseudocode in question # plot standard bar - note can put uneven widths in array-like here if necessary plt.bar(*bin_density, width=binwidth) plt.savefig('latlongstats'+'t'+str(time.strftime("%h:%m:%s")), format='png')
Comments
Post a Comment