-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathextract-historic-public-prices.py
97 lines (80 loc) · 2.52 KB
/
extract-historic-public-prices.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import datetime
import csv
import urllib, urllib2
from zipfile import ZipFile
import json
# parameters
rootdir = '/home/hsenot/Data/AEMO/PUBLIC_PRICES'
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
# Store the data from the CSV in a file
out_f = open("../data/nem_historic_prices.csv","w")
out_f.write("dort,aemo,settlement_date,settlement_time,region,rrp\n")
for subdir, dirs, files in os.walk(rootdir):
files.sort()
for file in files:
# Filename
zf = ZipFile(subdir+'/'+file,'r')
# Listing the resources in the zip file - there is only 1
zfnl = zf.namelist()
print 'Filename to extract from the archive: '+ zfnl[0]
f = zf.open(zfnl[0])
# It's a CSV file - we extract info in a format ready to be JSONified
reader = csv.reader(f, delimiter=',', quoting=csv.QUOTE_NONE)
zf.close()
# Some info on the public prices CSV files
# http://www.whit.com.au/blog/2012/07/all-you-need-to-analyse-the-electricity-market-pt-3/
for row in reader:
if len(row)>1:
# Data rows identified with D flag in first column
if row[0] == "D":
# Selection of the right columns to consider
f_dort = row[1][0:1]
f_aemo = row[3]
f_date = row[4].split(" ")[0].replace("\"", "")
f_time = row[4].split(" ")[1].replace("\"", "")
f_region = row[6][:-1]
# The RRP column might be in position 7 or 8
if f_dort == "D":
f_price = row[8]
else:
f_price = row[7]
# Writing a well-formed CSV line
out_f.write("\""+f_dort+"\";"+f_aemo+";\""+f_date+"\";\""+f_time+"\";\""+f_region+"\";"+f_price+"\n")
f.close()
out_f.close()
# The resulting CSV file can be loaded in PostgreSQL using:
# The destination structure:
# CREATE TABLE nem_public_price
# (
# id serial NOT NULL,
# dort character(1),
# aemo smallint,
# settlement_date date,
# settlement_time time without time zone,
# region character varying(5),
# rrp numeric(9,2),
# CONSTRAINT pk_nem_public_price PRIMARY KEY (id)
# )
# The loading command:
# copy nem_public_price (dort,aemo,settlement_date,settlement_time,region,rrp)
# from '/var/lib/tomcat6/webapps/empower.me/data/aemo-json/data/nem_historic_prices.csv'
# CSV HEADER DELIMITER ';'
# A reduced table that only has the 30mn (final) spot price:
#
# drop table nem_public_price_mini cascade;
#
# create table nem_public_price_mini as
# select
# id, region, settlement_date, settlement_time,rrp
# from
# nem_public_price
# where
# dort='T' and aemo=1
# order by
# region, settlement_date, settlement_time