#!/usr/bin/env python2
"""
ws_richland_json.py
Description: a JSON gleaner for collecting weather data
Written by: Jim Miller
11:27 PM Fri March 19, 2021
"""
# Sample JSON output (see URL query for JSON feed below):
"""
{
"UNITS": {
"wind_speed": "knots",
"air_temp": "Fahrenheit",
"wind_gust": "knots",
"dew_point_temperature": "Fahrenheit",
"wind_direction": "Degrees",
"sea_level_pressure": "Millibars"
},
"QC_SUMMARY": {
"QC_SHORTNAMES": {
"1": "sl_range_check"
},
"QC_CHECKS_APPLIED": [
"sl_range_check"
],
"PERCENT_OF_TOTAL_OBSERVATIONS_FLAGGED": 0.0,
"QC_SOURCENAMES": {
"1": "SynopticLabs"
},
"TOTAL_OBSERVATIONS_FLAGGED": 0.0,
"QC_NAMES": {
"1": "SynopticLabs Range Check"
}
},
"STATION": [
{
"STATUS": "ACTIVE",
"MNET_ID": "1",
"PERIOD_OF_RECORD": {
"start": "2002-08-14T00:00:00Z",
"end": "2022-12-12T16:56:00Z"
},
"ELEVATION": "1020",
"NAME": "Mankato, Mankato Regional Airport",
"STID": "KMKT",
"SENSOR_VARIABLES": {
"wind_speed": {
"wind_speed_value_1": {
"period_of_record": {
"start": "",
"end": ""
}
}
},
"air_temp": {
"air_temp_value_1": {
"period_of_record": {
"start": "",
"end": ""
}
}
},
"wind_gust": {
"wind_gust_value_1": {
"period_of_record": {
"start": "",
"end": ""
}
}
},
"wind_direction": {
"wind_direction_value_1": {
"period_of_record": {
"start": "",
"end": ""
}
}
},
"sea_level_pressure": {
"sea_level_pressure_value_1": {
"period_of_record": {
"start": "",
"end": ""
}
},
"sea_level_pressure_value_1d": {
"derived_from": [
"pressure_value_1d",
"air_temp_value_1",
"relative_humidity_value_1"
]
}
},
"dew_point_temperature": {
"dew_point_temperature_value_1d": {
"derived_from": [
"air_temp_value_1",
"relative_humidity_value_1"
]
}
}
},
"ELEV_DEM": "1013.8",
"LONGITUDE": "-93.91667",
"UNITS": {
"position": "ft",
"elevation": "ft"
},
"STATE": "MN",
"OBSERVATIONS": {
"wind_gust_value_1": {
"date_time": "2022-12-12T10:45:00-0600",
"value": 24.0
},
"dew_point_temperature_value_1d": {
"date_time": "2022-12-12T11:35:00-0600",
"value": 26.55
},
"wind_direction_value_1": {
"date_time": "2022-12-12T11:35:00-0600",
"value": 130.0
},
"sea_level_pressure_value_1d": {
"date_time": "2022-12-12T11:35:00-0600",
"value": 1023.59
},
"air_temp_value_1": {
"date_time": "2022-12-12T11:35:00-0600",
"value": 30.2
},
"sea_level_pressure_value_1": {
"date_time": "2022-12-12T10:56:00-0600",
"value": 1024.0
},
"wind_speed_value_1": {
"date_time": "2022-12-12T11:35:00-0600",
"value": 19.0
}
},
"RESTRICTED": false,
"QC_FLAGGED": false,
"LATITUDE": "44.21667",
"TIMEZONE": "America\/Chicago",
"ID": "5015"
}
],
"SUMMARY": {
"DATA_QUERY_TIME": "3.86095046997 ms",
"RESPONSE_CODE": 1,
"RESPONSE_MESSAGE": "OK",
"METADATA_RESPONSE_TIME": "651.8638134 ms",
"DATA_PARSING_TIME": "0.997066497803 ms",
"VERSION": "v2.17.0",
"TOTAL_DATA_TIME": "4.85897064209 ms",
"NUMBER_OF_OBJECTS": 1
}
}
"""
# Import supporting modules
import sys, os
import string
# HTML fetching
import requests
import json
import math
# Windows extensions (pywin32-220.win32-py2.7.exe)
import win32com.client
import pywintypes # supports exceptions names
import time
import datetime
"""
Functions
"""
def updateMaxStationDateTime( stationDateTime):
# Check to see if stationDateTime is later than what's in the global.
# and update the value in the global if it is...
# Must declare locally to change a global. If no global declaration, the
# ASSIGNMENT statement will force this variable to be local here in this
# function.
global stationDateTime_maxValue
if (stationDateTime > stationDateTime_maxValue):
stationDateTime_maxValue = stationDateTime
def inQu( string):
# Put everything but Null into single quotes for the SQL string.
# Note: best practice would be to not quote numeric values, quote only strings and dates.
if (string == "Null"):
finalString = string
else:
finalString = "'" + str( string) + "'"
return finalString
def nN( string):
# When writing to the spreadsheet, Nulls should be represented as None
if (string == "Null"):
value = None
else:
value = string
return value
def utc( localTimeString):
# e.g. "2022-12-12T11:35:00-0600"
# 54321
dateTimeFromStamp = datetime.datetime.strptime( localTimeString[:-5], "%Y-%m-%dT%H:%M:%S")
hoursFromUTC = int( localTimeString[-5:-2])
utcTime = dateTimeFromStamp - datetime.timedelta( hours=hoursFromUTC)
utcTimeString = str( utcTime)
return utcTimeString
def getSensorDataDict( stationData, sensorName):
sensorDict = stationData["SENSOR_VARIABLES"].get( sensorName, "no report")
if (sensorDict != "no report"):
sensorValueName = sensorDict.keys()[0]
sensorDataDict = stationData["OBSERVATIONS"][sensorValueName]
else:
sensorDataDict = {'value':'Null'}
return sensorDataDict
def processMultipleStations_json( station_dictionary):
# Build the URL string to run a query for multiple stations.
# (returned on one page).
# First, here are some examples of single-station queries.
# Old XML query:
# http://www.wrh.noaa.gov/mesowest/getobextXml.php?num=1&sid=KRLD
# Using their JSON feed and a token:
# https://api.synopticdata.com/v2/stations/latest?vars=air_temp,dew_point_temperature,wind_speed,wind_direction,wind_gust,sea_level_pressure&obtimezone=local&output=json&units=english&token=1f652521138243d3bfe7a445710896e6&stid=KMKT
url_base = ("https://api.synopticdata.com/v2/stations/latest?" +
"vars=air_temp,dew_point_temperature,wind_speed,wind_direction,wind_gust,sea_level_pressure" +
"&obtimezone=local&output=json&units=english" +
"&token=45d3bd33f12c4d87aed5925e0f4da854")
station_count = 0
write_count = 0
# This index keeps track of successful writes that happen when attempting
# to write observations other then the first one.
write_count_notFirst = 0
stationString = "&stid="
print ""
for station_name in station_dictionary.keys():
print "station name =", station_name
station_count += 1
stationString += station_name + ","
webpage_url = url_base + stationString[:-1]
print "url = " + webpage_url
try:
# Fetch the JSON page
jsonRequest = requests.get( webpage_url)
jsonObject = jsonRequest.json()
formatedJson = json.dumps( jsonObject, indent=2)
#print formatedJson
except:
message_str = "Error opening json url"
enterInLog( message_str)
print message_str + ", URL = " + webpage_url
rowsForSpreadsheet = []
for stationData in jsonObject["STATION"]:
print ""
print stationData["STID"]
print stationData["NAME"]
for sensorName in stationData["SENSOR_VARIABLES"]:
sensorData = getSensorDataDict( stationData, sensorName)
print sensorName + ", " + str( sensorData["value"]) + ", " + sensorData["date_time"]
# Do conversions and then populated the weather dictionary
temp_f = getSensorDataDict( stationData, "air_temp")["value"]
dewPoint_f = getSensorDataDict( stationData, "dew_point_temperature")["value"]
# 2020-12-01T11:05:00-0600
timestamp_literal = getSensorDataDict( stationData, "air_temp")["date_time"]
# 2021-03-19 07:56:00
dateTimeFromStamp = datetime.datetime.strptime( timestamp_literal[:-5], "%Y-%m-%dT%H:%M:%S")
# This check for daylight savings time (dst) uses local gleaner time (T=1, F=0).
# So, if the gleaner computer sees dst, and the station is in an area where dst is used (everywhere but Hawaii),
# then change it back to standard time.
dstFlag = time.localtime()[-1]
if (dstFlag and station_dic[ stationData['STID']]['dst']):
dateTimeFromStamp_std = dateTimeFromStamp - datetime.timedelta( hours=1)
else:
dateTimeFromStamp_std = dateTimeFromStamp
# 2021-03-19 07:56:00
dateTimeFromStamp_std_string = str( dateTimeFromStamp_std)
updateMaxStationDateTime( dateTimeFromStamp_std)
timestamp_date = dateTimeFromStamp_std_string.split(" ")[0]
dateParts = timestamp_date.split("-")
ts_year = dateParts[0]
ts_month = dateParts[1]
ts_day = dateParts[2]
timestamp_time = dateTimeFromStamp_std_string.split(" ")[1]
timeParts = timestamp_time.split(":")
ts_hour = timeParts[0]
ts_min = timeParts[1]
ts_sec = timeParts[2]
timestamp = "%s/%s/%s %s:%s:%s" % ( ts_month, ts_day, ts_year, ts_hour, ts_min, ts_sec)
mdy = "%s/%s/%s" % ( ts_month, ts_day, ts_year)
windDirection_deg = getSensorDataDict( stationData, "wind_direction")["value"]
windDirection_timeStamp = getSensorDataDict( stationData, "wind_direction")["date_time"]
windSpeed_mph = round( getSensorDataDict( stationData, "wind_speed")["value"] * knots_to_mph, 1)
windGust_timeStamp = getSensorDataDict( stationData, "wind_gust")["date_time"]
if (windGust_timeStamp == windDirection_timeStamp):
windGust_mph = round( getSensorDataDict( stationData, "wind_gust")["value"] * knots_to_mph, 1)
else:
windGust_mph = windSpeed_mph
# Some stations don't have a pressure sensor
pressure_raw = getSensorDataDict( stationData, "sea_level_pressure")["value"]
if (pressure_raw != 'Null'):
pressure_inHg = round( pressure_raw * 0.02953, 2) # from millibars
else:
pressure_inHg = "Null"
stationName = station_dic[ stationData['STID']]['longName']
# Populate the weather dictionary: time and sensor data.
weather_dic = {'station_number': inQu( station_dic[ stationData['STID']]['ID']),
'station_name': inQu( stationName),
'epoch_at_write': inQu( math.trunc( time.time())),
'timeStamp_on_drybulb': inQu( timestamp_literal),
'time_native_std': inQu( timestamp),
'MDY': inQu( mdy),
'Hr': inQu( ts_hour),
'Min': inQu( ts_min),
'T_drybulb': inQu( temp_f),
'T_dewpoint': inQu( dewPoint_f),
'wind_direction': inQu( windDirection_deg),
'DDCARD':'Null',
'wind_speed': inQu( windSpeed_mph),
'wind_gust': inQu( windGust_mph),
'ALTSE': inQu( pressure_inHg),
'P':'Null'}
print json.dumps( weather_dic, indent=2)
try:
# Develop SQL string
sqlForStation = build_SQL_string( weather_dic)
# Run the SQL (send formal SQL string and info list). If a
# successful write is indicated, bump up the counter.
if (runSQL( sqlForStation[0], sqlForStation[1])):
write_count += 1
# Add this station's record to the spreadsheet array.
newRow = [stationName, utc( timestamp_literal), nN( temp_f), nN( dewPoint_f), nN( windDirection_deg), nN( windSpeed_mph), nN( windGust_mph), nN( pressure_inHg)]
rowsForSpreadsheet.append( newRow)
except:
# Note the backslash is a line-continuation character.
message_str = "general error in Station loop ::: %s ==> %s, \nStation name = %s" \
% (sys.exc_type, sys.exc_value,
getXMLvalueInStationGroup( dom_object, data_index, "station_id"))
enterInLog( message_str)
print message_str
print "SQL construction failed."
# If there was a successful write to database, make entry in log showing
# the number of successful gleans from the web site and the number of
# successful writes to the database.
# W = Writes
# P = Possible (number of stations collecting from)
print ""
message_str = "Data write record: %s P, %s W" % ( station_count, write_count)
print message_str
if (write_count > 0):
enterInLog( message_str)
attemptWriteToDaysGleaned()
write_to_spreadsheet( rowsForSpreadsheet)
def write_to_spreadsheet( data):
sheet_url = "https://script.google.com/macros/s/AKfycbzoEtsp1DLhvtt8OVrgwkhAzab5D6bYN1Mr_AKwzWvm9IfTXyHXmNFoxlSAnb0a_QlZLQ/exec"
postDict = {"sheetName":"meso", "weatherData":data}
try:
# Send with POST. Note: the postDict dictionary gets converted to a JSON string.
jsonRequest = requests.post( sheet_url, json=postDict)
except:
message_str = "Error opening URL."
print message_str + ", URL = " + sheet_url
def build_SQL_string( wd):
# Input parameter is the weather dictionary (wd)
sql_names = "INSERT INTO FifteenMinData (" +\
"PerlTime, DateTimeStamp, LiteralDateTimeStamp, TimeMDY, TimeHr, TimeMin, " +\
"StationNumber, StationName, WindDirection, WindSpeed, WindGust, " +\
"TempAvg, DewPoint, Pressure) "
sql_values = "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % (
wd['epoch_at_write'], wd['time_native_std'], wd['timeStamp_on_drybulb'], wd['MDY'], wd['Hr'], wd['Min'],
wd['station_number'], wd['station_name'], wd['wind_direction'], wd['wind_speed'], wd['wind_gust'],
wd['T_drybulb'], wd['T_dewpoint'], wd['ALTSE'])
sql_string = sql_names + sql_values
print "SQL string = ", sql_string
# Return SQL string and data list (to be used with error messages)
return ( sql_string, sql_values)
def runSQL( sql_string, row_ascii):
# row_ascii: Contains the variable names and is useful in constructing error messages.
successful_execution = False
# Execute SQL string
try:
database_conn.Execute( sql_string)
successful_execution = True
# For testing
#print "SQL = ", sql_string
#print "row_ascii = ", row_ascii
except pywintypes.com_error: # catch this specific error
# If only a "duplicate data" error, don't write to the log file
if ( string.find( str( sys.exc_value), "duplicate data") <> -1):
print "Data already in database (sql warning)."
else:
message_str = "SQL error ::: %s ==> %s, \nData value = %s" % ( sys.exc_type, sys.exc_value, row_ascii)
enterInLog( message_str)
print message_str
return successful_execution
except:
message_str = "general error ::: %s ==> %s, \nData value = %s" % ( sys.exc_type, sys.exc_value, row_ascii)
enterInLog( message_str)
print message_str
return successful_execution
else: # Run this block if no errors...
print "Successful SQL execution!"
return successful_execution
# # For testing, clean out any new records for this date.
# sql = "DELETE * FROM [FifteenMinData] WHERE ([TimeMDY] = #6/29/2006#)"
# database_conn.Execute(sql)
def enterInLog( logentry):
logFile.write('==============' + str(datetime.datetime.today()) + '====V ' + version_number + '\n')
logFile.write( logentry + '\n')
def openConnections():
# Open logFile
global logFile
# The getcwd os function returns the current working directory. Another
# similar command is os.path.abspath(""). This returns "C:\\waconia" on
# Physics2 and "C:\\code\\python\\scraper" at here at home. Note that if
# this is running as a system AT job, this will return something like
# "C:\\winnt\\system32"
logFileDir = os.getcwd()
# If it's not at home then it must be running on Physics2
if (logFileDir <> "C:\\code\\python\\scraper"):
logFileDir = "C:\\Users\\Jim\\Documents\\webcontent\\waconia"
logFileName = "ws_richland_log.txt"
logFilePath = logFileDir + "\\" + logFileName
# Check for the file and create a new one if none is found.
# Note: os.curdir doesn't work as a way to find the current directory when
# if this task is scheduled. So I use the absolute paths above.
if logFileName not in os.listdir( logFileDir):
try:
logFile = open(logFilePath, 'w') # create and write
except:
sys.exit("Error when opening log file. Script stopped!!!") # Shutdown if no way to log...
# Header in file.
logFile.write('==============VERSION '+ version_number +'========================' + '\n')
logFile.write('File created: ' + str(datetime.datetime.today()) + '\n')
logFile.write('=================================================' + '\n')
logFile.write('=================================================' + '\n')
logFile.write('=================================================' + '\n')
else:
logFile = open(logFilePath, 'a') # append
# Open Database connection
global database_conn
database_conn = win32com.client.Dispatch(r'ADODB.Connection')
# Set path to the database.
DSN = 'PROVIDER=Microsoft.Jet.OLEDB.4.0;DATA SOURCE=C:/Users/Jim/Documents/webcontent/waconia/data/telem.mdb;'
try:
database_conn.Open(DSN)
except:
enterInLog("Error ::: %s ==> %s" % (sys.exc_type, sys.exc_value))
# Close logFile on the way out.
logFile.close()
sys.exit("Error when opening database connection. Script stopped!!!")
def closeConnections():
database_conn.Close() # Notice the CAPITAL C
logFile.close() # Notice the lowercase c
def attemptWriteToDaysGleaned():
# This serves to keep the DaysGleaned table up to date. If this gleaner is
# the first to cross into a new day this should produce a successful write.
# Make a local tuple out of the global stationDateTime_maxValue
dT = stationDateTime_maxValue.timetuple()
sql_string = "INSERT INTO DaysGleaned ([TimeMDY]) VALUES ('%s/%s/%s')" % (dT[1],dT[2],dT[0])
try:
database_conn.Execute( sql_string)
except:
#print "days-gleaned error: " + str( sys.exc_value)
# If not a "duplicate data" error, write to the log file
if (string.find(str(sys.exc_value),"duplicate data") == -1): # can't find it in string
enterInLog("Error ::: %s ==> %s" % (sys.exc_type, sys.exc_value))
"""
Main program
"""
# global variables (any variable in Main that get assigned)
knots_to_mph = 1.15078030303
# This global is used for triggering a new day in the database's daysgleaned table.
stationDateTime_maxValue = datetime.datetime(2001,1,1) # initialize to some old date
# This also gets written to the log file.
version_number = "1.0"
print "Version Number = ", version_number
# Dictionary of dictionaries data structure used identify all the stations
# to be gleaned and associated parameters. Add more stations here if you like...
station_dic = {
# Washington
'KRLD':{'ID':'318','longName':'KRLD','dst':True}, # Richland, WA
'KMWH':{'ID':'357','longName':'KMWH','dst':True}, # Moses Lake, WA
'KEAT':{'ID':'365','longName':'KEAT','dst':True}, # Wenatchee, WA
'KNOW':{'ID':'366','longName':'KNOW','dst':True}, # Port Angeles, WA
'K0S9':{'ID':'367','longName':'K0S9','dst':True}, # Port Townsend, WA
# Alaska
#'PAWI':{'ID':'389','longName':'PAWI','dst':True}, # Wainwright AP, AK
#'PABR':{'ID':'390','longName':'PABR.2','dst':True}, # Barrow, AK
'PAQT':{'ID':'391','longName':'PAQT','dst':True}, # Nuiqsut, AK
'PASI':{'ID':'392','longName':'PASI','dst':True}, # Sitka, AK
'PAFA':{'ID':'393','longName':'PAFA','dst':True}, # Fairbanks Int AP, AK
'PATQ':{'ID':'394','longName':'PATQ','dst':True}, # Atqasuk, AK
'PANC':{'ID':'395','longName':'PANC','dst':True}, # Anchorage, AK
# BC Canada
'CYAZ':{'ID':'368','longName':'CYAZ','dst':True}, # Tofino, BC
# Oregon
'HOXO':{'ID':'354','longName':'HOXO','dst':True}, # Hood River, OR
'KOTH':{'ID':'356','longName':'KOTH','dst':True}, # North Bend, OR
# Fritz's sites on the cape...
'KHSE':{'ID':'358','longName':'KHSE.2','dst':True}, # Cape Hatteras, NC
'KCQX':{'ID':'359','longName':'KCQX.2','dst':True}, # Chatham, MA
# Hawaii
'PHOG':{'ID':'351','longName':'PHOG','dst':False}, # Maui Airport, HI
'PHJR':{'ID':'352','longName':'PHJR','dst':False}, # Oahu, Kalaeloa Airport, HI
'PHBK':{'ID':'353','longName':'PHBK','dst':False}, # Kauai, Barking Sands Airport, HI
# Kansas
'KOJC':{'ID':'388','longName':'KOJC','dst':True}, # Johnson County Executive Airport, Olathe, KS
# Missouri
'KSTL':{'ID':'360','longName':'KSTL','dst':True}, # Saint Louis, MO
'KJLN':{'ID':'387','longName':'KJLN','dst':True}, # Joplin Regional Airport, MO
# MN
'KMKT':{'ID':'355','longName':'KMKT.2','dst':True}, # Mankato, MN
'KSOM5':{'ID':'361','longName':'KSOM5','dst':True}, # Kasota Prairie, MN
'MN073':{'ID':'362','longName':'MN073','dst':True}, # Mankato, MN
# Columbia River (for delta-p chart)
'KDLS':{'ID':'166','longName':'KDLS','dst':True}, # Dalles, WA
'KTTD':{'ID':'167','longName':'KTTD','dst':True}, # Troutdale, OR
'KHRI':{'ID':'168','longName':'KHRI','dst':True} # Hermiston, OR
}
# Prepare to write to database and log file.
openConnections()
# Make a single JSON request for all the stations. Parse and write to database, once for each station.
processMultipleStations_json( station_dic)
# Close connections to database and log file.
closeConnections()