Skip to content

Commit

Permalink
update adapter version #4 #25
Browse files Browse the repository at this point in the history
  • Loading branch information
Ludee committed Dec 12, 2017
1 parent b8f20ee commit bbac026
Showing 1 changed file with 66 additions and 63 deletions.
129 changes: 66 additions & 63 deletions database_adapter/reeem_adapter_times_paneu.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,29 @@
"""read data from file and write to database"""

__copyright__ = "© Reiner Lemoine Institut"
__license__ = "GNU Affero General Public License Version 3 (AGPL-3.0)"
__url__ = "https://www.gnu.org/licenses/agpl-3.0.en.html"
__author__ = "Ludwig Hülk"
__issue__ = "https://github.com/ReeemProject/reeem_db/issues/4"
__version__ = "v0.1.1"
__copyright__ = "© Reiner Lemoine Institut"
__license__ = "GNU Affero General Public License Version 3 (AGPL-3.0)"
__url__ = "https://www.gnu.org/licenses/agpl-3.0.en.html"
__author__ = "Ludwig Hülk"
__issue__ = "https://github.com/ReeemProject/reeem_db/issues/4"
__version__ = "v0.1.2"

import os
import sys
import getpass
import logging
import time
import datetime
import pandas as pd
import numpy as np
from reeem_io import *
from sqlalchemy import *


## inputs
# inputs
model = 'TIMES PanEU'
pathway = 'Test_data' # 'BASE', 'BASE_TI1_P1', 'BASE_TI1_P2', 'Test_data', 'Pilot'
version = 'V1' # 'V2', 'V3'
pathway = 'BASE' # 'Pilot', 'BASE', 'BASE_TI1_P1', 'BASE_TI1_P2'
version = 'V1' # 'V2', 'V3'

file_name_input = 'REEEM_TIMES_PanEU_Input_Structure.xlsx'
# file_name_input = 'REEEM_TIMES_PanEU_Input_BASE.xlsx'
# file_name_input = 'REEEM_TIMES_PanEU_Input_F1_TI1_P1.xlsx'
# file_name_input = 'REEEM_TIMES_PanEU_Input.xlsx'
file_name_output = 'REEEM_TIMES_PanEU_Output_Structure.xlsx'
file_name_output = 'REEEM_TIMES_PanEU_Output_20171114_BASE_withoutRen.Target.xlsx'
# file_name_output = 'REEEM_TIMES_PanEU_Output_Structure.xlsx'
# file_name_output = 'REEEM_TIMES_PanEU_Output_BASE.xlsx'
# file_name_output = 'REEEM_TIMES_PanEU_Output_F1_TI1_P1.xlsx'
# file_name_output = 'REEEM_TIMES_PanEU_Output.xlsx'
Expand All @@ -41,44 +36,50 @@
empty_rows = 4

# database table
db_schema = 'model_draft'
db_table_input = 'reeem_times_paneu_input'
db_table_output = 'reeem_times_paneu_output'
db_schema = 'model_draft'
db_table_input = 'reeem_times_paneu_input'
db_table_output = 'reeem_times_paneu_output'


## functions
def times_paneu_2_reeem_db(model, pathway, version, file_name, empty_rows, db_schema, db_table, region, con):
# functions
def times_paneu_2_reeem_db(model, pathway, version, file_name,
empty_rows, db_schema, db_table, region, con):
"""read excel file and sheets, make dataframe and write to database"""

log = logger()
## read file

# read file
path = os.path.join('Model_Data', pathway, model, file_name)
xls = pd.ExcelFile(path)
df = pd.read_excel(xls, region, header=empty_rows, index_col='ID')
log.info('...read sheet: {}'.format(region))
## make dataframe
df.columns = ['indicator', 'unit',
'2010', '2015', '2020', '2025', '2030', '2035', '2040', '2045', '2050',
'field', 'category', 'aggregation', 'source']

# make dataframe
df.columns = ['indicator', 'unit',
'2010', '2015', '2020', '2025', '2030', '2035', '2040',
'2045', '2050', 'field', 'category', 'aggregation', 'source']
df.index.names = ['nid']
# print(df.dtypes)
# print(df.head())

## separate columns
dfunit = df[['field', 'category', 'indicator', 'unit', 'aggregation', 'source']].copy().dropna()

# separate columns
dfunit = df[['field', 'category', 'indicator', 'unit', 'aggregation',
'source']].copy().dropna()
dfunit.index.names = ['nid']
dfunit.columns = ['field', 'category', 'indicator', 'unit', 'aggregation', 'source']
dfunit.columns = ['field', 'category', 'indicator', 'unit', 'aggregation',
'source']
# print(dfunit)
# print(dfunit.dtypes)

## drop separated columns
dfclean = df.drop(['field', 'category', 'indicator', 'unit', 'aggregation', 'source'],axis=1).dropna()

# drop separated columns
dfclean = df.drop(
['field', 'category', 'indicator', 'unit', 'aggregation', 'source'],
axis=1).dropna()
# print(dfclean)
## stack dataframe

# stack dataframe
dfstack = dfclean.stack().reset_index()
dfstack.columns = ['nid','year','value']
dfstack.columns = ['nid', 'year', 'value']
# dfstack.set_index(['nid','year'], inplace=True)
dfstack.index.names = ['id']
# print(dfstack)
Expand All @@ -90,15 +91,15 @@ def times_paneu_2_reeem_db(model, pathway, version, file_name, empty_rows, db_sc
dfdb['version'] = version
dfdb['region'] = region
dfdb['updated'] = (datetime.datetime.fromtimestamp(time.time())
.strftime('%Y-%m-%d %H:%M:%S'))
.strftime('%Y-%m-%d %H:%M:%S'))
# print(dfdb)

# copy dataframe to database
dfdb.to_sql(con = con,
schema = db_schema,
name = db_table,
if_exists='append',
index = True )
dfdb.to_sql(con=con,
schema=db_schema,
name=db_table,
if_exists='append',
index=True)
log.info('......sheet {} sucessfully imported...'.format(region))


Expand All @@ -114,31 +115,33 @@ def times_paneu_2_reeem_db(model, pathway, version, file_name, empty_rows, db_sc
log.info('...read file: {}'.format(file_name_input))
log.info('...read file: {}'.format(file_name_output))
log.info('...establish database connection...')

# connection
con = reeem_session()
log.info('...read file(s)...')

# input
for region in regions:
times_paneu_2_reeem_db(model, pathway, version, file_name_input, empty_rows,
db_schema, db_table_input, region, con)

# scenario log
reeem_scenario_log(con,version,'import', db_schema, db_table_input,
os.path.basename(__file__), file_name_input)

# # input
# for region in regions:
# times_paneu_2_reeem_db(model, pathway, version, file_name_input,
# empty_rows,
# db_schema, db_table_input, region, con)
#
# # scenario log
# reeem_scenario_log(con, version, 'import', db_schema, db_table_input,
# os.path.basename(__file__), file_name_input)

# output
for region in regions:
times_paneu_2_reeem_db(model, pathway, version, file_name_output, empty_rows,
db_schema, db_table_output, region, con)

times_paneu_2_reeem_db(model, pathway, version, file_name_output,
empty_rows,
db_schema, db_table_output, region, con)

# scenario log
reeem_scenario_log(con,version,'import', db_schema, db_table_output,
os.path.basename(__file__), file_name_output)
reeem_scenario_log(con, version, 'import', db_schema, db_table_output,
os.path.basename(__file__), file_name_output)

# close connection
con.close()
log.info('...script successfully executed in {:.2f} seconds...'
.format(time.time() - start_time))
.format(time.time() - start_time))
log.info('...database connection closed. Goodbye!')

0 comments on commit bbac026

Please sign in to comment.