From bbac0261cef29cb51202c58dfe7a6cebe94650cf Mon Sep 17 00:00:00 2001 From: Ludee Date: Tue, 12 Dec 2017 16:11:28 +0100 Subject: [PATCH] update adapter version #4 #25 --- database_adapter/reeem_adapter_times_paneu.py | 129 +++++++++--------- 1 file changed, 66 insertions(+), 63 deletions(-) diff --git a/database_adapter/reeem_adapter_times_paneu.py b/database_adapter/reeem_adapter_times_paneu.py index 4c297d8..f149d64 100644 --- a/database_adapter/reeem_adapter_times_paneu.py +++ b/database_adapter/reeem_adapter_times_paneu.py @@ -1,34 +1,29 @@ """read data from file and write to database""" -__copyright__ = "© Reiner Lemoine Institut" -__license__ = "GNU Affero General Public License Version 3 (AGPL-3.0)" -__url__ = "https://www.gnu.org/licenses/agpl-3.0.en.html" -__author__ = "Ludwig Hülk" -__issue__ = "https://github.com/ReeemProject/reeem_db/issues/4" -__version__ = "v0.1.1" +__copyright__ = "© Reiner Lemoine Institut" +__license__ = "GNU Affero General Public License Version 3 (AGPL-3.0)" +__url__ = "https://www.gnu.org/licenses/agpl-3.0.en.html" +__author__ = "Ludwig Hülk" +__issue__ = "https://github.com/ReeemProject/reeem_db/issues/4" +__version__ = "v0.1.2" import os -import sys -import getpass -import logging import time import datetime import pandas as pd -import numpy as np from reeem_io import * -from sqlalchemy import * - -## inputs +# inputs model = 'TIMES PanEU' -pathway = 'Test_data' # 'BASE', 'BASE_TI1_P1', 'BASE_TI1_P2', 'Test_data', 'Pilot' -version = 'V1' # 'V2', 'V3' +pathway = 'BASE' # 'Pilot', 'BASE', 'BASE_TI1_P1', 'BASE_TI1_P2' +version = 'V1' # 'V2', 'V3' file_name_input = 'REEEM_TIMES_PanEU_Input_Structure.xlsx' # file_name_input = 'REEEM_TIMES_PanEU_Input_BASE.xlsx' # file_name_input = 'REEEM_TIMES_PanEU_Input_F1_TI1_P1.xlsx' # file_name_input = 'REEEM_TIMES_PanEU_Input.xlsx' -file_name_output = 'REEEM_TIMES_PanEU_Output_Structure.xlsx' +file_name_output = 'REEEM_TIMES_PanEU_Output_20171114_BASE_withoutRen.Target.xlsx' +# file_name_output = 'REEEM_TIMES_PanEU_Output_Structure.xlsx' # file_name_output = 'REEEM_TIMES_PanEU_Output_BASE.xlsx' # file_name_output = 'REEEM_TIMES_PanEU_Output_F1_TI1_P1.xlsx' # file_name_output = 'REEEM_TIMES_PanEU_Output.xlsx' @@ -41,44 +36,50 @@ empty_rows = 4 # database table -db_schema = 'model_draft' -db_table_input = 'reeem_times_paneu_input' -db_table_output = 'reeem_times_paneu_output' +db_schema = 'model_draft' +db_table_input = 'reeem_times_paneu_input' +db_table_output = 'reeem_times_paneu_output' + -## functions -def times_paneu_2_reeem_db(model, pathway, version, file_name, empty_rows, db_schema, db_table, region, con): +# functions +def times_paneu_2_reeem_db(model, pathway, version, file_name, + empty_rows, db_schema, db_table, region, con): """read excel file and sheets, make dataframe and write to database""" - + log = logger() - - ## read file + + # read file path = os.path.join('Model_Data', pathway, model, file_name) xls = pd.ExcelFile(path) df = pd.read_excel(xls, region, header=empty_rows, index_col='ID') log.info('...read sheet: {}'.format(region)) - - ## make dataframe - df.columns = ['indicator', 'unit', - '2010', '2015', '2020', '2025', '2030', '2035', '2040', '2045', '2050', - 'field', 'category', 'aggregation', 'source'] + + # make dataframe + df.columns = ['indicator', 'unit', + '2010', '2015', '2020', '2025', '2030', '2035', '2040', + '2045', '2050', 'field', 'category', 'aggregation', 'source'] df.index.names = ['nid'] # print(df.dtypes) # print(df.head()) - - ## seperate columns - dfunit = df[['field', 'category', 'indicator', 'unit', 'aggregation', 'source']].copy().dropna() + + # seperate columns + dfunit = df[['field', 'category', 'indicator', 'unit', 'aggregation', + 'source']].copy().dropna() dfunit.index.names = ['nid'] - dfunit.columns = ['field', 'category', 'indicator', 'unit', 'aggregation', 'source'] + dfunit.columns = ['field', 'category', 'indicator', 'unit', 'aggregation', + 'source'] # print(dfunit) # print(dfunit.dtypes) - - ## drop seperated columns - dfclean = df.drop(['field', 'category', 'indicator', 'unit', 'aggregation', 'source'],axis=1).dropna() + + # drop seperated columns + dfclean = df.drop( + ['field', 'category', 'indicator', 'unit', 'aggregation', 'source'], + axis=1).dropna() # print(dfclean) - - ## stack dataframe + + # stack dataframe dfstack = dfclean.stack().reset_index() - dfstack.columns = ['nid','year','value'] + dfstack.columns = ['nid', 'year', 'value'] # dfstack.set_index(['nid','year'], inplace=True) dfstack.index.names = ['id'] # print(dfstack) @@ -90,15 +91,15 @@ def times_paneu_2_reeem_db(model, pathway, version, file_name, empty_rows, db_sc dfdb['version'] = version dfdb['region'] = region dfdb['updated'] = (datetime.datetime.fromtimestamp(time.time()) - .strftime('%Y-%m-%d %H:%M:%S')) + .strftime('%Y-%m-%d %H:%M:%S')) # print(dfdb) - + # copy dataframe to database - dfdb.to_sql(con = con, - schema = db_schema, - name = db_table, - if_exists='append', - index = True ) + dfdb.to_sql(con=con, + schema=db_schema, + name=db_table, + if_exists='append', + index=True) log.info('......sheet {} sucessfully imported...'.format(region)) @@ -114,31 +115,33 @@ def times_paneu_2_reeem_db(model, pathway, version, file_name, empty_rows, db_sc log.info('...read file: {}'.format(file_name_input)) log.info('...read file: {}'.format(file_name_output)) log.info('...establish database connection...') - + # connection con = reeem_session() log.info('...read file(s)...') - - # input - for region in regions: - times_paneu_2_reeem_db(model, pathway, version, file_name_input, empty_rows, - db_schema, db_table_input, region, con) - - # scenario log - reeem_scenario_log(con,version,'import', db_schema, db_table_input, - os.path.basename(__file__), file_name_input) + + # # input + # for region in regions: + # times_paneu_2_reeem_db(model, pathway, version, file_name_input, + # empty_rows, + # db_schema, db_table_input, region, con) + # + # # scenario log + # reeem_scenario_log(con, version, 'import', db_schema, db_table_input, + # os.path.basename(__file__), file_name_input) # output for region in regions: - times_paneu_2_reeem_db(model, pathway, version, file_name_output, empty_rows, - db_schema, db_table_output, region, con) - + times_paneu_2_reeem_db(model, pathway, version, file_name_output, + empty_rows, + db_schema, db_table_output, region, con) + # scenario log - reeem_scenario_log(con,version,'import', db_schema, db_table_output, - os.path.basename(__file__), file_name_output) - + reeem_scenario_log(con, version, 'import', db_schema, db_table_output, + os.path.basename(__file__), file_name_output) + # close connection con.close() log.info('...script successfully executed in {:.2f} seconds...' - .format(time.time() - start_time)) + .format(time.time() - start_time)) log.info('...database connection closed. Goodbye!')