Skip to content

Commit

Permalink
adjust times paneu to new structure #4 #5
Browse files Browse the repository at this point in the history
  • Loading branch information
Ludee committed Nov 2, 2017
1 parent 590a4bb commit 7da1f50
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 23 deletions.
6 changes: 3 additions & 3 deletions database_adapter/reeem_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@
def reeem_session():
"""SQLAlchemy session object with valid connection to reeem database"""
print('Please provide connection parameters to database:\n' +
'Hit [Enter] to take defaults')
'Hit [Enter] to take defaults:')

#host = input('host (default 130.226.55.43): ')
host = '130.226.55.43'
#port = input('port (default 5432): ')
port = '5432'
#database = input("database name (default 'reeem'): ")
database = 'reeem'
user = input('user (default test_user): ')
user = input('User name (default test_user): ')
# password = input('password: ')
password = getpass.getpass(prompt='password: ',
password = getpass.getpass(prompt='Password: ',
stream=sys.stderr)
con = create_engine(
'postgresql://' + '%s:%s@%s:%s/%s' % (user,
Expand Down
49 changes: 29 additions & 20 deletions database_adapter/reeem_times_paneu_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,43 @@ def log():
datefmt='%Y-%m-%d %I:%M:%S')
return logger


def excel_dataframe(version, region, con):
"""read excel file and sheets and make dataframe"""

# read data from file
path = (r'G:\\dtu\\reeem_db\\database\\database_adapter\\data\\TIMES_PANEU\\')
file = 'REEEM_TIMES_PanEU_Input_F1_TI1_P1.xlsx' #'REEEM_TIMES_PanEU_Input.xlsx'
pathway = 'Test_data'
file = 'REEEM_TIMES_PanEU_Input_Structure.xlsx'

# file = 'REEEM_TIMES_PanEU_Input_F1_TI1_P1.xlsx'
# file = 'REEEM_TIMES_PanEU_Input.xlsx'

#path = (r'G:\\github\\ReeemProject\\reeem_db\\database_adapter\\Model_Data\\Test_data')
path = os.path.join('Model_Data', pathway, 'TIMES PanEU',file)

logger = log()
# logger.info('...read file: {}'.format(file))
xls = pd.ExcelFile(path+file)
xls = pd.ExcelFile(path)
sheet = region
logger.info('...read sheet: {}'.format(sheet))
df = pd.read_excel(xls, sheet, header=4, index_col='ID') # header=1
df.columns = ['name','unit','2010','2015','2020','2025','2030','2035',
'2040','2045','2050','table','aggregation','source'] # add source

df = pd.read_excel(xls, sheet, header=4, index_col='ID')
df.columns = ['indicator', 'unit',
'2010', '2015', '2020', '2025', '2030', '2035', '2040', '2045', '2050',
'field', 'category', 'aggregation', 'source']
df.index.names = ['nid']
# logger.info('...read data...')
logger.info('...read data...')
# print(df.dtypes)
# print(df.head())

# separate columns
dfunit = df[['table','name','unit','aggregation','source']].copy().dropna() # add source
dfunit = df[['field', 'category', 'indicator', 'unit', 'aggregation', 'source']].copy().dropna()
dfunit.index.names = ['nid']
dfunit.columns = ['table','name','unit','aggregation','source'] # add source
dfunit.columns = ['field', 'category', 'indicator', 'unit', 'aggregation', 'source']
# print(dfunit)
# print(dfunit.dtypes)

# drop separated columns
dfclean = df.drop(['table','name','unit','aggregation','source'],axis=1).dropna() # add source
dfclean = df.drop(['field', 'category', 'indicator', 'unit', 'aggregation', 'source'],axis=1).dropna()
# print(dfclean)

# stack
Expand All @@ -65,7 +73,7 @@ def excel_dataframe(version, region, con):
# print(dfstack)

# database dataframe
# logger.info('...reshape dataframe...')
logger.info('...reshape dataframe...')
dfdb = dfstack.join(dfunit, on='nid')
dfdb.index.names = ['dfid']
dfdb['region'] = region
Expand All @@ -77,7 +85,7 @@ def excel_dataframe(version, region, con):
# copy dataframe to database
dfdb.to_sql(con=con,
schema='model_draft',
name='times_paneu_service',
name='reeem_times_paneu_input',
if_exists='append',
index = True )
logger.info('...dataframe sucessfully imported...')
Expand All @@ -87,19 +95,20 @@ def excel_dataframe(version, region, con):
logger = log()
start_time = time.time()
logger.info('script started...')
logger.info('...establish database connection...')
con = reeem_session()
version = 'F1_TI1_P1' # F1_TI1_P1
version = 'Test_data' # 'F1_TI1_P1' # 'F1_TI1_P1'
# region = 'EU28'
# excel_dataframe(version, region, con)
regions = ['EU28', 'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES',
'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL',
'PL', 'PT', 'RO', 'SE', 'SI', 'SK', 'UK']
#regions = ['EU28', 'AT', 'BE' ]
#regions = ['EU28', 'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES',
# 'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL',
# 'PL', 'PT', 'RO', 'SE', 'SI', 'SK', 'UK']
regions = ['EU28', 'AT', 'BE' ]
for region in regions:
excel_dataframe(version, region, con)
reeem_scenario_log(con,version,'import', 'model_draft',
'times_paneu_service','reeem_times_paneu_service_input.py',
'REEEM_TIMES_PanEU_Input_F1_TI1_P1.xlsx') # add new filename
'reeem_times_paneu_input','reeem_times_paneu_input.py',
'REEEM_TIMES_PanEU_Input_Structure.xlsx') # add new filename
con.close()
logger.info('...database connection closed...')
logger.info('...script successfully executed in {:.2f} seconds. Goodbye!'
Expand Down

0 comments on commit 7da1f50

Please sign in to comment.