Commit d7717864 authored by Mikkel Friis-Møller
Browse files

switched from 'find_in_set' to 'like' as 'find_in_set' slows down extremely on...

Switched from 'find_in_set' to 'like', because 'find_in_set' becomes extremely slow on the high-frequency database.
parent 53299f76
Pipeline #6086 passed with stage
in 37 seconds
import mysql.connector
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime
def SQLconnect(I):
cnx = mysql.connector.connect(
......@@ -21,6 +21,7 @@ def SQLdataframe(cnx, table_name, limit='default', col='*', row='1=1', logical_s
col, table_name, row)
if len(logical_statements)>0:
string += ' AND ' + ' AND '.join(logical_statements)
# print(string)
df = pd.read_sql(
string,
con=cnx
......@@ -46,9 +47,10 @@ def SQL_read_table(I):
rows = I['rows']
strings = []
for row in rows:
string = " FIND_IN_SET(`Name`,'{}')>0".format(','.join(rows[row]))
strings.append(string)
row = ' AND '.join(strings)
for ts in rows[row]:
string = " `{}` LIKE '{}'".format(row, ts)
strings.append(string)
row = '(' + ' OR '.join(strings) + ')'
else:
row = '1=1'
if 'logical_statements' in I:
......@@ -309,59 +311,47 @@ def fill_channel_specifications(I):
if __name__ == '__main__':
import password
from pprint import pprint
import pickle
from datetime import datetime
# tbls = ['min', 'max', 'mean', 'std', 'metadata']
# lcs = ['lc1_wsp']
# for t in tbls:
# for lc in lcs:
# tbl = lc + '_sim_' + t
# I = {
# 'user': 'mikf',
# 'password': password.pass_sql,
# 'host': '10.40.20.14',
# 'database': 'v52_simulation',
# 'table_name': tbl,
# 'old_name': 'name',
# 'new_name': 'Name',
# 'unique_keys': ['Name','LC'],
# }
## alter_column_name(I)
# df = SQL_read_table(I)
# dup_index = df.duplicated(keep='first')
# clear_duplicates(I)
# pprint(df['windspeed-global-Abs_vhor-0.00-0.00--44.00'])
# print(list(df)[0:5])
user = password.user
password = password.pass_sql
I = {
'user': 'mikf',
'password': password.pass_sql,
'table_name': 'caldata_2018_03_50hz',
'user': user,
'password': password,
'database': 'v52_wtg',
'host': '10.40.20.14',
'database': 'v52_simulation',
'table_name': 'calmeans',
# 'old_name': 'name',
# 'new_name': 'Name',
# 'unique_keys': ['Name','LC'],
'logical_statements': ['`windspeed-global-Vy-0.00-0.00--44.00` > 6',
'`windspeed-global-Vy-0.00-0.00--44.00`<7'],
}
df = SQL_read_table(I)
print(df['windspeed-global-Vy-0.00-0.00--44.00'])
# fill_Channel_names(I)
# meta_data = pickle.load(open(r"C:\Sandbox\Git\V52\temp\HAWC2\res\lc1_wsp\201802010100_metadata.p",'rb'))
# create_channel_spec(I)
# fill_channel_spec(I,meta_data)
# pprint(dic)
# for key in dic:
# print(len(dic[key]),key)
# fill_channel_specifications(I)
print('\n********* Done ***********\n')
cnx = SQLconnect(I)
# string = "SELECT * FROM caldata_2018_03_50hz WHERE `Name` LIKE '201803010100'"
# startTime = datetime.now()
# df = pd.read_sql(string, con=cnx)
# print(datetime.now() - startTime)
## 0:00:22.675397
# string = "SELECT * FROM caldata_2018_03_50hz WHERE `Name` >= '201803010100' AND `Name` <= '201803010100'"
# startTime = datetime.now()
# df = pd.read_sql(string, con=cnx)
# print(datetime.now() - startTime)
## 0:00:23.207084
# string = "SELECT * FROM caldata_2018_03_50hz WHERE FIND_IN_SET(`Name`,'201803010100')"
# startTime = datetime.now()
# df = pd.read_sql(string, con=cnx)
# print(datetime.now() - startTime)
## Impossible
# string = "SELECT * FROM caldata_2018_03_50hz WHERE `Name` LIKE '201803010100' OR `Name` LIKE '201803010110' OR `Name` LIKE '201803010120'"
# startTime = datetime.now()
# df = pd.read_sql(string, con=cnx)
# print(datetime.now() - startTime)
## 0:01:08.708687
# string = "SELECT * FROM caldata_2018_03_50hz WHERE `Name` >= '201803010100' AND `Name` <= '201803010120'"
# startTime = datetime.now()
# df = pd.read_sql(string, con=cnx)
# print(datetime.now() - startTime)
## 0:01:10.284159
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment