Automating CSV TO MySQL Insertion in Python- cursor.execute() hangs

2022-03-31T02:33:05

I'm trying to automate CSV insertion into a MySQL database. I create the database and tables in a Python script and then run it from a Jupyter Notebook. However, for some reason the final cursor.execute(SQL_STATEMENT) seems to hang, and I am not able to insert the CSV values into the database. I get no logs suggesting why this is the case:

This is my csv_import_functions.py:

import os
import numpy as np
import pandas as pd
import mysql.connector


def csv_files():
    """Return the names of the ``.csv`` files in the current working directory.

    Only bare file names are returned (no directory component), in whatever
    order ``os.listdir`` yields them.
    """
    working_dir = os.getcwd()
    return [entry for entry in os.listdir(working_dir) if entry.endswith(".csv")]

def create_df(csv_files):
    """Load each named CSV file from the current working directory.

    Every file is first read with pandas' default encoding; if that raises
    ``UnicodeDecodeError`` the file is read again as ISO-8859-1. The name of
    each file is printed once it has been loaded.

    Returns a dict mapping each file name to its ``DataFrame``.
    """
    base_dir = os.getcwd() + '/'
    frames = {}

    for name in csv_files:
        full_path = base_dir + name
        try:
            frame = pd.read_csv(full_path)
        except UnicodeDecodeError:
            # not valid utf-8: fall back to a single-byte encoding
            frame = pd.read_csv(full_path, encoding="ISO-8859-1")
        frames[name] = frame
        print(name)

    return frames


def clean_tbl_name(filename):
    """Derive a table name from a CSV file name.

    The name is lower-cased; spaces, ``$`` and ``%`` are removed; ``-``, ``/``
    and backslashes become underscores; and everything from the first ``.``
    onwards is dropped.
    """
    char_map = str.maketrans({
        " ": None,
        "$": None,
        "%": None,
        "-": "_",
        "/": "_",
        "\\": "_",
    })
    normalised = filename.lower().translate(char_map)

    # keep only the part before the first dot (drops the extension)
    stem, _, _ = normalised.partition('.')
    return stem


def clean_colname(dataframe):
    """Normalise the column names of *dataframe* and build SQL column definitions.

    The column names are rewritten in place on *dataframe*: lower-cased, with
    spaces, ``-``, ``/``, backslashes and ``.`` turned into underscores and
    ``$`` / ``%`` removed.

    Returns a tuple ``(col_str, columns)`` where ``col_str`` is a
    ``"name type, name type, ..."`` string pairing each cleaned column name
    with an SQL type, and ``columns`` is the cleaned column index.

    A dtype with no known SQL equivalent is emitted under its pandas name.
    """
    # force column names to be lower case, no spaces, no dashes
    char_map = str.maketrans({
        " ": "_",
        "-": "_",
        "/": "_",
        "\\": "_",
        ".": "_",
        "$": None,
        "%": None,
    })
    dataframe.columns = [x.lower().translate(char_map) for x in dataframe.columns]

    # pandas dtype name -> SQL column type
    replacements = {
        'object': 'varchar(100)',
        # names reported by pandas' dedicated string dtypes
        'string': 'varchar(100)',
        'str': 'varchar(100)',
        'float64': 'float',
        'int64': 'int',
    }

    def sql_type(dtype):
        name = str(dtype)
        # Datetime/timedelta dtype names carry a unit suffix (e.g.
        # "datetime64[ns]"), so match on the prefix instead of the exact name.
        if name.startswith('datetime64'):
            return 'timestamp'
        if name.startswith('timedelta64'):
            return 'varchar(100)'
        return replacements.get(name, name)

    col_str = ", ".join(
        "{} {}".format(n, sql_type(d))
        for (n, d) in zip(dataframe.columns, dataframe.dtypes))

    return col_str, dataframe.columns


def upload_to_db(host, database, user, password, tbl_name, col_str, file, dataframe, dataframe_columns):
    """Recreate table *tbl_name* and bulk-load *dataframe* into it through a CSV file.

    Steps, in order: connect to MySQL, drop any existing table called
    *tbl_name*, create it from *col_str*, write *dataframe* to *file* in the
    current working directory, import that file with ``LOAD DATA INFILE``,
    commit, and finally issue a ``grant select`` on the table.

    Parameters:
        host, database, user, password: passed to ``mysql.connector.connect``.
        tbl_name: table to drop and recreate; interpolated into the SQL as-is.
        col_str: column definitions in ``"name type, name type"`` form.
        file: CSV file name, relative to the current working directory.
        dataframe: the data to export and load.
        dataframe_columns: header row written to the CSV file.

    The cursor and the connection are always closed, including when one of
    the statements raises; the exception itself is propagated to the caller.
    """
    # NOTE(review): this echoes the password to stdout - consider masking it.
    conn_string = "host=%s, database=%s, user=%s, password=%s" % (
        host, database, user, password)
    print("string is: " + conn_string)
    conn = mysql.connector.connect(
        host=host, database=database, user=user, password=password)

    # Always release the cursor and the connection. A leaked connection with
    # an uncommitted transaction can keep a lock on the table and make the
    # next "drop table" on it wait indefinitely.
    try:
        cursor = conn.cursor()
        try:
            print('opened database successfully')

            print("drop table if exists %s;" % (tbl_name))
            print("create table %s (%s);" % (tbl_name, col_str))

            # drop table with same name
            cursor.execute("drop table if exists %s;" % (tbl_name))

            # create table
            cursor.execute("create table %s (%s);" % (tbl_name, col_str))
            print('{0} was created successfully'.format(tbl_name))

            # save df to csv
            dataframe.to_csv(file, header=dataframe_columns,
                             index=False, encoding='utf-8')

            # Column list = first token of every "name type" definition.
            # Stripping type names as substrings would also eat the start of
            # columns such as "interval" or "floater".
            col_names = ", ".join(
                definition.split(" ", 1)[0]
                for definition in col_str.split(", "))

            # to_csv ends lines with os.linesep by default, so the loader must
            # be told the same terminator (as an SQL escape sequence).
            line_end = "\\r\\n" if os.linesep == "\r\n" else "\\n"

            # upload to db
            SQL_STATEMENT = """
    LOAD DATA INFILE '%s' INTO TABLE %s
    FIELDS TERMINATED BY ',' ENCLOSED BY '"'
    LINES TERMINATED BY '%s'
    IGNORE 1 LINES
    (%s);
    """ % (os.getcwd().replace(os.sep, '/') + '/' + file, tbl_name,
           line_end, col_names)

            print(SQL_STATEMENT)

            cursor.execute(SQL_STATEMENT)

            # Commit right away so the loaded rows survive even if the
            # statement below is rejected.
            conn.commit()
            print('file copied to db')

            # NOTE(review): "to public" looks like PostgreSQL usage; MySQL
            # will treat "public" as an account name - confirm this is wanted.
            cursor.execute("grant select on table %s to public" % tbl_name)
            print('table {0} imported to db completed'.format(tbl_name))
        finally:
            cursor.close()
    finally:
        conn.close()

And my Jupyter Notebook (main.ipynb):

import os
import numpy as np
import pandas as pd
import mysql.connector

#main 

from csv_import_functions import *

#settings
dataset_dir = 'datasets'

#db settings
host = 'localhost'
database = 'nba_data'
user = 'user'
password = 'password'

#configure environment and create main df
#keep the file list under its own name: assigning it to csv_files would
#replace the imported function, and re-running this cell would then fail
#with "'list' object is not callable"
csv_file_names = csv_files()
df = create_df(csv_file_names)

for k in csv_file_names:

    #call dataframe
    dataframe = df[k]

    #clean table name
    tbl_name = clean_tbl_name(k)

    #clean column names
    col_str, dataframe.columns = clean_colname(dataframe)

    #upload data to db
    upload_to_db(host,
                 database,
                 user,
                 password,
                 tbl_name,
                 col_str,
                 file=k,
                 dataframe=dataframe,
                 dataframe_columns=dataframe.columns)

Finally, here is the output before it hangs:

nba-playbyplay.csv
string is: host=localhost, database=nba_data, user=user, password=password
opened database successfully
drop table if exists nba_playbyplay;
create table nba_playbyplay (url varchar(100), gametype varchar(100), location varchar(100), date varchar(100), time varchar(100), winningteam varchar(100), quarter int, secleft int, awayteam varchar(100), awayplay varchar(100), awayscore int, hometeam varchar(100), homeplay float, homescore int, shooter float, shottype float, shotoutcome float, shotdist float, assister float, blocker float, foultype float, fouler float, fouled float, rebounder float, reboundtype float, violationplayer float, violationtype float, timeoutteam float, freethrowshooter float, freethrowoutcome float, freethrownum float, entergame float, leavegame float, turnoverplayer float, turnovertype float, turnovercause float, turnovercauser float, jumpballawayplayer varchar(100), jumpballhomeplayer varchar(100), jumpballposs varchar(100));

Copyright License:
Author: 「clattenburg cake」. Reproduced under the CC BY-SA 4.0 copyright license with a link to the original source & disclaimer.
Link to:https://stackoverflow.com/questions/71682385/automating-csv-to-mysql-insertion-in-python-cursor-execute-hangs

About “Automating CSV TO MySQL Insertion in Python- cursor.execute() hangs” questions

I'm trying to automate csv insertion into a mysql database. I go through creating the database and tables in a Python script and then run it in a Jupyter Notebook. However, for some reason the final
I'm using a python driver (mysql.connector) and do the following: _db_config = { 'user': 'root', 'password': '1111111', 'host': '10.20.30.40', 'database
I have trouble inserting CSV data into a MySQL table with mysql.connector. The code I use looks like this: import mysql.connector import csv andreport = 'testowa.csv' cnx = mysql.connector.
I have a big mysql table and I want to export it to csv file using python. However, when I use cursor.fetchall() my laptop hangs and when I use cursor.execute("select * from table_name") while Tru...
I need to insert a 60000x24 dataframe into a mysql database (MariaDB) using sqlalchemy and python. The database runs locally and the data insertion runs locally as well. For now I have been using the
I did import of csv file in MySQL database by means of mysql-connection in python, but, something went wrong and I completely don't know what exactly and how to fix it. For example, consider last a...
Preventing duplicate row insertion in MySQL while importing a CSV file. I want to insert data into a MySQL table by importing a CSV file. How do I prevent duplicate rows from being inserted?
I am using python 3 in conjunction with py2neo (v 3.1.2) to insert a large amount of data from MySQL to Neo4j. The table in MySQL has about 20 million rows. I want to do the insertion without conve...
I'm a newbie in Python I'm debugging an existing script which is as follows, print "Table name: %s " %table_name_r print "Between: %s " % between cursor = db.cursor() print "Total Rows:
Attempting to take a CSV from a URL and store it in MYSQL. I am able to see the CSV values in terminal, but I'm hung up on loading the values into sql, currently this is only loading the first line.

Copyright License:Reproduced under the CC 4.0 BY-SA copyright license with link to original source & disclaimer.