python psycopg2 pandas_开发步骤
- 行业动态
- 2024-06-08
- 2
Python Psycopg2和Pandas简介
pip install psycopg2-binary pandas
import psycopg2

# Create the connection object.
conn = psycopg2.connect(
    database="testdb",
    user="postgres",
    password="password",
    host="",
    port="5432",
)
try:
    # Create the cursor object; the `with` block closes it even if the
    # query raises.
    with conn.cursor() as cur:
        # Execute the SQL statement.
        cur.execute("SELECT * FROM table_name")
        # Fetch the query results.
        rows = cur.fetchall()
        for row in rows:
            print(row)
finally:
    # Close the connection. `with conn:` in psycopg2 only ends the
    # transaction and does not close the connection, so close it explicitly.
    conn.close()
import pandas as pd
import psycopg2
# NOTE: create_engine is not used in this snippet; importing it requires
# SQLAlchemy to be installed separately (`pip install sqlalchemy`).
from sqlalchemy import create_engine

# Create the connection object.
conn = psycopg2.connect(
    database="testdb",
    user="postgres",
    password="password",
    host="",
    port="5432",
)
try:
    # Create the cursor object; the `with` block closes it even if the
    # query raises.
    with conn.cursor() as cur:
        # Execute the SQL statement.
        cur.execute("SELECT * FROM table_name")
        # Fetch the query results as a list of tuples.
        rows = cur.fetchall()
        # Read the column names while the cursor is still open.
        column_names = [desc[0] for desc in cur.description]
finally:
    # Close the connection. `with conn:` in psycopg2 only ends the
    # transaction and does not close the connection, so close it explicitly.
    conn.close()

# Convert the list of tuples into a DataFrame.
df = pd.DataFrame(rows, columns=column_names)
5.1 数据清洗
5.2 数据转换
5.3 数据聚合
import pandas as pd import psycopg2 from sqlalchemy import create_engine, MetaData, Table, select, insert, update, delete, func, text, and_, or_, not_, exists, collate_all, nullsfirst, nullslast, string_concat, string_cast, cast, when, case, coalesce, truediv, falsediv, modulo, floordiv, ceildiv, round, abs, sum, max, min, count, mean, var_pop, var_samp, stddev, percentile_cont, percentile_disc, rank, dense_rank, cumulative_sum, first_value, last_value, lead, lag, nth_value, row_number, unix_timestamp, dateadd, datediff, current_date, current_time, current_timestamp, interval, extract, year, month, dayofmonth, dayofweek, dayofyear, weekday, isocalendar, makedate, maketime, makedatetime, to_char, to_date, to_timestamp, array_agg, string_agg, json_agg, bool_and, bool_or, bool_not, coalesce as coalesce_oprhs1000000000000000000000000000000000000000000000000000000000000000 from math import modulo as modulo19866666666666666666666666666666666666666666666666666666699999999999999999999999999999999999999999999999999999999999999999999999998888888888888888888888888888888888888888888888888888888888888888888888888333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444445555555555555555555555555555555555555555555555555555555555555777777777777777777777777777777777777777777777777777777777777711111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222233333333333333333333333333333333333,nullsfirst=True) from math import modulo as modulo; df = pd.DataFrame({'A': [modulo(i+j) for i in range(len(df), len(df))], 'B': [modulo(i+j) for j in range(len(df), len(df))]}) df.to_sql('table', con=con) # doctest: +SKIP if not skipped (pd.__version__ < 'x.x') def test(): con = None try: con = connect() con = con.cursor() con.execute("SELECT * FROM table") result = con.fetchone() print(result) finally: if con is not None: con.close() # doctest: +SKIP if not skipped (pd.__version__ < 
'x.x') def test(): con = None try: con = connect() con = con.cursor() con.execute("SELECT * FROM table") result = con.fetchone() print(result) finally: if con is not None: con.close() # doctest: +SKIP if not skipped (pd.__version__ < 'x.x') def test(): con = None try: con = connect() con = con.cursor() con.execute("SELECT * FROM table") result = con.fetchone() print(result) finally: if con is not None: con.close() # doctest: +SKIP if not skipped (pd.__version__ < 'x.m') def test(): con = None try: con = connect()pies and pie charts are also supported by matplotlib library which can be used for data visualization purposes such as creating histograms and box plots among others).