博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
外连接的用法 -- 《SQL进阶教程》 jupyter note
阅读量:5270 次
发布时间:2019-06-14

本文共 10657 字,大约阅读时间需要 35 分钟。

import pandas as pdimport sqlite3conn = sqlite3.connect('1-5.db')

用外连接进行行列转换1(行 -> 列): 制作交叉表

怎么使用outer join,将row转换成column

下面的方式一,使用的是外连接的方法。但是效果却是最差的。

# create Course Table and insert dataconn.execute("""    CREATE TABLE IF NOT EXISTS Courses (        name VARCHAR(10) NOT NULL,         course VARCHAR(10),        PRIMARY KEY(name, course)    ) """)conn.execute("""DELETE FROM Courses;""")conn.execute("""    INSERT INTO Courses(name, course)     VALUES         ('赤井', 'SQL入门'),        ('赤井', 'UNIX基础'),        ('铃木', 'SQL入门'),        ('工藤', 'SQL入门'),        ('工藤', 'Java中级'),        ('吉田', 'UNIX基础'),        ('渡边', 'SQL入门')""")pd.read_sql_query('SELECT * FROM Courses', conn)
  name course
0 赤井 SQL入门
1 赤井 UNIX基础
2 铃木 SQL入门
3 工藤 SQL入门
4 工藤 Java中级
5 吉田 UNIX基础
6 渡边 SQL入门
# 方式一: 每个要转换的列都使用一pd.read_sql_query("""    SELECT c0.name,        CASE WHEN c1.name IS NOT NULL THEN 'O' ELSE 'X' END AS 'SQL入门',        CASE WHEN c2.name IS NOT NULL THEN 'O' ELSE 'X' END AS 'UNIX基础',        CASE WHEN c3.name is NOT NULL THEN 'O' ELSE 'X' END AS 'Java中级'    FROM (SELECT DISTINCT name FROM Courses) C0    LEFT OUTER JOIN        (SELECT name FROM Courses WHERE course='SQL入门') C1        ON c0.name = c1.name    LEFT OUTER JOIN        (SELECT name FROM Courses WHERE course='UNIX基础') C2        ON c0.name = c2.name    LEFT OUTER JOIN        (SELECT name FROM Courses WHERE course='Java中级') c3        ON c0.name = c3.name;""", conn)
  name SQL入门 UNIX基础 Java中级
0 吉田 X O X
1 工藤 O X O
2 渡边 O X X
3 赤井 O O X
4 铃木 O X X
# 方式二: 使用标量子查询代替外连接# 优点是,加入新的列,只需要修改SELECT下的子查询。而方式一需要修改SELECT和FROM两处.# 缺点是,在SELECT中使用标量子查询,开销比较大。因为需要对每一行都进行一次或多次子查询.pd.read_sql_query("""    SELECT c0.name,        (SELECT 'O'         FROM Courses AS c1         WHERE course='SQL入门' AND c0.name = c1.name) AS 'SQL入门',        (SELECT 'O'         FROM Courses AS c2         WHERE course='UNIX基础' AND c0.name = c2.name) AS 'UNIX基础',        (SELECT 'O'         FROM Courses AS c3         WHERE course='Java中级' AND c0.name = c3.name) AS 'Java中级'    FROM (SELECT DISTINCT name FROM Courses) AS c0;""", conn).replace(to_replace=[None], value='X')
  name SQL入门 UNIX基础 Java中级
0 吉田 X O X
1 工藤 O X O
2 渡边 O X X
3 赤井 O O X
4 铃木 O X X
# 方式三: 嵌套使用CASE表单是pd.read_sql_query("""    SELECT name,      CASE WHEN SUM(CASE WHEN course='SQL入门' THEN 1 ELSE 0 END) = 1 THEN 'O' ELSE 'X' END AS 'SQL入门',      CASE WHEN SUM(CASE WHEN course='UNIX基础' THEN 1 ELSE 0 END) = 1 THEN 'O' ELSE 'X' END AS 'UNIX基础',      CASE WHEN SUM(CASE WHEN course='Java中级' THEN 1 ELSE 0 END) = 1 THEN 'O' ELSE 'X' END AS 'Java中级'    FROM Courses    GROUP BY name""", conn)
  name SQL入门 UNIX基础 Java中级
0 吉田 X O X
1 工藤 O X O
2 渡边 O X X
3 赤井 O O X
4 铃木 O X X

用外连接进行行列转换2(列 -> 行): 汇总重复项于一列

# create Personnel Table and insert dataconn.execute("""    CREATE TABLE IF NOT EXISTS Personnel (        employee VARCHAR(10) NOT NULL PRIMARY KEY,        child_1 VARCHAR(10),        child_2 VARCHAR(10),        child_3 VARCHAR(10)    ) """)conn.execute("""DELETE FROM Personnel;""")conn.execute("""    INSERT INTO Personnel(employee, child_1, child_2, child_3)     VALUES         ('赤井', '一郎', '二郎', '三郎'),        ('工藤', '春子', '夏子', null),        ('铃木', '夏子', null, null),        ('吉田', null, null, null)""")pd.read_sql_query('SELECT * FROM Personnel', conn)
  employee child_1 child_2 child_3
0 赤井 一郎 二郎 三郎
1 工藤 春子 夏子 None
2 铃木 夏子 None None
3 吉田 None None None
# 这个方法,会把child为null的结果也返回。# 但是如果排除掉child为null的结果,那么吉田这个employee因为没有孩子,就不会出现在结果表里面pd.read_sql_query("""    SELECT employee, child_1 AS child FROM Personnel    UNION ALL    SELECT employee, child_2 AS child FROM Personnel    UNION ALL    SELECT employee, child_3 AS child FROM Personnel""", conn)
  employee child
0 赤井 一郎
1 工藤 春子
2 铃木 夏子
3 吉田 None
4 赤井 二郎
5 工藤 夏子
6 铃木 None
7 吉田 None
8 赤井 三郎
9 工藤 None
10 铃木 None
11 吉田 None
conn.execute("""    CREATE VIEW IF NOT EXISTS Children(child) AS        SELECT child_1 FROM Personnel        UNION        SELECT child_2 FROM Personnel        UNION        SELECT child_3 FROM Personnel""")pd.read_sql_query("""    SELECT EMP.employee, CHILDREN.child    FROM Personnel EMP        LEFT OUTER JOIN Children            ON Children.child IN (EMP.child_1, EMP.child_2, EMP.child_3)""", conn)
  employee child
0 赤井 一郎
1 赤井 三郎
2 赤井 二郎
3 工藤 夏子
4 工藤 春子
5 铃木 夏子
6 吉田 None
# 习题1-5-2: 求每个雇员的孩子数量pd.read_sql_query("""    SELECT EMP.employee, COUNT(CHILDREN.child) AS child_cnt    FROM Personnel EMP        LEFT OUTER JOIN Children            ON Children.child IN (EMP.child_1, EMP.child_2, EMP.child_3)    GROUP BY EMP.employee""", conn)
  employee child_cnt
0 吉田 0
1 工藤 2
2 赤井 3
3 铃木 1

在交叉表里制作嵌套式表侧栏

# 创建相关的表conn.execute("""    CREATE TABLE IF NOT EXISTS TblAge (        age_class integer PRIMARY KEY,        age_range varchar(20)    );""")conn.execute("""    CREATE TABLE IF NOT EXISTS TblSex (        sex_cd char(1) PRIMARY KEY,        sex char(1)    );""")conn.execute("""    CREATE TABLE IF NOT EXISTS TblPop (        pref_name VARCHAR(10),        age_class integer,        sex_cd char(1),        population integer,                PRIMARY KEY (pref_name, age_class, sex_cd),        FOREIGN KEY (age_class) REFERENCES TblAge(age_class),        FOREIGN KEY (sex_cd) REFERENCES TblSex(sex_cd)    );""")# 确保没有数据conn.execute("""DELETE FROM TblPop""")conn.execute("""DELETE FROM TblSex""")conn.execute("""DELETE FROM TblAge""")# 插入数据conn.execute("""    INSERT INTO TblAge(age_class, age_range) VALUES        (1, '21岁 ~ 30岁'),        (2, '31岁 ~ 40岁'),        (3, '41岁 ~ 50岁')""")conn.execute("""    INSERT INTO TblSex(sex_cd, sex) VALUES        ('m', '男'),        ('f', '女')""")conn.execute("""    INSERT INTO TblPop(pref_name, age_class, sex_cd, population) VALUES        ('秋田', 1, 'm', 400),        ('秋田', 3, 'm', 1000),        ('秋田', 1, 'f', 800),        ('秋田', 3, 'f', 1000),        ('青森', 1, 'm', 700),        ('青森', 1, 'f', 500),        ('青森', 3, 'f', 800),        ('东京', 1, 'm', 1500),        ('东京', 1, 'f', 1200),        ('千叶', 1, 'm', 900),        ('千叶', 1, 'f', 1000),        ('千叶', 3, 'f', 900)""")
pd.read_sql("""    SELECT * FROM TblAge""", conn)
  age_class age_range
0 1 21岁 ~ 30岁
1 2 31岁 ~ 40岁
2 3 41岁 ~ 50岁
pd.read_sql("""    SELECT * FROM TblSex""", conn)
  sex_cd sex
0 m
1 f
pd.read_sql("""    SELECT * FROM TblPop""", conn)
  pref_name age_class sex_cd population
0 秋田 1 m 400
1 秋田 3 m 1000
2 秋田 1 f 800
3 秋田 3 f 1000
4 青森 1 m 700
5 青森 1 f 500
6 青森 3 f 800
7 东京 1 m 1500
8 东京 1 f 1200
9 千叶 1 m 900
10 千叶 1 f 1000
11 千叶 3 f 900
# 下面这种,以两张表作为外连接操作的方式。# 有时候并不能满足需求,比如不会显示age_class=2的情况,因为没有对应的人口数据pd.read_sql("""    SELECT MASTER1.age_class AS age_class,           MASTER2.sex_cd AS sex_cd,           DATA.pop_tohoku AS pop_tohoku,           DATA.pop_kanto AS pop_kanto    FROM (SELECT age_class, sex_cd,                 SUM(CASE WHEN pref_name IN ('青森', '秋田') THEN population ELSE NULL END) AS pop_tohoku,                 SUM(CASE WHEN pref_name IN ('东京', '千叶') THEN population ELSE NULL END) AS pop_kanto          FROM TblPop          GROUP BY age_class, sex_cd) AS DATA      LEFT OUTER JOIN TblAge MASTER1          ON MASTER1.age_class = DATA.age_class      LEFT OUTER JOIN TblSex MASTER2          ON MASTER2.sex_cd = DATA.sex_cd""", conn)
  age_class sex_cd pop_tohoku pop_kanto
0 1 f 1300 2200.0
1 1 m 1100 2400.0
2 3 f 1800 900.0
3 3 m 1000 NaN
# 解决方式是:调整为一次外连接# 外连接的表是 TblAge和TblSex 的笛卡尔积,也就是把它们用交叉连接(CROSS JOIN)组合起来,# 对于不支持CROSS JOIN的库,分别把两个表用FROM引入,也是一样的.# 同时,请注意下面把临时表放在了 LEFT OUTER JOIN 的左边,充分发挥OUTER JOIN的作用.pd.read_sql("""    SELECT MASTER.age_class AS age_class,           MASTER.sex_cd    AS sex_cd,           DATA.pop_tohoku  AS pop_tohoku,           DATA.pop_kanto   AS pop_kanto    FROM (SELECT age_class, sex_cd          FROM TblAge CROSS JOIN TblSex) AS MASTER      LEFT OUTER JOIN              (SELECT age_class, sex_cd,                         SUM(CASE WHEN pref_name IN ('青森', '秋田') THEN population ELSE NULL END) AS pop_tohoku,                         SUM(CASE WHEN pref_name IN ('东京', '千叶') THEN population ELSE NULL END) AS pop_kanto              FROM TblPop              GROUP BY age_class, sex_cd) AS DATA          ON MASTER.age_class = DATA.age_class          AND MASTER.sex_cd = DATA.sex_cd      """, conn)
  age_class sex_cd pop_tohoku pop_kanto
0 1 f 1300.0 2200.0
1 1 m 1100.0 2400.0
2 2 f NaN NaN
3 2 m NaN NaN
4 3 f 1800.0 900.0
5 3 m 1000.0 NaN
# 更优的解决方法是:把JOIN看作乘法运算# 关键在于,TblPop可以看作就是DATA。然后 MASTER -> DATA 的关系,就是一对多关系了pd.read_sql("""    SELECT MASTER.age_class AS age_class,           MASTER.sex_cd    AS sex_cd,           SUM(CASE WHEN pref_name IN ('青森', '秋田') THEN population ELSE NULL END) AS pop_tohoku,           SUM(CASE WHEN pref_name IN ('东京', '千叶') THEN population ELSE NULL END) AS pop_kanto    FROM (SELECT age_class, sex_cd          FROM TblAge CROSS JOIN TblSex) AS MASTER      LEFT OUTER JOIN TblPop AS DATA      ON MASTER.age_class = DATA.age_class      AND MASTER.sex_cd = DATA.sex_cd    GROUP BY MASTER.age_class, MASTER.sex_cd""", conn)
  age_class sex_cd pop_tohoku pop_kanto
0 1 f 1300.0 2200.0
1 1 m 1100.0 2400.0
2 2 f NaN NaN
3 2 m NaN NaN
4 3 f 1800.0 900.0
5 3 m 1000.0 NaN

用外连接做集合运算

各个数据库的集合运算实现都不尽相同,参差不齐。

集合运算符会尽心排序,所以可能带来性能上的问题。

因此,了解一下集合运算符的替代方案还是有意义的。

# 创建数据库conn.execute("""    CREATE TABLE IF NOT EXISTS Class_A (        id integer PRIMARY KEY AUTOINCREMENT,        name varchar(10)    );""")conn.execute("""       CREATE TABLE IF NOT EXISTS Class_B (        id integer PRIMARY KEY AUTOINCREMENT,        name varchar(10)    )""")# 插入数据conn.execute("""DELETE FROM Class_A;""")conn.execute("""DELETE FROM Class_B;""")conn.execute("""    INSERT INTO Class_A(id, name) VALUES         (1, '田中'),        (2, '铃木'),        (3, '伊集院')""")conn.execute("""    INSERT INTO Class_B(id, name) VALUES        (1, '田中'),        (2, '铃木'),        (4, '西院寺')""")
pd.read_sql("""    SELECT * FROM Class_A""", conn)
  id name
0 1 田中
1 2 铃木
2 3 伊集院
pd.read_sql("""    SELECT * FROM Class_B""", conn)
  id name
0 1 田中
1 2 铃木
2 4 西院寺

用外连接求差集: A - B

pd.read_sql("""    SELECT A.id id, A.name AS A_name    FROM Class_A AS A LEFT OUTER JOIN Class_B AS B    ON A.id = B.id    WHERE B.name IS NULL""", conn)
  id A_name
0 3 伊集院

用外连接求差集: B - A

pd.read_sql("""    SELECT B.id id, B.name AS B_name    FROM Class_B AS B LEFT OUTER JOIN Class_A AS A    ON A.id = B.id    WHERE A.name IS NULL""", conn)
  id B_name
0 4 西院寺

用外连接求异或集

pd.read_sql("""    SELECT A.id id, A.name AS name    FROM Class_A AS A LEFT OUTER JOIN Class_B AS B    ON A.id = B.id    WHERE B.name IS NULL        UNION        SELECT B.id id, B.name AS name    FROM Class_B AS B LEFT OUTER JOIN Class_A AS A    ON A.id = B.id    WHERE A.name IS NULL""", conn)
  id name
0 3 伊集院
1 4 西院寺

转载于:https://www.cnblogs.com/thomaszdxsn/p/15-wai-lian-jie-de-yong-fa--sql-jin-jie-jiao-cheng.html

你可能感兴趣的文章
搜狗输入法皮肤安装 分类: windows常用小技巧 ...
查看>>
AtCoder Beginner Contest 073 赛后反思与总结
查看>>
关于新手html的认识 以及对table的基本用法
查看>>
都是权限惹的祸!
查看>>
编译原理课堂笔记(1)编译概述
查看>>
第四周作业_2013551605
查看>>
爱奇艺笔试题
查看>>
HTML5和CSS3的新特性
查看>>
C# HttpHelper 采集
查看>>
JSON转Map
查看>>
ios 下锁使用
查看>>
用grunt搭建自动化的web前端开发环境-完整教程
查看>>
今天开始正式学前端
查看>>
bzoj 3540: [Usaco2014 Open]Fair Photography
查看>>
Android spinner默认样式不支持换行和修改字体样式的解决方法
查看>>
ajax标准格式
查看>>
高薪技术
查看>>
bootstrap collapse 无法收回
查看>>
GuessNumber
查看>>
IDL软件初步了解
查看>>