">

简明的Python教程之matplotlib可视化

摘要:Python作为一门简洁优美且功能强大的语言,越来越受编程人员的喜爱,在工业界和学术界都是非常受欢迎的编程语言。用Python处理数据时往往需要进行数据分析,而直接面对冷冰冰的数值或信息很难得到直观的感受;如果借助图形化分析,常常可以一目了然,所谓“一图胜千言”就是这个意思。本文对可视化库matplotlib进行介绍,方便大家掌握。(本文原创,转载必须注明出处。)

matplotlib绘制折线图

绘制y=2x+1的折线图

import matplotlib
import matplotlib.pyplot as plt
#加入中文显示
import  matplotlib.font_manager as fm

# Avoid garbled Chinese text by loading the SimSun font file explicitly.
# NOTE(review): this is a raw string, so every "\\" below stays as two literal
# backslashes in the path. The path is an absolute Windows location — confirm
# the font file exists on the target machine.
myfont=fm.FontProperties(fname=r"C:\\Windows\\Fonts\\simsun.ttc")

def line_chart(xvalues,yvalues):
    """Plot the given points as a semi-transparent red line and show the figure.

    xvalues and yvalues are handed to ``plt.plot`` as the x and y data.
    The title and both axis labels are drawn with the module-level ``myfont``
    font properties.
    """
    # Text styling shared by the two axis labels.
    axis_label_style = dict(fontsize=20, fontname='宋体', fontproperties=myfont)

    # c picks the colour, alpha the transparency, linewidth the stroke width.
    plt.plot(xvalues, yvalues, linewidth=5, alpha=0.5, c='red')

    # Chart title, then the x and y axis captions.
    plt.title("Python绘制折线图", fontsize=30, fontname='宋体', fontproperties=myfont)
    plt.xlabel('横坐标', **axis_label_style)
    plt.ylabel('纵坐标', **axis_label_style)

    # axis='both' applies the tick-label size to the x and y axes alike.
    plt.tick_params(axis='both', labelsize=10)

    # Display the finished figure.
    plt.show()

if __name__ == "__main__":
    # 1. Draw the sample points as a line chart.
    # NOTE(review): these points satisfy y=3x+1 (0->1, 2->7, 3->10, ...),
    # although the article text describes the line as y=2x+1.
    line_chart([0,2,3,4,5],[1,7,10,13,16])
## 运行结果

![](https://i.imgur.com/A3omLb0.png)

***

# matplotlib绘制散点图

## 绘制y=2x+1的散点图
import matplotlib
import matplotlib.pyplot as plt
#加入中文显示
import  matplotlib.font_manager as fm

# Avoid garbled Chinese text by loading the SimSun font file explicitly.
# NOTE(review): this is a raw string, so every "\\" below stays as two literal
# backslashes in the path. The path is an absolute Windows location — confirm
# the font file exists on the target machine.
myfont=fm.FontProperties(fname=r"C:\\Windows\\Fonts\\simsun.ttc")

def scatter_chart(xvalues,yvalues):
    """Plot the given points as a green scatter chart and show the figure.

    Args:
        xvalues: x coordinates, passed to ``plt.scatter``.
        yvalues: y coordinates, passed to ``plt.scatter``.

    The title and axis labels are drawn with the module-level ``myfont``
    font properties.
    """
    # s sets the marker size, c the marker colour; edgecolors='none' removes
    # the marker outline.
    plt.scatter(xvalues,yvalues,c='green',edgecolors='none',s=40)

    # Title and axis captions. The title used to read "折线图" (line chart),
    # carried over from the line-chart example; this figure is a scatter
    # chart ("散点图").
    plt.title("Python绘制散点图",fontsize=30,fontname='宋体',fontproperties=myfont)
    plt.xlabel('横坐标',fontsize=20,fontname='宋体',fontproperties=myfont)
    plt.ylabel('纵坐标',fontsize=20,fontname='宋体',fontproperties=myfont)

    # axis='both' applies the tick-label size to the x and y axes alike.
    plt.tick_params(axis='both',which='major',labelsize=10)

    # Optional: fix the value range of each axis.
    # plt.axis([80,100,6400,10000])

    # Display the finished figure.
    plt.show()

    # Optional: save the chart to a file; bbox_inches='tight' trims the
    # surrounding whitespace.
    # plt.savefig('squares_plot.png',bbox_inches='tight')

if __name__ == "__main__":
    # 1. Draw the sample points as a scatter chart.
    # NOTE(review): these points satisfy y=3x+1 (0->1, 2->7, 3->10, ...),
    # although the article text describes them as y=2x+1.
    scatter_chart([0,2,3,4,5],[1,7,10,13,16])
## 运行结果

![](https://i.imgur.com/6ZTgFhT.png)

***

# matplotlib读取csv文件显示折线图

## 加利福尼亚死亡谷日气温最高最低图
import csv
from datetime import datetime
from matplotlib import pyplot as plt
import matplotlib as mpl

# Use the SimHei font so Chinese text in the figure is not garbled.
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False

# Read the dates plus the daily high and low temperatures from the CSV file.
filename = 'death_valley_2014.csv'
# newline='' is how the csv module documents opening files for csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header line
    # Uncomment to inspect the column positions:
    # for index, column_header in enumerate(header_row):
    #     print(index, column_header)

    dates, highs, lows = [], [], []
    for row in reader:
        # Parse the date on its own so that a bad date is reported with the
        # raw cell text rather than a stale (or not yet defined) current_date.
        try:
            current_date = datetime.strptime(row[0], "%Y-%m-%d")
        except ValueError:
            print(row[0], 'invalid date')
            continue

        # int() raises ValueError on an empty or non-numeric temperature
        # cell; report the day and leave it out of all three lists.
        try:
            high = int(row[1])
            low = int(row[3])
        except ValueError:
            print(current_date, 'missing data')
            continue

        dates.append(current_date)
        highs.append(high)
        lows.append(low)

# Draw the data: daily highs in red, daily lows in blue, on one figure.
fig = plt.figure(dpi=120,figsize=(10,6))
plt.plot(dates,highs,c='red',alpha=0.5)# alpha sets the transparency
plt.plot(dates,lows,c='blue',alpha=0.5)
plt.fill_between(dates,highs,lows,facecolor='orange',alpha=0.1)# takes one x series and two y series and shades the area between them

# Format the figure: title, axis captions and tick-label size.
plt.title('2014年加利福尼亚死亡谷日气温最高最低图',fontsize=24)
plt.xlabel('日(D)',fontsize=16)
fig.autofmt_xdate() # draw the date labels slanted
plt.ylabel('温度(F)',fontsize=16)
plt.tick_params(axis='both',which='major',labelsize=16)
# plt.axis([0,31,54,72]) # custom start/end values for the axes
# Save the chart to a PNG file before showing it.
plt.savefig('highs_lows.png',bbox_inches='tight')

plt.show()
## 运行结果

![](https://i.imgur.com/X6HpFU4.png)

***

# matplotlib生成随机漫步图

## 随机数据生成

from random import choice

class RandomWalk():
    """Generate the points of a two-dimensional random walk.

    Attributes:
        num_points: number of points the finished walk should contain.
        x_values: x coordinates of the walk; the first entry is 0.
        y_values: y coordinates of the walk; the first entry is 0.
    """

    def __init__(self,num_points=5000):
        """Store the target length and start the walk at the origin (0, 0)."""
        self.num_points = num_points
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append random steps until the walk holds ``num_points`` points.

        NOTE(review): in the source this method stopped at
        ``while len(self.x_values)`` — the rest of the statement was lost,
        leaving a syntax error. The loop condition is restored so the walk
        ends with exactly ``num_points`` points. The step rule used here
        (per axis: direction +1/-1 times a distance of 0-4, skipping moves
        that go nowhere) is a reconstruction — confirm it against the
        original random_walk.py.
        """
        while len(self.x_values) < self.num_points:
            # Direction (+1 / -1) times distance (0-4) for each axis.
            x_step = choice([1, -1]) * choice([0, 1, 2, 3, 4])
            y_step = choice([1, -1]) * choice([0, 1, 2, 3, 4])

            # A step of (0, 0) would only duplicate the previous point.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the last point shifted by the step.
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)

生成随机漫步图

#-*- coding: utf-8 -*-
#coding=utf-8
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab
from random_walk  import RandomWalk

# Use the SimHei font so Chinese text in the figure is not garbled.
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False

# Create a RandomWalk instance and generate its points.
rw = RandomWalk()
rw.fill_walk()

plt.figure(figsize=(10,6))

# One number per point, used below as the colour value of that point.
point_numbers = list(range(rw.num_points))

# Colour the points with the Reds colormap according to their position in
# the walk, so the colour changes gradually as the walk progresses.
plt.scatter(rw.x_values,rw.y_values,c=point_numbers,cmap=plt.cm.Reds,edgecolors='none',s=1)

# Emphasise the start point (green) and the end point (blue).
plt.scatter(0,0,c='green',edgecolors='none',s=100)
plt.scatter(rw.x_values[-1],rw.y_values[-1],c='blue',edgecolors='none',s=100)

# Title and axis captions.
plt.title('随机漫步图',fontsize=24)
plt.xlabel('左右步数',fontsize=14)
plt.ylabel('上下步数',fontsize=14)

# Hide both axes. plt.gca() returns the Axes the scatter plots were drawn on;
# plt.axes() with no arguments adds a new Axes to the figure in current
# Matplotlib releases, so it would hide the axes of an empty Axes instead.
current_axes = plt.gca()
current_axes.get_xaxis().set_visible(False)
current_axes.get_yaxis().set_visible(False)

plt.show()
## 运行结果

![](https://i.imgur.com/j0CrnIQ.png)

***

# matplotlib绘制世界各国人口统计图

## 绘制世界各国人口统计图
# coding=utf-8
import json
from matplotlib import pyplot as plt
import matplotlib as mpl
from country_codes import get_country_code
import pygal
from pygal.style import RotateStyle
from pygal.style import LightColorizedStyle
# Use the SimHei font so Chinese text is not garbled.
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False

# Load the JSON data; the loop further down iterates over pop_data and reads
# the 'Year', 'Country Name' and 'Value' keys of each entry.
filename='population_data.json'
with open(filename) as f:
    pop_data = json.load(f)
    # print(pop_data[1])


# Map each recognised country code to that country's 2010 population.
cc_populations = {}

for record in pop_data:
    # Only the entries for the year 2010 are used.
    if record['Year'] != '2010':
        continue

    name = record['Country Name']
    # The value is converted via float() first and then truncated to an int
    # (the original note describes it as a numeric string).
    headcount = int(float(record['Value']))

    country_code = get_country_code(name)
    if not country_code:
        # No code was returned for this name, so the entry is left out.
        continue
    cc_populations[country_code] = headcount

# Split the countries into three groups by population size:
# below 10 million, below 1 billion, and everything larger.
# (This section was mangled into a single invalid line in the source; the
# statements below are rebuilt from the tokens that survived.)
cc_pops_1,cc_pops_2,cc_pops_3={},{},{}
for cc,pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

# print(len(cc_pops_1),len(cc_pops_2),len(cc_pops_3))

# Build the world map with one series per population group.
# NOTE(review): pygal.maps.world presumably needs the separate
# pygal_maps_world package to be installed — confirm.
wm_style = RotateStyle('#336699',base_style=LightColorizedStyle)
wm = pygal.maps.world.World(style=wm_style)
wm.title = '2010年世界各国人口统计图'
wm.add('0-10m', cc_pops_1)
wm.add('10m-1bm',cc_pops_2)
wm.add('>1bm',cc_pops_3)
# wm.add('2009', cc1_populations)

# Write the finished map to an SVG file.
wm.render_to_file('world_populations.svg')

运行结果

matplotlib绘制直方图

绘制带有详细信息的直方图

import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# One entry per bar: its value together with the label text attached to it.
plot_dicts = [
    {'value': 16101, 'label': 'Description of httpie.'},
    {'value': 15028, 'label': 'Description of django.'},
    {'value': 14798, 'label': 'Description of flask.'},
]

# Chart style derived from the colour '#333366' on top of the LCS base style.
my_style = LS('#333366', base_style=LCS)

# Bar chart with rotated x labels and no legend.
chart = pygal.Bar(style=my_style, x_label_rotation=45, show_legend=False)
chart.title = 'Python Projects'
chart.x_labels = ['httpie', 'django', 'flask']

# Add the single, unnamed series and write the chart to an SVG file.
chart.add('', plot_dicts)
chart.render_to_file('bar_descriptions.svg')

运行结果

  

绘制两次随机骰子的直方图

#coding=utf-8
from die import Die
import pygal
import matplotlib as mpl
# Font settings carried over from the matplotlib examples.
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False


# Roll two dice together 1000 times; each result is the sum of both rolls.
die1 = Die()
die2 = Die()
results = [die1.roll() + die2.roll() for _ in range(1000)]

# Count how often each possible total (2 up to the sum of both side counts)
# occurred.
max_result = die1.num_sides + die2.num_sides
possible_totals = range(2, max_result + 1)
frequencies = [results.count(total) for total in possible_totals]
print(frequencies)

# Render the counts as a bar chart and write it to an SVG file.
hist = pygal.Bar(figsize=(8,6))
hist.title = '骰子投掷1000次各面结果统计图'
hist.x_labels = list(possible_totals)
hist.x_title ='结果'
hist.y_title = '结果分布'
hist.add('D6+D6', frequencies)
hist.render_to_file('die_visual.svg')
# hist.show()
# hist.show()

运行结果


matplotlib:Github最受欢迎的星标项目可视化

Github最受欢迎的星标项目可视化

# coding=utf-8
import requests
import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS


# Make an API call, and store the response.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
# Pass a timeout: without one, requests waits indefinitely on a stalled
# connection.
r = requests.get(url, timeout=10)
print("Status code:", r.status_code) # 200 means the request succeeded
# Stop here with the HTTP error when the request was rejected, instead of
# failing later with a KeyError on a response that lacks the expected keys.
r.raise_for_status()

response_dict = r.json()
# print(response_dict.keys())
print("Total repositories:", response_dict['total_count'])

# Explore information about the repositories.
repo_dicts = response_dict['items']
print("Repositories returned:",len(repo_dicts))

# 查看项目信息
# repo_dict =repo_dicts[0]
# print('\n\neach repository:')
# for repo_dict in repo_dicts:
#     print("\nName:",repo_dict['name'])
#     print("Owner:",repo_dict['owner']['login'])
#     print("Stars:",repo_dict['stargazers_count'])
#     print("Repository:",repo_dict['html_url'])
#     print("Description:",repo_dict['description'])
# 查看每个项目的键
# print('\nKeys:',len(repo_dict))
# for key in sorted(repo_dict.keys()):
#     print(key)

# Repository names (used as x labels) and their star counts (bar values),
# both in the order the repositories appear in repo_dicts.
names = [repo['name'] for repo in repo_dicts]
plot_dicts = [repo['stargazers_count'] for repo in repo_dicts]

# Visualisation: configure a pygal bar chart and write it to an SVG file.
my_style = LS('#333366', base_style=LCS)

my_config = pygal.Config() # create a pygal Config instance
my_config.x_label_rotation = 45 # rotate the x-axis labels by 45 degrees
my_config.show_legend = False # hide the legend
my_config.title_font_size = 24 # font sizes for the chart title, labels and major labels
my_config.label_font_size = 14
my_config.major_label_font_size = 18
my_config.truncate_label = 15 # shorten long project names to 15 characters
my_config.show_y_guides = False # hide the horizontal guide lines of the chart
my_config.width = 1000 # custom chart width

chart = pygal.Bar(my_config, style=my_style)
chart.title = 'Most-Starred Python Projects on GitHub'
chart.x_labels = names
chart.add('', plot_dicts)
chart.render_to_file('python_repos.svg')

运行结果


参考文献

  1. Python官网
  2. 中文维基百科
  3. GitHub
  4. 图书:《机器学习实战》
  5. 图书:《自然语言处理理论与实战》

完整代码下载

源码请进【机器学习和自然语言QQ群:436303759】文件下载:自然语言处理和机器学习技术QQ交流

作者声明

本文版权归作者所有,旨在技术交流使用。未经作者同意禁止转载,转载后需在文章页面明显位置给出原文链接,否则相关责任自行承担。

白宁超 wechat
扫一扫关注微信公众号,机器学习和自然语言处理,订阅号datathinks!