摘要:Python是一门简洁优美且功能强大的语言,越来越受编程人员的喜爱,在工业界和学术界都非常受欢迎。用Python处理数据时往往需要进行数据分析,而直接面对冷冰冰的数值或信息很难得到直观的感受;如果借助图形化分析,常常可以一目了然,所谓"一图胜千言"就是这个意思。本文介绍可视化库matplotlib的使用,方便大家掌握。(本文原创,转载必须注明出处.)
# matplotlib绘制折线图
## 绘制y=2x+1的折线图
import matplotlib
import matplotlib.pyplot as plt
#加入中文显示
import matplotlib.font_manager as fm
# 解决中文乱码,本案例使用宋体字
myfont=fm.FontProperties(fname=r"C:\\Windows\\Fonts\\simsun.ttc")
def line_chart(xvalues,yvalues):
    """Plot ``yvalues`` against ``xvalues`` as a line chart and show it.

    Uses the module-level ``plt`` (matplotlib.pyplot) and ``myfont`` objects.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Keyword arguments shared by every piece of Chinese text on the figure.
    cjk_text = {'fontname': '宋体', 'fontproperties': myfont}

    # Half-transparent red line drawn with linewidth 5.
    plt.plot(xvalues, yvalues, c='red', alpha=0.5, linewidth=5)

    # Figure title, then the captions of the two axes.
    plt.title("Python绘制折线图", fontsize=30, **cjk_text)
    for set_caption, caption in ((plt.xlabel, '横坐标'), (plt.ylabel, '纵坐标')):
        set_caption(caption, fontsize=20, **cjk_text)

    # axis='both' applies the tick-label size to the x and the y axis.
    plt.tick_params(axis='both', labelsize=10)

    # Display the figure.
    plt.show()
if __name__ == "__main__":
    # Draw the line y = 2x + 1.
    # The y values are computed from the x values so the data always matches
    # the stated formula; the previously hard-coded list [1,7,10,13,16]
    # was actually y = 3x + 1.
    xs = [0, 2, 3, 4, 5]
    line_chart(xs, [2 * x + 1 for x in xs])
## 运行结果

***
# matplotlib绘制散点图
## 绘制y=2x+1的散点图
import matplotlib
import matplotlib.pyplot as plt
#加入中文显示
import matplotlib.font_manager as fm
# 解决中文乱码,本案例使用宋体字
myfont=fm.FontProperties(fname=r"C:\\Windows\\Fonts\\simsun.ttc")
def scatter_chart(xvalues,yvalues):
    """Draw ``yvalues`` against ``xvalues`` as a scatter plot and show it.

    Relies on the module-level ``plt`` (matplotlib.pyplot) and ``myfont``
    (the FontProperties used for the Chinese title and axis labels).
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # s: marker size, c: marker colour, edgecolors='none': no marker outline.
    plt.scatter(xvalues, yvalues, c='green', edgecolors='none', s=40)
    # Title and axis captions. The title used to read "折线图" (line chart),
    # copied from the line-chart example; this figure is a scatter plot.
    plt.title("Python绘制散点图", fontsize=30, fontname='宋体', fontproperties=myfont)
    plt.xlabel('横坐标', fontsize=20, fontname='宋体', fontproperties=myfont)
    plt.ylabel('纵坐标', fontsize=20, fontname='宋体', fontproperties=myfont)
    # Tick-label size; axis='both' applies it to the x and the y axis.
    plt.tick_params(axis='both', which='major', labelsize=10)
    # Optional: fix the value range of each axis.
    # plt.axis([80,100,6400,10000])
    # Display the figure.
    plt.show()
    # Optional: save the chart to a file; bbox_inches='tight' trims the
    # surrounding whitespace.
    # plt.savefig('squares_plot.png',bbox_inches='tight')
if __name__ == "__main__":
    # Draw the scatter plot of y = 2x + 1.
    # The y values are computed from the x values so the data always matches
    # the stated formula; the previously hard-coded list [1,7,10,13,16]
    # was actually y = 3x + 1.
    xs = [0, 2, 3, 4, 5]
    scatter_chart(xs, [2 * x + 1 for x in xs])
## 运行结果

***
# matplotlib读取csv文件显示折线图
## 加利福尼亚死亡谷日气温最高最低图
import csv
from datetime import datetime
from matplotlib import pyplot as plt
import matplotlib as mpl
# 解决中文乱码问题
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False
# Read the dates and the daily high / low temperatures from the CSV file.
filename = 'death_valley_2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)  # first line holds the column headers
    # To inspect the column positions:
    # for index,column_header in enumerate(header_row):
    #     print(index,column_header)

    dates, highs, lows = [], [], []
    for row in reader:
        # Parse the date separately: if it is malformed, report the raw cell
        # of THIS row. Previously the handler printed ``current_date``, which
        # is unbound on the first row and stale (previous row) afterwards.
        try:
            current_date = datetime.strptime(row[0], "%Y-%m-%d")
        except ValueError:
            print(row[0], 'missing data')
            continue

        # Columns 1 and 3 are read as the high and low temperature; an empty
        # or non-numeric cell makes int() raise ValueError and the row is
        # skipped.
        try:
            high = int(row[1])
            low = int(row[3])
        except ValueError:
            print(current_date, 'missing data')
        else:
            dates.append(current_date)
            highs.append(high)
            lows.append(low)
# Draw the chart.
fig = plt.figure(dpi=120, figsize=(10, 6))
# Highs in red, lows in blue; alpha sets the transparency of each curve.
for series, colour in ((highs, 'red'), (lows, 'blue')):
    plt.plot(dates, series, c=colour, alpha=0.5)
# fill_between takes one x series and two y series and shades the area
# between them.
plt.fill_between(dates, highs, lows, facecolor='orange', alpha=0.1)

# Title, axis captions and tick formatting.
plt.title('2014年加利福尼亚死亡谷日气温最高最低图', fontsize=24)
plt.xlabel('日(D)', fontsize=16)
fig.autofmt_xdate()  # slant the date tick labels
plt.ylabel('温度(F)', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
# Optional: custom start/end values for the axes.
# plt.axis([0,31,54,72])

# Write the image file first, then open the window.
plt.savefig('highs_lows.png', bbox_inches='tight')
plt.show()
## 运行结果

***
# matplotlib生成随机漫步图
## 随机数据生成
from random import choice
class RandomWalk():
    """Generate the points of a random walk."""

    def __init__(self,num_points=5000):
        """Set up the walk.

        num_points: number of points the finished walk should contain.
        The walk starts with the single point (0, 0).
        """
        self.num_points = num_points
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append points until the walk holds ``num_points`` points."""
        # NOTE(review): the original text was cut off right after
        # ``while len(self.x_values)``. The loop condition follows from how
        # ``num_points`` is used; the step distribution below (direction
        # +1/-1, distance 0-4 on each axis) is a reconstruction -- confirm
        # against the author's original code.
        while len(self.x_values) < self.num_points:
            x_step = choice([1, -1]) * choice([0, 1, 2, 3, 4])
            y_step = choice([1, -1]) * choice([0, 1, 2, 3, 4])

            # A step that goes nowhere is not a step: draw again.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the last point moved by the step.
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)
## 生成随机漫步图
#-*- coding: utf-8 -*-
#coding=utf-8
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab
from random_walk import RandomWalk
# 解决中文乱码问题
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False
# Create a RandomWalk instance and generate its points.
rw = RandomWalk()
rw.fill_walk()

plt.figure(figsize=(10,6))
point_numbers = list(range(rw.num_points))
# Colour each point by its index so the walk fades into deeper red as it
# progresses.
plt.scatter(rw.x_values,rw.y_values,c=point_numbers,cmap=plt.cm.Reds,edgecolors='none',s=1)
# Emphasise the start point (green) and the end point (blue).
plt.scatter(0,0,c='green',edgecolors='none',s=100)
plt.scatter(rw.x_values[-1],rw.y_values[-1],c='blue',edgecolors='none',s=100)
# Title and axis captions.
plt.title('随机漫步图',fontsize=24)
plt.xlabel('左右步数',fontsize=14)
plt.ylabel('上下步数',fontsize=14)
# Hide both axes. plt.gca() returns the Axes the scatter calls drew on;
# plt.axes() adds a new Axes to the figure in current matplotlib, so hiding
# the axes through it leaves the drawn ones visible.
ax = plt.gca()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
## 运行结果

***
# pygal绘制世界各国人口统计图
## 绘制世界各国人口统计图
# coding = utf-8
import json
from matplotlib import pyplot as plt
import matplotlib as mpl
from country_codes import get_country_code
import pygal
from pygal.style import RotateStyle
from pygal.style import LightColorizedStyle
# 解决中文乱码问题
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False
# Load the population records from the JSON file.
filename='population_data.json'
with open(filename) as f:
    pop_data = json.load(f)
# print(pop_data[1])

# Build a dictionary mapping country code -> 2010 population.
cc_populations={}
for pop_dict in pop_data:
    # Only the 2010 records are of interest here.
    if pop_dict['Year'] != '2010':
        continue
    country_name = pop_dict['Country Name']
    # The value is a numeric string: go through float, then int.
    population = int(float(pop_dict['Value']))
    # print(country_name + ":" + str(population))
    code = get_country_code(country_name)
    # Records for which no country code is returned are left out.
    if code:
        cc_populations[code] = population
# A second dictionary (e.g. cc1_populations for '2009') could be filled in
# the same way by handling that year in the loop as well.
# Split the countries into three groups by population size:
# below 10 million, below 1 billion, and everything above.
cc_pops_1,cc_pops_2,cc_pops_3={},{},{}
for cc,pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop
# print(len(cc_pops_1),len(cc_pops_2),len(cc_pops_3))

# Draw the world map, one series per population group.
wm_style = RotateStyle('#336699',base_style=LightColorizedStyle)
wm = pygal.maps.world.World(style=wm_style)
wm.title = '2010年世界各国人口统计图'
wm.add('0-10m', cc_pops_1)
wm.add('10m-1bm',cc_pops_2)
wm.add('>1bm',cc_pops_3)
# wm.add('2009', cc1_populations)
wm.render_to_file('world_populations.svg')
## 运行结果

***
# pygal绘制直方图
## 绘制带有详细信息的直方图
import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS
# Chart style: LightenStyle derived from '#333366' on the
# LightColorizedStyle base.
my_style = LS('#333366', base_style=LCS)
chart = pygal.Bar(show_legend=False, x_label_rotation=45, style=my_style)
chart.title = 'Python Projects'

# One dictionary per bar: 'value' is the bar's value, 'label' a descriptive
# text attached to that bar.
plot_dicts = [
    dict(value=16101, label='Description of httpie.'),
    dict(value=15028, label='Description of django.'),
    dict(value=14798, label='Description of flask.'),
]

chart.x_labels = ['httpie', 'django', 'flask']
chart.add('', plot_dicts)
chart.render_to_file('bar_descriptions.svg')
## 运行结果
## 绘制两次随机骰子的直方图
#coding=utf-8
from die import Die
import pygal
import matplotlib as mpl
# 解决中文乱码问题
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False
# Two dice rolled together 1000 times; each result is the sum of both rolls.
die1 = Die()
die2 = Die()
results = [die1.roll() + die2.roll() for _ in range(1000)]
# print(results)

# Analyse the results: count how often each total from 2 up to the sum of
# both dice's side counts occurred.
max_result = die1.num_sides + die2.num_sides
totals = range(2, max_result + 1)
frequencies = [results.count(total) for total in totals]
print(frequencies)

# Render the histogram to an SVG file.
hist = pygal.Bar(figsize=(8,6))
hist.title = '骰子投掷1000次各面结果统计图'
hist.x_labels = list(totals)
hist.x_title ='结果'
hist.y_title = '结果分布'
hist.add('D6+D6', frequencies)
hist.render_to_file('die_visual.svg')
# hist.show()
## 运行结果

***
# pygal:GitHub最受欢迎的星标项目可视化
## GitHub最受欢迎的星标项目可视化
# coding=utf-8
import requests
import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS
# Make an API call, and store the response.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
# requests applies no timeout by default; set one so the script cannot wait
# forever on an unresponsive server.
r = requests.get(url, timeout=10)
print("Status code:", r.status_code) # 200 means the request succeeded
# Stop here on an HTTP error instead of failing further down with a
# KeyError on the error payload.
r.raise_for_status()
response_dict = r.json()
# print(response_dict.keys())
print("Total repositories:", response_dict['total_count'])

# Explore information about the repositories.
repo_dicts = response_dict['items']
print("Repositories returned:",len(repo_dicts))

# To inspect the individual repositories:
# repo_dict =repo_dicts[0]
# print('\n\neach repository:')
# for repo_dict in repo_dicts:
#     print("\nName:",repo_dict['name'])
#     print("Owner:",repo_dict['owner']['login'])
#     print("Stars:",repo_dict['stargazers_count'])
#     print("Repository:",repo_dict['html_url'])
#     print("Description:",repo_dict['description'])
# To list the keys of one repository:
# print('\nKeys:',len(repo_dict))
# for key in sorted(repo_dict.keys()):
#     print(key)

# Collect the name and star count of every returned repository; the two
# lists stay index-aligned.
names, plot_dicts = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    plot_dicts.append(repo_dict['stargazers_count'])
# Visualisation.
my_style = LS('#333366', base_style=LCS)

# Collect the chart options in one place and copy them onto a pygal Config.
my_config = pygal.Config()
chart_options = {
    'x_label_rotation': 45,       # rotate the x-axis labels by 45 degrees
    'show_legend': False,         # hide the legend
    'title_font_size': 24,        # font sizes: title, labels, major labels
    'label_font_size': 14,
    'major_label_font_size': 18,
    'truncate_label': 15,         # shorten long project names to 15 characters
    'show_y_guides': False,       # hide the horizontal guide lines
    'width': 1000,                # custom chart width
}
for option, setting in chart_options.items():
    setattr(my_config, option, setting)

chart = pygal.Bar(my_config, style=my_style)
chart.title = 'Most-Starred Python Projects on GitHub'
chart.x_labels = names
chart.add('', plot_dicts)
chart.render_to_file('python_repos.svg')
运行结果

参考文献
- Python官网
- 中文维基百科
- GitHub
- 图书:《机器学习实战》
- 图书:《自然语言处理理论与实战》
完整代码下载

作者声明
本文版权归作者所有,旨在技术交流使用。未经作者同意禁止转载,转载后需在文章页面明显位置给出原文链接,否则相关责任自行承担。
