先看效果:
源码(背景图、51job 软件测试 采集结果请自备或联系博主获取):
# Build a word-cloud image from one text column of a CSV export and save it
# next to this script as "<script path>.png".
import csv

import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS

CSV_PATH = r'examples/wc_cn/job_51_2020-04-16.csv'
MASK_PATH = 'mzss.jpg'    # image used both as the layout mask and colour source
FONT_PATH = 'SIMHEI.TTF'  # a font with CJK glyphs; the default font cannot render them

# Index of the CSV column whose text feeds the word cloud.
# NOTE(review): presumably the job-description column -- verify against the CSV.
TEXT_COLUMN = 3

# Boilerplate phrases removed from every row before segmentation, in this order.
NOISE_WORDS = (
    '微信分享', 'Python', 'python', '工程师', '职能', '类别',
    '岗位职责', '职位诱惑', '以上学历', '关键字', '软件测试',
)


def _strip_noise(text):
    """Return *text* with every phrase in NOISE_WORDS removed."""
    for word in NOISE_WORDS:
        text = text.replace(word, '')
    return text


def _load_text(path):
    """Return the cleaned TEXT_COLUMN values of all data rows, concatenated.

    The first row is treated as a header and skipped. Rows that are too short
    to contain TEXT_COLUMN (e.g. blank lines) are ignored instead of raising
    IndexError.
    """
    # newline='' is what the csv module requires so that newlines embedded in
    # quoted fields are parsed correctly; `with` closes the file on any error.
    with open(path, 'r', encoding='utf-8', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row; tolerate an empty file
        return ''.join(
            _strip_noise(row[TEXT_COLUMN])
            for row in reader
            if len(row) > TEXT_COLUMN
        )


def main():
    """Read the CSV, segment the text with jieba, render and save the cloud."""
    # 1. Read the file content.
    content = _load_text(CSV_PATH)

    # 2. Segment with jieba (search-engine mode); WordCloud expects
    #    whitespace-separated tokens.
    words = ' '.join(jieba.cut_for_search(content))

    # 3. Build the word cloud.
    bg = np.array(Image.open(MASK_PATH))
    wc = WordCloud(
        background_color='white',  # canvas background colour
        mask=bg,                   # shape mask taken from the background image
        max_words=200,             # maximum number of words drawn
        stopwords=STOPWORDS,       # wordcloud's built-in stop-word set
        font_path=FONT_PATH,       # custom font path for Chinese text
        max_font_size=100,         # largest font size
        random_state=50,           # fixed seed for the random layout
        scale=1,
    ).generate(words)

    # The original script created the ImageColorGenerator but never used it,
    # so words kept the default colours. Apply it so that word colours are
    # sampled from the background image, as the original comment intended.
    wc.recolor(color_func=ImageColorGenerator(bg))

    plt.imshow(wc)
    plt.axis('off')
    plt.savefig(__file__ + '.png')


if __name__ == '__main__':
    main()
本文内容不用于商业目的,如涉及知识产权问题,请权利人联系博为峰小编(021-64471599-8017),我们将立即处理