Python 对图片的像素识别及处理
python对图片的像素识别及处理
滑动验证码位置确定:
图片示例:
def recognize_box(img_path, img_width):
    """Find the x offset of the dark square gap in a slider-captcha image.

    The image is converted to grayscale.  A column qualifies when its longest
    vertical run of dark pixels (gray value below 50) is at least
    ``img_width`` long; the gap is the longest run of adjacent qualifying
    columns, provided that run is also at least ``img_width`` wide.

    :param img_path: path of the captcha image.
    :param img_width: minimum side length, in pixels, of the dark square.
    :return: x coordinate of the left edge of the gap (also printed, as the
        original script did), or ``None`` when no gap is found.
    """
    # Pillow is imported lazily so that get_skip_frame stays usable (and
    # testable) on machines without Pillow installed.
    from PIL import Image

    with Image.open(img_path) as source:
        gray = source.convert('L')

    width, height = gray.size
    matching_columns = []
    for x in range(width):
        # After grayscale conversion the gap is close to black; its pixels
        # are normally below 50.  Rows are visited in ascending order, so the
        # list is already sorted.
        dark_rows = [y for y in range(height) if gray.getpixel((x, y)) < 50]
        if len(get_skip_frame(1, dark_rows)) >= img_width:
            matching_columns.append(x)

    gap_columns = get_skip_frame(1, matching_columns)
    if len(gap_columns) >= img_width:
        print(gap_columns[0])
        return gap_columns[0]
    return None


def get_skip_frame(skip, frame_list):
    """Return the longest run of neighbours that differ by exactly ``skip``.

    :param skip: required absolute difference between adjacent values.
    :param frame_list: sequence of numbers to scan, in order.
    :return: the longest such run as a list; the earliest one wins on a tie,
        and an empty input gives an empty list.
    """
    longest = []
    current = []
    for value in frame_list:
        if current and abs(value - current[-1]) != skip:
            # The run is broken: remember it if it is the best so far.
            if len(longest) < len(current):
                longest = current
            current = []
        current.append(value)
    if len(longest) < len(current):
        longest = current
    return longest


if __name__ == '__main__':
    recognize_box("3.jpg", 245)
对图片位置及层级关系进行识别:(像素点)

import requests
import base64
from PIL import Image
import numpy as np
import json


def get_first_dot(image, min_pixels=250, min_gap=20):
    """Locate the left end of each long horizontal line in a grayscale image.

    Every pixel row is scanned for "ink" pixels (gray value strictly between
    0 and 200).  A row with more than ``min_pixels`` ink pixels is treated as
    part of a line; for such a row the most frequent ink value is determined
    and the left-most pixel having that value is recorded.  Consecutive
    recorded rows that are at most ``min_gap`` pixels apart are taken to
    belong to the same line, so only the first row of each group is kept.

    :param image: object exposing ``size`` as ``(width, height)`` and
        ``getpixel((x, y))`` returning a gray value (e.g. a PIL "L" image).
    :param min_pixels: a row needs more than this many ink pixels to count.
    :param min_gap: vertical distance that must be exceeded between two
        recorded rows for the second one to start a new line.
    :return: list of ``(x, y)`` tuples, one per detected line, top to bottom
        (also printed, as the original script did).
    """
    width, height = image.size
    row_starts = []
    for y in range(height):
        coords = []
        values = []
        for x in range(width):
            pixel = image.getpixel((x, y))
            if 0 < pixel < 200:
                coords.append((x, y))
                values.append(pixel)
        if len(coords) <= min_pixels:
            continue
        levels, counts = np.unique(values, return_counts=True)
        # Most frequent gray value; on equal counts the larger value wins.
        dominant = max(zip(counts, levels))[1]
        # coords is ordered by x, so the first match is the left-most one.
        row_starts.append(
            next(pos for pos, val in zip(coords, values) if val == dominant)
        )

    # Always keep the first recorded row (previously a single recorded row
    # was dropped), then every row that is far enough below its predecessor.
    result = row_starts[:1]
    for previous, current in zip(row_starts, row_starts[1:]):
        if current[1] - previous[1] > min_gap:
            result.append(current)
    print(result)
    return result


def del_threshold_noise(image, threshold):
    """Turn every pixel at or above ``threshold`` white (255), in place.

    Intended as the first clean-up step on an image that has already been
    converted to grayscale.

    :param image: object exposing ``size`` and ``load()``; ``load()`` must
        return something indexable and assignable by ``[x, y]``.
    :param threshold: gray values greater than or equal to this become 255.
    :return: the same ``image`` object.
    """
    pixels = image.load()  # pixel access object: read/write by [x, y]
    width, height = image.size
    for y in range(height):
        for x in range(width):
            if pixels[x, y] >= threshold:
                pixels[x, y] = 255
    return image


def write_txt(dot_positions, data, path='1.txt'):
    """Write labels to a text file, marking how each line shifts horizontally.

    The first label is written on its own line.  For each following position
    the label gets a ``----`` suffix when the line starts further right than
    the previous one, and a ``----`` prefix otherwise.

    :param dot_positions: ``(x, y)`` line starts, e.g. from get_first_dot.
    :param data: sequence of label strings, one per line.
    :param path: output file; defaults to ``1.txt`` like the original script.
    """
    with open(path, 'w', encoding='utf-8') as f:
        if dot_positions and data:
            f.write(data[0] + "\n")
        # NOTE(review): the original bound is len(data) - 2, which never
        # writes the last label; kept as-is, confirm whether it should be
        # len(data) - 1.  The extra bound on dot_positions prevents the
        # IndexError the original raised when fewer positions than labels
        # were found.
        pair_count = min(len(dot_positions) - 1, len(data) - 2)
        for index in range(pair_count):
            shift = dot_positions[index + 1][0] - dot_positions[index][0]
            if shift > 0:
                f.write(data[index + 1] + "--" + "--" + "\n")
            else:
                f.write("--" + "--" + data[index + 1] + "\n")


if __name__ == '__main__':
    img_path = '企业股权关系.jpg'
    im = Image.open(img_path)
    im = im.convert('L')
    # Whiten light noise first; a threshold in the range 200-220 works.
    im = del_threshold_noise(im, 200)
    dot_positions = get_first_dot(im)
    write_txt(dot_positions, "123")
对验证码处理:
处理之前:

处理之后:

因为验证码干扰较多,处理之后的识别精度约为 75%,下面为处理过程:
import io
import math
import os
import cv2
import numpy as np
from PIL import Image
from collections import defaultdict


def del_threshold_noise(image, threshold):
    """Turn every pixel at or above ``threshold`` white (255), in place.

    Intended as the first clean-up step on an image that has already been
    converted to grayscale.

    :param image: object exposing ``size`` and ``load()``; ``load()`` must
        return something indexable and assignable by ``[x, y]``.
    :param threshold: gray values greater than or equal to this become 255.
    :return: the same ``image`` object.
    """
    pixels = image.load()  # pixel access object: read/write by [x, y]
    width, height = image.size
    for y in range(height):
        for x in range(width):
            if pixels[x, y] >= threshold:
                pixels[x, y] = 255
    return image


def get_threshold(image):
    """Return the most frequent pixel value of a grayscale image.

    :param image: object exposing ``size`` and ``getpixel((x, y))``.
    :return: the pixel value with the highest count; when several values
        share that count, the one first encountered latest in the
        column-by-column scan is returned.
    :raises ValueError: if the image contains no pixels.
    """
    counts = defaultdict(int)
    width, height = image.size
    for x in range(width):
        for y in range(height):
            counts[image.getpixel((x, y))] += 1
    peak = max(counts.values())
    threshold = None
    for pixel, count in counts.items():
        if count == peak:
            threshold = pixel
    return threshold


def get_bin_table(threshold, rate=0.1):
    """Build a 256-entry lookup table for binarising a grayscale image.

    Gray values within ``rate`` (relative) of ``threshold`` map to 1 (white);
    all other values map to 0 (black).

    :param threshold: reference gray value.
    :param rate: relative half-width of the band that maps to white.
    :return: list of 256 ints, each 0 or 1.
    """
    low = threshold * (1 - rate)
    high = threshold * (1 + rate)
    return [1 if low <= level <= high else 0 for level in range(256)]


def del_cut_noise(im_cut):
    """Keep only the second most common gray band of an image crop.

    A 16-bin histogram of the crop is computed; the centre of the bin with
    the second largest count is taken as the reference gray level, and every
    pixel more than 20 levels away from it is set to 255, in place.

    :param im_cut: 2-D grayscale ``ndarray`` (it is passed to
        ``cv2.calcHist``).
    :return: the same array, modified in place.
    """
    bins = 16  # number of histogram bars
    num_gray = math.ceil(256 / bins)  # gray levels covered by one bar
    hist = cv2.calcHist([im_cut], [0], None, [bins], [0, 256])
    bin_counts = [entry[0] for entry in hist]
    second_max = sorted(bin_counts)[-2]
    bins_second_max = bin_counts.index(second_max)
    # Centre of that bar, i.e. the representative gray level.
    mode = (bins_second_max + 0.5) * num_gray
    outside = (im_cut < mode - 20) | (im_cut > mode + 20)
    im_cut[outside] = 255
    return im_cut


def del_dot_noise(image):
    """Whiten isolated dots in a binarised image (1 = white, 0 = black).

    For every non-border pixel the 3x3 neighbourhood centred on it is
    inspected; when it holds at most 4 non-white pixels the centre is treated
    as noise.  All noise positions are collected first and whitened together
    afterwards, so earlier removals do not influence later decisions.

    :param image: object exposing ``size``, ``getpixel`` and ``putpixel``.
    :return: the same ``image`` object.
    """
    width, height = image.size
    change_pos = []
    for x in range(1, width - 1):
        for y in range(1, height - 1):
            dark_neighbours = sum(
                1
                for nx in range(x - 1, x + 2)
                for ny in range(y - 1, y + 2)
                if image.getpixel((nx, ny)) != 1
            )
            if dark_neighbours <= 4:
                change_pos.append((x, y))
    for pos in change_pos:
        image.putpixel(pos, 1)
    return image


def _append_unique(items, item):
    """Append ``item`` to ``items`` unless it is already present."""
    if item not in items:
        items.append(item)


def _seed_line(dot_list):
    """Return column 0's dark pixels if they look like a thin line start.

    Accepted shapes: one pixel; two pixels 1 or 2 rows apart; three
    vertically adjacent pixels.  Anything else yields an empty list.
    """
    ys = [pos[1] for pos in dot_list]
    if len(ys) == 1:
        return list(dot_list)
    if len(ys) == 2 and ys[1] - ys[0] in (1, 2):
        return list(dot_list)
    if len(ys) == 3 and ys[1] == ys[0] + 1 and ys[2] == ys[0] + 2:
        return list(dot_list)
    return []


def _isolated_runs(dots):
    """Collect the short vertical runs (two or three pixels) found in ``dots``.

    ``dots`` holds the dark pixels of one column, ordered by y.  Runs of two
    are collected first, then runs of three; the order matters because the
    caller later uses the order of the pixels it stores.
    """
    picked = []
    # Runs of exactly two adjacent pixels.
    for index, value in enumerate(dots):
        if index > len(dots) - 2:
            break
        y = value[1]
        if y + 1 != dots[index + 1][1]:
            continue
        if index == 0:
            keep = y + 2 != dots[index + 2][1]
        elif index == len(dots) - 2:
            # The final pair is accepted without checking its predecessor.
            keep = True
        else:
            keep = (y + 2 != dots[index + 2][1]
                    and y - 1 != dots[index - 1][1])
        if keep:
            _append_unique(picked, value)
            _append_unique(picked, dots[index + 1])
    # Runs of exactly three adjacent pixels.
    for index, value in enumerate(dots):
        if index > len(dots) - 3:
            break
        y = value[1]
        if y + 1 != dots[index + 1][1] or y + 2 != dots[index + 2][1]:
            continue
        if index == 0:
            keep = y + 3 != dots[index + 3][1]
        elif index == len(dots) - 3:
            keep = y - 1 != dots[index - 1][1]
        else:
            keep = (y + 3 != dots[index + 3][1]
                    and y - 1 != dots[index - 1][1])
        if keep:
            _append_unique(picked, value)
            _append_unique(picked, dots[index + 1])
            _append_unique(picked, dots[index + 2])
    return picked


def remove_noise_line(image):
    """Erase a thin interference line that starts at the left image edge.

    The line is seeded from the dark pixels (gray value below 200) of
    column 0 and then followed column by column: in each column only dark
    pixels within one row of the previous column's vertical extent are
    considered.  Pixels judged to belong to the line are collected and set
    to 255 at the very end, so the image is either fully processed or, when
    tracking fails, left untouched.

    :param image: grayscale image exposing ``size``, ``getpixel`` and
        ``putpixel``.
    :return: the same ``image`` object.
    """
    width, height = image.size
    total_list = []  # every pixel that will be whitened
    line_top = None  # vertical extent of the line in the previous column
    line_bottom = None
    try:
        for x in range(width):
            dot_list = [(x, y) for y in range(height)
                        if image.getpixel((x, y)) < 200]
            if x == 0:
                seed = _seed_line(dot_list)
                if not seed:
                    # No line starts at the left edge: nothing can be
                    # tracked, leave the image as it is.
                    return image
                total_list.extend(seed)
                line_top = seed[0][1]
                line_bottom = seed[-1][1]

            noise_dot_list = [pos for pos in dot_list
                              if line_top - 1 <= pos[1] <= line_bottom + 1]
            if not noise_dot_list:
                continue

            noise_dot_list2 = []
            span = noise_dot_list[-1][1] - noise_dot_list[0][1] + 1
            if len(noise_dot_list) != span:
                # The candidates are not one contiguous run: pick the short
                # runs and keep those nearest to the last known position.
                noise_dot_list1 = _isolated_runs(noise_dot_list)
                if noise_dot_list1:
                    reference = total_list[-2][1]
                    offsets = [reference - pos[1] for pos in noise_dot_list1]
                    d_value = min(abs(offset) for offset in offsets)
                    # Prefer the nearest run above the reference row.
                    if d_value in offsets:
                        centre = reference - d_value
                    else:
                        centre = reference + d_value
                    noise_dot_list2 = [pos for pos in noise_dot_list1
                                       if centre - 2 <= pos[1] <= centre + 2]

            if noise_dot_list2:
                total_list.extend(noise_dot_list2)
            else:
                # Count the dark pixels directly above and below; a run
                # that is too thick probably crosses a character.
                count = 0
                first = noise_dot_list[0]
                last = noise_dot_list[-1]
                # height - 1 replaces the hard-coded 39 (40-pixel images).
                if first[1] != 0 and last[1] != height - 1:
                    if image.getpixel((first[0], first[1] - 1)) < 200:
                        count += 1
                    if image.getpixel((last[0], last[1] + 1)) < 200:
                        count += 1
                if len(noise_dot_list) + count < 4:
                    total_list.extend(noise_dot_list)

            # NOTE(review): the original listing lost its indentation; this
            # update is assumed to run for every column with candidates.
            line_top = noise_dot_list[0][1]
            line_bottom = noise_dot_list[-1][1]

        for pos in total_list:
            image.putpixel(pos, 255)
    except (IndexError, TypeError) as e:
        # Best effort: report the failure and return the image unmodified.
        print(e)
    return image


if __name__ == '__main__':
    # Order: grayscale -> binarise -> denoise.  Dot/line based denoising may
    # run after binarisation; colour based denoising must run before it.
    # Alternative pipeline: out = im.point(get_bin_table(240), '1') followed
    # by del_dot_noise(out).
    os.makedirs('image_store/', exist_ok=True)
    for i in os.listdir('test_images/'):
        print(i)
        im = Image.open(os.path.join('test_images/', i))
        # im = Image.open(io.BytesIO(f)) when f holds the raw image bytes
        im = im.convert('L')
        im = del_threshold_noise(im, 120)
        out = remove_noise_line(im)
        out.save(os.path.join('image_store/', i))
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
