国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Python > 正文

python驗證碼識別教程之利用滴水算法分割圖片

2020-02-15 21:38:54
字體:
來源:轉載
供稿:網友

滴水算法概述

滴水算法是一種用于分割手寫粘連字符的算法,與以往的直線式地分割不同,它模擬水滴的滾動,通過水滴的滾動路徑來分割字符,可以解決直線切割造成的過分分割問題。

引言

之前提過對于有粘連的字符可以使用滴水算法來解決分割,但智商捉急的我實在是領悟不了這個算法的精髓,幸好有小伙伴已經實現相關代碼。

我對上面的代碼進行了一些小修改,同時升級為python3的代碼。

還是以這張圖片為例:

在以前的文章中,我們已經知道這種簡單的粘連可以通過控制閾值來實現分割,這里我們使用滴水算法。

首先使用之前文章中介紹的垂直投影或者連通域先進行一次切割處理,得到結果如下:

針對于最后粘連情況來使用滴水算法處理:

from itertools import groupby

# (dx, dy) offset of each numbered neighbour of the current drop position:
# 1 = lower-left, 2 = below, 3 = lower-right, 4 = right, 5 = left.
_NEIGHBOUR_OFFSETS = {1: (-1, 1), 2: (0, 1), 3: (1, 1), 4: (1, 0), 5: (-1, 0)}

# (dx, dy) step taken for each winning weight. Weight j-th neighbour = 6 - j,
# so 5 = lower-left, 4 = below, 3 = lower-right, 2 = right, 1 = left;
# 6 is the special "all neighbours white" case, which also falls straight down.
_MOVES = {1: (-1, 0), 2: (1, 0), 3: (1, 1), 4: (0, 1), 5: (-1, 1), 6: (0, 1)}


def binarizing(img, threshold):
    """Convert an image to grayscale and binarize it.

    :param img: image object providing ``convert``, ``load`` and ``size``
        (used here like a PIL image).
    :param threshold: gray levels strictly below this value become 0 (black);
        all others become 255 (white).
    :return: the converted grayscale image, binarized in place.
    """
    img = img.convert("L")  # to grayscale; the caller's image is not modified
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
        for x in range(w):
            pixdata[x, y] = 0 if pixdata[x, y] < threshold else 255
    return img


def vertical(img):
    """Return the vertical projection of a binarized image.

    :param img: binarized image (black pixels have value 0).
    :return: list with one entry per column: the number of black pixels in it.
    """
    pixdata = img.load()
    w, h = img.size
    return [sum(1 for y in range(h) if pixdata[x, y] == 0) for x in range(w)]


def get_start_x(hist_width):
    """Pick the column where the drop starts.

    Looks at the window of up to four columns on either side of the middle
    column and returns the index of the (first) column with the smallest
    projection value inside that window.

    :param hist_width: vertical projection, one value per column.
    :return: column index inside ``hist_width``.
    :raises ValueError: if ``hist_width`` is empty.
    """
    if not hist_width:
        raise ValueError("hist_width must not be empty")
    mid = len(hist_width) // 2
    # Clamp the window start: a negative slice start would count from the end
    # of the list and produce a wrong (possibly negative) column for narrow
    # images.
    lo = max(mid - 4, 0)
    window = hist_width[lo:mid + 5]
    return lo + window.index(min(window))


def get_nearby_pix_value(img_pix, x, y, j, width=None):
    """Return 0 if the j-th neighbour of (x, y) is black, otherwise 1.

    Neighbour numbering: 1 = lower-left, 2 = below, 3 = lower-right,
    4 = right, 5 = left.

    :param img_pix: pixel access object indexable as ``img_pix[x, y]``.
    :param x: column of the current position.
    :param y: row of the current position.
    :param j: neighbour number, 1..5.
    :param width: optional image width. When given, a neighbour whose column
        lies outside ``0 .. width-1`` is reported as 0 (blocked) without
        reading the pixel. When omitted, the pixel is always read, as before.
    :raises Exception: if ``j`` is not in 1..5.
    """
    offset = _NEIGHBOUR_OFFSETS.get(j)
    if offset is None:
        raise Exception("get_nearby_pix_value error")
    nx = x + offset[0]
    ny = y + offset[1]
    if width is not None and not 0 <= nx < width:
        return 0
    return 0 if img_pix[nx, ny] == 0 else 1


def get_end_route(img, start_x, height):
    """Trace the drop-fall path from the top row to the bottom row.

    At each step the five neighbours are weighted (lower-left 5, below 4,
    lower-right 3, right 2, left 1; black neighbours weigh 0) and the drop
    moves towards the heaviest one. If every neighbour is black, or every
    neighbour is white, it falls straight down.

    :param img: binarized image (black pixels have value 0).
    :param start_x: column where the drop enters the image at row 0.
    :param height: number of rows to traverse.
    :return: list of (x, y) points visited, starting at ``(start_x, 0)`` and
        ending on row ``height - 1``.
    """
    width = img.size[0]
    left_limit = 0
    right_limit = width - 1
    pix_img = img.load()  # loop-invariant, so fetched once

    # Keep the entry point inside the image.
    start_x = min(max(start_x, left_limit), right_limit)
    cur_p = (start_x, 0)
    last_p = cur_p
    end_route = [cur_p]

    while cur_p[1] < height - 1:
        x, y = cur_p
        # Passing the width makes neighbours outside the image count as
        # blocked instead of being read out of range.
        weights = [
            get_nearby_pix_value(pix_img, x, y, j, width) * (6 - j)
            for j in range(1, 6)
        ]
        sum_n = sum(weights)
        max_w = max(weights)
        if sum_n == 0:
            max_w = 4  # everything black: cut straight down
        elif sum_n == 15:
            max_w = 6  # everything white: fall straight down

        dx, dy = _MOVES[max_w]
        next_x = x + dx
        next_y = y + dy

        # Only a sideways step can lead back to the previous position. Break
        # the left/right oscillation by forcing a diagonal step downwards.
        if (next_x, next_y) == last_p:
            if next_x < x:
                next_x = x + 1
            else:
                next_x = x - 1
            next_y = y + 1

        last_p = cur_p

        # A step that leaves the image is replaced by a step down the border.
        if next_x > right_limit:
            next_x = right_limit
            next_y = y + 1
        if next_x < left_limit:
            next_x = left_limit
            next_y = y + 1

        cur_p = (next_x, next_y)
        end_route.append(cur_p)

    return end_route


def get_split_seq(projection_x):
    """Group consecutive 1-entries of a 0/1 projection into runs.

    :param projection_x: sequence containing only 0 and 1.
    :return: list of ``[start_index, run_length]`` for every run of 1s.
    :raises Exception: if a value other than 0 or 1 is encountered.
    """
    split_seq = []
    start_x = 0
    length = 0
    for pos_x, val in enumerate(projection_x):
        if val == 0:
            if length != 0:
                split_seq.append([start_x, length])
                length = 0
        elif val == 1:
            if length == 0:
                start_x = pos_x
            length += 1
        else:
            raise Exception('generating split sequence occurs error')
    # A run that reaches the end of the projection is still pending here.
    if length != 0:
        split_seq.append([start_x, length])
    return split_seq


def do_split(source_image, starts, filter_ends):
    """Cut the region between two per-row boundaries out of an image.

    :param source_image: binarized image; pixels are compared against 0, so
        it is expected to be single-band.
    :param starts: one ``(x, y)`` point per row, the first column of the row.
    :param filter_ends: one ``(x, y)`` point per row, the last column of the
        row (inclusive).
    :return: new RGB image, white background, with the black pixels of the
        selected region copied in.
    :raises ValueError: if ``starts`` or ``filter_ends`` is empty.
    """
    # Local import so the module can be imported without Pillow installed;
    # the original code used ``Image`` without importing it at all.
    from PIL import Image

    rows = list(zip(starts, filter_ends))
    if not rows:
        raise ValueError("starts and filter_ends must not be empty")

    left = min(start[0] for start, _ in rows)
    top = min(start[1] for start, _ in rows)
    right = max(end[0] for _, end in rows)
    bottom = max(end[1] for _, end in rows)
    width = right - left + 1
    height = bottom - top + 1

    pixdata = source_image.load()
    image = Image.new('RGB', (width, height), (255, 255, 255))
    for start, end in rows:
        row = start[1]
        for x in range(start[0], end[0] + 1):
            if pixdata[x, row] == 0:
                image.putpixel((x - left, row - top), (0, 0, 0))
    return image


def drop_fall(img, threshold=200):
    """Split an image into two pieces along a drop-fall path.

    The left piece is saved as ``cuts-d-1.png`` and the right piece as
    ``cuts-d-2.png`` in the current directory.

    :param img: source image.
    :param threshold: binarization threshold passed to ``binarizing``.
    :return: tuple ``(left_image, right_image)``.
    """
    width, height = img.size

    # 1. Binarize. The binarized copy is used for every later step so that
    #    comparisons against 0 work regardless of the source image mode.
    b_img = binarizing(img, threshold)

    # 2. Vertical projection and 3. start column.
    hist_width = vertical(b_img)
    start_x = get_start_x(hist_width)

    # 4. Trace the path. It may visit several points on one row; keep the
    #    right-most one per row (rows appear in non-decreasing order, which is
    #    what groupby needs).
    end_route = get_end_route(b_img, start_x, height)
    filter_end_route = [
        max(points) for _, points in groupby(end_route, lambda p: p[1])
    ]

    # Left piece: from column 0 up to and including the path.
    start_route = [(0, y) for y in range(height)]
    img1 = do_split(b_img, start_route, filter_end_route)
    img1.save('cuts-d-1.png')

    # Right piece: from just right of the path to the last column.
    start_route = [(x + 1, y) for x, y in filter_end_route]
    end_route = [(width - 1, y) for y in range(height)]
    img2 = do_split(b_img, start_route, end_route)
    img2.save('cuts-d-2.png')

    return img1, img2


if __name__ == '__main__':
    from PIL import Image

    p = Image.open("cuts-2.png")
    drop_fall(p)
發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 友谊县| 绥宁县| 达日县| 辽阳县| 沛县| 宁国市| 津市市| 扬州市| 龙陵县| 西吉县| 江门市| 安新县| 宽城| 北辰区| 积石山| 长武县| 南召县| 宁津县| 浦东新区| 肇东市| 镇雄县| 西青区| 武宣县| 贵定县| 正定县| 谷城县| 盐津县| 巴青县| 师宗县| 庆阳市| 瓦房店市| 怀远县| 陈巴尔虎旗| 文化| 毕节市| 闽侯县| 闸北区| 五常市| 大埔县| 楚雄市| 永德县|