# find_url.py
# 961 Bytes
import re
'''
test cases:
lines1 = "['mailto:psalms1273@merryyear.org', 'http://www.merryyear.org/abbs/?act=bbs&subAct=view&bid=Notice&page=1&order_index=no&order_type=desc&seq=1571']"
lines = "['https://www.shinhanhope.com/web/main.jsp']"
ll ="['https://www.childfund.or.kr/news/noticeView.do?bmTemplate=/inc/jsp/board/template/default&bdId=20019410&bmIds=10000023,10000097']"
l="http://bokjiro.go.kr/gowf/wel/welsvc/svcsearch/WelGvmtSvcSearchView.do?servId=WII00000124"
t = "https://welfare.gangdong.go.kr/site/contents/bokji/html00/html00/index3.html"
'''
# Compiled once at import time instead of on every call.
# Pattern pieces, in order: scheme, optional "user:password@", host, optional
# ":port", then an optional path that may carry a query string or fragment.
# The path class accepts "-", "~", "%" and "+" so hyphenated or
# percent-encoded paths are not cut short.  The final character class keeps
# the match from ending on whitespace or on the quote/bracket that closes a
# stringified list such as "['http://a.example/x']".
_URL_PATTERN = re.compile(
    r"https?://(?:\w*:\w*@)?[-\w.]+(?::\d+)?"
    r"(?:/(?:[-\w/.~%+]*(?:[?#]\S+)?)?[^\s'\]])?",
    re.IGNORECASE,
)


def find_url_in_str(url):
    """Return the first http/https URL found in *url*, or None if there is none.

    The match never ends on whitespace, a single quote, or a closing square
    bracket, so a URL embedded in text like "['http://a.example/x']" or
    "see http://a.example/x for details" is returned without the trailing
    delimiter.

    Args:
        url: Text to scan.  Must be a ``str``; the regex engine raises
            ``TypeError`` for other types.

    Returns:
        The matched URL text, or ``None`` when no http(s) URL is present.
    """
    match = _URL_PATTERN.search(url)
    return match.group() if match is not None else None
if __name__ == "__main__":
    # The original bare call passed no argument and raised TypeError as soon
    # as the module was imported or run.  Run a smoke check only when executed
    # as a script, using one of the sample inputs listed near the top of this
    # file.
    print(find_url_in_str("['https://www.shinhanhope.com/web/main.jsp']"))