GyuhoLee

[Update] 변환된 xml을 문장별로 string list로 파싱

......@@ -20,14 +20,8 @@
</component>
<component name="ChangeListManager">
<list default="true" id="b9decb0c-dc9e-4239-bdad-09ea8dd5179d" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/.gitignore" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/inspectionProfiles/profiles_settings.xml" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/modules.xml" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/src.iml" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/vcs.xml" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/subtitle.py" beforeDir="false" afterPath="$PROJECT_DIR$/subtitle.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../캡스톤디자인_3주차_주간보고서.hwp" beforeDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......@@ -101,22 +95,22 @@
</option>
</component>
<component name="WindowStateProjectService">
<state width="1899" height="282" key="GridCell.Tab.0.bottom" timestamp="1604304028779">
<state width="1899" height="282" key="GridCell.Tab.0.bottom" timestamp="1604306110978">
<screen x="1920" y="0" width="1920" height="1040" />
</state>
<state width="1899" height="282" key="GridCell.Tab.0.bottom/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" />
<state width="1899" height="282" key="GridCell.Tab.0.center" timestamp="1604304028779">
<state width="1899" height="282" key="GridCell.Tab.0.bottom/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
<state width="1899" height="282" key="GridCell.Tab.0.center" timestamp="1604306110978">
<screen x="1920" y="0" width="1920" height="1040" />
</state>
<state width="1899" height="282" key="GridCell.Tab.0.center/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" />
<state width="1899" height="282" key="GridCell.Tab.0.left" timestamp="1604304028779">
<state width="1899" height="282" key="GridCell.Tab.0.center/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
<state width="1899" height="282" key="GridCell.Tab.0.left" timestamp="1604306110978">
<screen x="1920" y="0" width="1920" height="1040" />
</state>
<state width="1899" height="282" key="GridCell.Tab.0.left/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" />
<state width="1899" height="282" key="GridCell.Tab.0.right" timestamp="1604304028779">
<state width="1899" height="282" key="GridCell.Tab.0.left/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
<state width="1899" height="282" key="GridCell.Tab.0.right" timestamp="1604306110978">
<screen x="1920" y="0" width="1920" height="1040" />
</state>
<state width="1899" height="282" key="GridCell.Tab.0.right/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" />
<state width="1899" height="282" key="GridCell.Tab.0.right/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
<state x="2381" y="164" key="SettingsEditor" timestamp="1604303734485">
<screen x="1920" y="0" width="1920" height="1040" />
</state>
......
from pytube import YouTube
from xml.etree import ElementTree
# Fetch the Korean caption track of a YouTube video as XML.
video_url = 'https://www.youtube.com/watch?v=ecUWKU_v318'
yt = YouTube(video_url)
title = yt.title
description = yt.description
caption = yt.captions.get_by_language_code('ko')
if caption is None:
    # pytube documents get_by_language_code as returning None when the track
    # is missing; fail with a clear message instead of an AttributeError on
    # the attribute access below.
    raise RuntimeError("No 'ko' caption track is available for " + video_url)
caption_xml = caption.xml_captions

# Parse the caption XML into a list of strings, one entry per <text> element.
root = ElementTree.fromstring(caption_xml)
sentences = []
print(root.tag, root.attrib)
for child in root.findall("text"):
    # child.text is None for an empty <text/> element, which would make
    # .replace() raise. itertext() yields nothing in that case (giving ''),
    # and it also picks up text nested inside inline child elements.
    sentence = ''.join(child.itertext())
    # Captions may wrap across lines; flatten each entry to a single line.
    sentences.append(sentence.replace('\n', ' '))
print(sentences)
\ No newline at end of file
......