Commit "first" (0 parents: initial commit)
Showing 12 changed files with 476 additions and 0 deletions
funclip/__init__.py (new file, mode 100644)
File mode changed (empty file)
(4 more files added with no preview for their file type)
funclip/argparse_tools.py (new file, mode 100644)
```python
import argparse
import sys
from pathlib import Path

import yaml


class ArgumentParser(argparse.ArgumentParser):
    """Simple implementation of ArgumentParser supporting config files.

    This class originates from https://github.com/bw2/ConfigArgParse,
    but lacks some of the features that library has:

    - No support for multiple config files
    - "--config" is always added as an option automatically
    - No formats other than YAML are supported
    - Argument types are not checked

    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.add_argument("--config", help="Give config file in yaml format")

    def parse_known_args(self, args=None, namespace=None):
        # Parse once up front just to pick up "--config"
        _args, _ = super().parse_known_args(args, namespace)
        if _args.config is not None:
            if not Path(_args.config).exists():
                self.error(f"No such file: {_args.config}")

            with open(_args.config, "r", encoding="utf-8") as f:
                d = yaml.safe_load(f)
            if not isinstance(d, dict):
                self.error(f"Config file has non dict value: {_args.config}")

            for key in d:
                for action in self._actions:
                    if key == action.dest:
                        break
                else:
                    self.error(f"unrecognized arguments: {key} (from {_args.config})")

            # NOTE(kamo): Ignore "--config" from a config file
            # NOTE(kamo): Unlike "configargparse", this module doesn't check type,
            # i.e. a value of any type can be set regardless of the argument type.
            self.set_defaults(**d)
        return super().parse_known_args(args, namespace)


def get_commandline_args():
    extra_chars = [
        " ", ";", "&", "(", ")", "|", "^", "<", ">", "?", "*",
        "[", "]", "$", "`", '"', "\\", "!", "{", "}",
    ]

    # Escape single quotes for the shell; args containing any of the
    # extra characters are additionally wrapped in single quotes.
    argv = [
        arg.replace("'", "'\\''")
        if all(char not in arg for char in extra_chars)
        else "'" + arg.replace("'", "'\\''") + "'"
        for arg in sys.argv
    ]

    return sys.executable + " " + " ".join(argv)
```
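A minimal usage sketch may help; the option names and the `config.yaml` contents below are hypothetical, not part of FunClip itself. Values loaded from the YAML file become argparse defaults, so flags given explicitly on the command line still win:

```python
# Hypothetical config.yaml:
#     stage: 1
#     output_dir: ./output
from argparse_tools import ArgumentParser

parser = ArgumentParser(description="demo")
parser.add_argument("--stage", type=int, default=0)
parser.add_argument("--output_dir", default="./out")

args = parser.parse_args(["--config", "config.yaml", "--stage", "2"])
# YAML values only replace defaults, so the explicit CLI flag wins:
# args.stage == 2 (from CLI), args.output_dir == "./output" (from YAML)
print(args.stage, args.output_dir)
```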
funclip/imagemagick_test.py (new file, mode 100644)
```python
from moviepy.editor import CompositeVideoClip, TextClip, VideoFileClip
from moviepy.video.tools.subtitles import SubtitlesClip

# TextClip needs a working ImageMagick install; the font is a local file
generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=48, color='white')
subs = [((0, 2), 'sub1中文字幕'),
        ((2, 4), 'subs2'),
        ((4, 6), 'subs3'),
        ((6, 8), 'subs4')]

subtitles = SubtitlesClip(subs, generator)

video = VideoFileClip("examples/2022云栖大会_片段.mp4.mp4")
video = video.subclip(0, 8)
video = CompositeVideoClip([video, subtitles.set_pos(('center', 'bottom'))])

video.write_videofile("test_output.mp4")
```
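As the filename suggests, this script doubles as a check that ImageMagick is wired up correctly, since moviepy 1.x renders `TextClip` through ImageMagick. If `TextClip` fails with an ImageMagick error, moviepy can be pointed at the binary explicitly; a sketch, where the binary path is an assumption that varies by platform:

```python
# Only needed when moviepy cannot locate ImageMagick on its own.
# The path is an example for a typical Linux install; on Windows it is
# usually the full path to "magick.exe".
from moviepy.config import change_settings

change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
```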
funclip/launch.py (new file, mode 100644)
```python
import gradio as gr
from funasr import AutoModel
from videoclipper import VideoClipper

if __name__ == "__main__":
    funasr_model = AutoModel(model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                             model_revision="v2.0.4",
                             vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                             vad_model_revision="v2.0.4",
                             punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
                             punc_model_revision="v2.0.4",
                             spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
                             spk_model_revision="v2.0.2",
                             )
    audio_clipper = VideoClipper(funasr_model)

    def audio_recog(audio_input, sd_switch, hotwords):
        print(audio_input)
        return audio_clipper.recog(audio_input, sd_switch, hotwords=hotwords)

    def audio_clip(dest_text, audio_spk_input, start_ost, end_ost, state):
        return audio_clipper.clip(dest_text, start_ost, end_ost, state, dest_spk=audio_spk_input)

    def video_recog(video_input, sd_switch, hotwords):
        return audio_clipper.video_recog(video_input, sd_switch, hotwords)

    def video_clip(dest_text, video_spk_input, start_ost, end_ost, state):
        return audio_clipper.video_clip(dest_text, start_ost, end_ost, state, dest_spk=video_spk_input)

    def video_clip_addsub(dest_text, video_spk_input, start_ost, end_ost, state, font_size, font_color):
        return audio_clipper.video_clip(dest_text, start_ost, end_ost, state, font_size, font_color, add_sub=True,
                                        dest_spk=video_spk_input)

    top_md_1 = ("""
    **<font color="#1785c4"></font>**
    <div align="center">
    <div style="display:flex; gap: 0.25rem;" align="center">
    </div>
    </div>
    """)

    top_md_2 = ("""
    <div align="center">
    <div style="display:flex; gap: 0.25rem;" align="center">
    </div>
    </div>
    """)

    top_md_3 = ("""
    * Step1: 上传视频或音频文件(或使用下方的用例体验),点击 **<font color="#f7802b">识别</font>** 按钮
    * Step2: 复制识别结果中所需的文字至右上方,或者设置说话人标识,设置偏移与字幕配置(可选)
    * Step3: 点击 **<font color="#f7802b">裁剪</font>** 按钮或 **<font color="#f7802b">裁剪并添加字幕</font>** 按钮获得结果
    """)

    # gradio interface
    with gr.Blocks() as demo:
        # gr.Image("./examples/guide.png", show_label=False)
        gr.Markdown(top_md_1)
        gr.Markdown(top_md_2)
        gr.Markdown(top_md_3)
        video_state = gr.State()
        audio_state = gr.State()
        with gr.Tab("🎥✂️视频裁剪 Video Clipping"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="🎥视频输入 Video Input")
                    with gr.Row():
                        video_sd_switch = gr.Radio(["no", "yes"], label="👥是否区分说话人 Recognize Speakers",
                                                   value='no')
                        hotwords_input = gr.Textbox(label="🚒热词 Hotwords")
                    recog_button2 = gr.Button("👂识别 Recognize")
                    video_text_output = gr.Textbox(label="✏️识别结果 Recognition Result")
                    video_srt_output = gr.Textbox(label="📖SRT字幕内容 SRT Subtitles")
                with gr.Column():
                    video_text_input = gr.Textbox(label="✏️待裁剪文本 Text to Clip (多段文本使用'#'连接)")
                    video_spk_input = gr.Textbox(label="✏️待裁剪说话人 Speaker to Clip (多个说话人使用'#'连接)")
                    with gr.Row():
                        video_start_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50,
                                                    label="⏪开始位置偏移 Start Offset (ms)")
                        video_end_ost = gr.Slider(minimum=-500, maximum=1000, value=100, step=50,
                                                  label="⏩结束位置偏移 End Offset (ms)")
                    with gr.Row():
                        font_size = gr.Slider(minimum=10, maximum=100, value=32, step=2,
                                              label="🔠字幕字体大小 Subtitle Font Size")
                        font_color = gr.Radio(["black", "white", "green", "red"], label="🌈字幕颜色 Subtitle Color",
                                              value='white')
                        # font = gr.Radio(["黑体", "Alibaba Sans"], label="字体 Font")
                    with gr.Row():
                        clip_button2 = gr.Button("✂️裁剪\nClip")
                        clip_button3 = gr.Button("✂️裁剪并添加字幕\nClip and Generate Subtitles")
                    video_output = gr.Video(label="🎥裁剪结果 Video Clipped")
                    video_mess_output = gr.Textbox(label="ℹ️裁剪信息 Clipping Log")
                    video_srt_clip_output = gr.Textbox(label="📖裁剪部分SRT字幕内容 Clipped SRT Subtitles")

        with gr.Tab("🔊✂️音频裁剪 Audio Clipping"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(label="🔊音频输入 Audio Input")
                    with gr.Row():
                        audio_sd_switch = gr.Radio(["no", "yes"], label="👥是否区分说话人 Recognize Speakers",
                                                   value='no')
                        hotwords_input2 = gr.Textbox(label="🚒热词 Hotwords")
                    recog_button1 = gr.Button("👂识别 Recognize")
                    audio_text_output = gr.Textbox(label="✏️识别结果 Recognition Result")
                    audio_srt_output = gr.Textbox(label="📖SRT字幕内容 SRT Subtitles")
                with gr.Column():
                    audio_text_input = gr.Textbox(label="✏️待裁剪文本 Text to Clip (多段文本使用'#'连接)")
                    audio_spk_input = gr.Textbox(label="✏️待裁剪说话人 Speaker to Clip (多个说话人使用'#'连接)")
                    with gr.Row():
                        audio_start_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50,
                                                    label="⏪开始位置偏移 Start Offset (ms)")
                        audio_end_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50,
                                                  label="⏩结束位置偏移 End Offset (ms)")
                    with gr.Row():
                        clip_button1 = gr.Button("✂️裁剪 Clip")
                    audio_output = gr.Audio(label="🔊裁剪结果 Audio Clipped")
                    audio_mess_output = gr.Textbox(label="ℹ️裁剪信息 Clipping Log")
                    audio_srt_clip_output = gr.Textbox(label="📖裁剪部分SRT字幕内容 Clipped SRT Subtitles")

        recog_button1.click(audio_recog,
                            inputs=[audio_input, audio_sd_switch, hotwords_input2],
                            outputs=[audio_text_output, audio_srt_output, audio_state])
        clip_button1.click(audio_clip,
                           inputs=[audio_text_input, audio_spk_input, audio_start_ost, audio_end_ost, audio_state],
                           outputs=[audio_output, audio_mess_output, audio_srt_clip_output])

        recog_button2.click(video_recog,
                            inputs=[video_input, video_sd_switch, hotwords_input],
                            outputs=[video_text_output, video_srt_output, video_state])
        clip_button2.click(video_clip,
                           inputs=[video_text_input, video_spk_input, video_start_ost, video_end_ost, video_state],
                           outputs=[video_output, video_mess_output, video_srt_clip_output])
        clip_button3.click(video_clip_addsub,
                           inputs=[video_text_input, video_spk_input, video_start_ost, video_end_ost, video_state,
                                   font_size, font_color],
                           outputs=[video_output, video_mess_output, video_srt_clip_output])

    # start the gradio service locally
    demo.launch()
```
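Note that `demo.launch()` with no arguments serves on localhost only. To reach the demo from another machine, Gradio's standard launch options can be passed instead; a sketch with example values, not part of the original code:

```python
# Bind to all interfaces on a fixed port; share=True would additionally
# create a temporary public gradio.live URL. Example values only.
demo.launch(server_name="0.0.0.0", server_port=7860)
```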
funclip/subtitle_utils.py (new file, mode 100644)
```python
def time_convert(ms):
    """Convert milliseconds into an SRT timestamp (HH:MM:SS,mmm)."""
    ms = int(ms)
    tail = ms % 1000
    s = ms // 1000
    mi = s // 60
    s = s % 60
    h = mi // 60
    mi = mi % 60
    # zero-pad to the fixed-width SRT format (milliseconds take 3 digits)
    return "{:02d}:{:02d}:{:02d},{:03d}".format(h, mi, s, tail)


class Text2SRT:
    def __init__(self, text, timestamp, offset=0):
        self.token_list = [i for i in text.split() if len(i)]
        self.timestamp = timestamp
        # start/end are kept in milliseconds despite the *_sec names
        start, end = timestamp[0][0] - offset, timestamp[-1][1] - offset
        self.start_sec, self.end_sec = start, end
        self.start_time = time_convert(start)
        self.end_time = time_convert(end)

    def text(self):
        # join tokens, adding spaces around non-Chinese tokens only
        res = ""
        for word in self.token_list:
            if '\u4e00' <= word <= '\u9fff':
                res += word
            else:
                res += " " + word
        return res

    def len(self):
        return len(self.token_list)

    def srt(self, acc_ost=0.0):
        # the trailing blank line terminates the SRT cue
        return "{} --> {}\n{}\n\n".format(
            time_convert(self.start_sec + acc_ost * 1000),
            time_convert(self.end_sec + acc_ost * 1000),
            self.text())

    def time(self, acc_ost=0.0):
        return (self.start_sec / 1000 + acc_ost, self.end_sec / 1000 + acc_ost)


def generate_srt(sentence_list):
    srt_total = ''
    for i, d in enumerate(sentence_list):
        t2s = Text2SRT(d['text'], d['timestamp'])
        if 'spk' in d:
            srt_total += "{} spk{}\n{}".format(i + 1, d['spk'], t2s.srt())
        else:
            srt_total += "{}\n{}".format(i + 1, t2s.srt())
    return srt_total


def generate_srt_clip(sentence_list, start, end, begin_index=0, time_acc_ost=0.0):
    start, end = int(start * 1000), int(end * 1000)
    srt_total = ''
    cc = 1 + begin_index
    subs = []
    for i, d in enumerate(sentence_list):
        if d['timestamp'][-1][1] <= start:  # sentence ends before the clip
            continue
        if d['timestamp'][0][0] >= end:  # sentence starts after the clip
            break
        # sentences lying entirely inside the clip
        if (d['timestamp'][-1][1] <= end and d['timestamp'][0][0] > start) or \
                (d['timestamp'][-1][1] == end and d['timestamp'][0][0] == start):
            t2s = Text2SRT(d['text'], d['timestamp'], offset=start)
            srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost))
            subs.append((t2s.time(time_acc_ost), t2s.text()))
            cc += 1
            continue
        # sentence overlaps the clip start (and possibly also the end)
        if d['timestamp'][0][0] <= start:
            if not d['timestamp'][-1][1] > end:
                for j, ts in enumerate(d['timestamp']):
                    if ts[1] > start:
                        break
                _text = " ".join(d['text'].split()[j:])
                _ts = d['timestamp'][j:]
            else:
                for j, ts in enumerate(d['timestamp']):
                    if ts[1] > start:
                        _start = j
                        break
                for j, ts in enumerate(d['timestamp']):
                    if ts[1] > end:
                        _end = j
                        break
                _text = " ".join(d['text'].split()[_start:_end])
                _ts = d['timestamp'][_start:_end]
            if len(_ts):
                t2s = Text2SRT(_text, _ts, offset=start)
                srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost))
                subs.append((t2s.time(time_acc_ost), t2s.text()))
                cc += 1
            continue
        # sentence overlaps the clip end
        if d['timestamp'][-1][1] > end:
            for j, ts in enumerate(d['timestamp']):
                if ts[1] > end:
                    break
            _text = " ".join(d['text'].split()[:j])
            _ts = d['timestamp'][:j]
            if len(_ts):
                t2s = Text2SRT(_text, _ts, offset=start)
                srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost))
                subs.append(
                    (t2s.time(time_acc_ost), t2s.text())
                )
                cc += 1
            continue
    return srt_total, subs, cc
```
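A worked example may help clarify the expected input: each sentence dict carries space-separated tokens in `text` and a parallel token-level `timestamp` list in milliseconds. The values below are hypothetical:

```python
# Hypothetical sentences; each timestamp entry is [start_ms, end_ms]
# for the corresponding token in 'text'.
sentences = [
    {"text": "你 好 世 界", "timestamp": [[0, 400], [400, 800], [800, 1300], [1300, 1800]]},
    {"text": "谢 谢", "timestamp": [[2000, 2400], [2400, 2900]]},
]
print(generate_srt(sentences))
# 1
# 00:00:00,000 --> 00:00:01,800
# 你好世界
#
# 2
# 00:00:02,000 --> 00:00:02,900
# 谢谢
```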
funclip/test.sh (new file, mode 100644)
```bash
#!/bin/bash
# step1: Recognize
python videoclipper.py --stage 1 \
                       --file ../examples/2022云栖大会_片段.mp4 \
                       --sd_switch yes \
                       --output_dir ./output
# now you can find the recognition results and the entire SRT file in ./output/

# step2: Clip
# (to clip by speaker instead, replace --dest_text with e.g. --dest_spk spk0)
python videoclipper.py --stage 2 \
                       --file ../examples/2022云栖大会_片段.mp4 \
                       --output_dir ./output \
                       --dest_text '所以这个是我们办这个奖的初心啊,我们也会一届一届的办下去' \
                       --start_ost 0 \
                       --end_ost 100 \
                       --output_file './output/res.mp4'
```
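The two stages communicate through the files that `write_state()` and `load_state()` (defined in trans_utils.py below) read and write under `--output_dir`; a sketch of that handoff, noting that the dict contents are placeholders and the actual call sites live in videoclipper.py, whose diff is collapsed in this view:

```python
# Sketch of the stage-1 -> stage-2 state handoff; the values below are
# placeholders, and the call sites in videoclipper.py are assumptions.
import os
from trans_utils import write_state, load_state

os.makedirs("./output", exist_ok=True)
state = {"recog_res_raw": "你 好", "timestamp": [[0, 250], [250, 500]], "sentences": []}
write_state("./output", state)     # stage 1: persist recognition results
restored = load_state("./output")  # stage 2: reload them before clipping
```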
funclip/trans_utils.py (new file, mode 100644)
```python
import ast
import os

import numpy as np

PUNC_LIST = [',', '。', '!', '?', '、']


def pre_proc(text):
    # drop punctuation and put spaces around every Chinese character
    res = ''
    for i in range(len(text)):
        if text[i] in PUNC_LIST:
            continue
        if '\u4e00' <= text[i] <= '\u9fff':
            if len(res) and res[-1] != " ":
                res += ' ' + text[i] + ' '
            else:
                res += text[i] + ' '
        else:
            res += text[i]
    if len(res) and res[-1] == ' ':
        res = res[:-1]
    return res


def proc(raw_text, timestamp, dest_text):
    # simple matching: locate every occurrence of dest_text in raw_text
    # and map it to a time span through the token-level timestamps
    ld = len(dest_text.split())
    mi, ts = [], []
    offset = 0
    while True:
        fi = raw_text.find(dest_text, offset, len(raw_text))
        if fi == -1:
            break
        ti = raw_text[:fi].count(' ')  # index of the first matched token
        offset = fi + ld
        mi.append(fi)
        ts.append([timestamp[ti][0] * 16, timestamp[ti + ld - 1][1] * 16])
    return ts


def proc_spk(dest_spk, sd_sentences):
    # collect sentences of the requested speaker (e.g. "spk0"),
    # skipping segments shorter than one second
    ts = []
    for d in sd_sentences:
        d_start = d['timestamp'][0][0]
        d_end = d['timestamp'][-1][1]
        spkid = dest_spk[3:]
        if str(d['spk']) == spkid and d_end - d_start > 999:
            ts.append([d['start'] * 16, d['end'] * 16])
    return ts


def generate_vad_data(data, sd_sentences, sr=16000):
    assert len(data.shape) == 1
    vad_data = []
    for d in sd_sentences:
        d_start = round(d['ts_list'][0][0] / 1000, 2)
        d_end = round(d['ts_list'][-1][1] / 1000, 2)
        vad_data.append([d_start, d_end, data[int(d_start * sr):int(d_end * sr)]])
    return vad_data


def write_state(output_dir, state):
    for key in ['/recog_res_raw', '/timestamp', '/sentences']:
        with open(output_dir + key, 'w') as fout:
            fout.write(str(state[key[1:]]))
    if 'sd_sentences' in state:
        with open(output_dir + '/sd_sentences', 'w') as fout:
            fout.write(str(state['sd_sentences']))


def load_state(output_dir):
    # the state files hold repr'd Python literals, so literal_eval suffices
    state = {}
    with open(output_dir + '/recog_res_raw') as fin:
        state['recog_res_raw'] = fin.read()
    with open(output_dir + '/timestamp') as fin:
        state['timestamp'] = ast.literal_eval(fin.read())
    with open(output_dir + '/sentences') as fin:
        state['sentences'] = ast.literal_eval(fin.read())
    if os.path.exists(output_dir + '/sd_sentences'):
        with open(output_dir + '/sd_sentences') as fin:
            state['sd_sentences'] = ast.literal_eval(fin.read())
    return state


def convert_pcm_to_float(data):
    if data.dtype == np.float64:
        return data
    elif data.dtype == np.float32:
        return data.astype(np.float64)
    elif data.dtype == np.int16:
        bit_depth = 16
    elif data.dtype == np.int32:
        bit_depth = 32
    elif data.dtype == np.int8:
        bit_depth = 8
    else:
        raise ValueError("Unsupported audio data type")

    # scale the integer types into [-1.0, 1.0); 8-bit PCM is treated as
    # offset-binary, so shift it to signed before scaling
    max_int_value = float(2 ** (bit_depth - 1))
    if bit_depth == 8:
        data = data.astype(np.int16) - 128
    return data.astype(np.float64) / max_int_value
```
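A worked example of the matching helpers, with hypothetical values: `pre_proc()` tokenizes Chinese text into space-separated characters, and `proc()` maps each occurrence of `dest_text` to a time span. The `* 16` factor appears to convert millisecond timestamps into sample offsets at 16 kHz; that reading is inferred from the code, not documented.

```python
# Hypothetical values. `timestamp` holds [start_ms, end_ms] per token
# and aligns 1:1 with the tokens of `raw_text`.
raw_text = pre_proc("你好,世界")   # -> "你 好 世 界" (punctuation dropped)
timestamp = [[0, 250], [250, 500], [500, 750], [750, 1000]]
spans = proc(raw_text, timestamp, "世 界")
print(spans)  # [[8000, 16000]], i.e. 0.5s-1.0s as 16 kHz sample offsets
```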
funclip/videoclipper.py (new file, mode 100644)
(diff collapsed in the original view; contents not shown)