又做了一个拆分双语歌词的工具

缘由#

前情提要: 合并歌词脚本

知周所众, 我下载音乐的工作流是这样的:

1
graph TB
2
    A[B 站手机客户端缓存视频] --> B[adbfs 挂载到电脑]
3
    B --> C[wyf9/music 脚本提取出 mp3 文件]
4
    C --> D["**MusicPlayer2 下载封面 & 歌词**"]
5
    C --> E[从 Waylyrics 查找并下载歌词]
6
    E --> H[手动下载封面]
7
    C --> F[从网上手动下载封面 & 歌词]
8
    D --> G["git add && git commit -S && git push"]
9
    H --> G
10
    F --> G

问题就出在了从 MusicPlayer2 下载歌词这一步

MusicPlayer2 从网易云源下载歌词:

因此, 下载到的歌词是 [时间戳]原文 / 译文 的格式, 而这种格式很多软件都无法识别, 会直接当作单语歌词来处理:

Waylyrics

Salt Player

如你所见, 上面 MusicPlayer2 的编辑歌词界面中也有互转选项, 但是 太麻烦, 于是我在两个库 + Grok 的帮助下写出了这个拆分歌词的脚本.

代码#

点击展开

1
# coding: utf-8
2

3
import os
4
from copy import deepcopy
5

6
from mutagen.id3 import ID3
7
from mutagen.id3._frames import USLT
8
from pylrc import parse
9
from pylrc.classes import LyricLine
10
from charset_normalizer import detect
11
from colorama import Fore
12

13
from utils import *
14

15

16
def help(code: int = 0):
    """Print the command-line usage text and terminate the process.

    Args:
        code: Process exit status passed to ``sys.exit``; 0 by default.

    Raises:
        SystemExit: Always, carrying ``code``.
    """
    # Imported locally so the block does not depend on the file's import
    # header. ``sys.exit`` is used instead of the builtin ``exit`` because the
    # latter is injected by the ``site`` module for interactive sessions and
    # is missing when Python runs with ``-S``.
    import sys

    print('''
Usage:
  - main.py <mode> <path>
  - main.py <mode> <path> <prefix>
  - main.py <mode> <path> <prefix_start> <prefix_end> [prefix_spliter]
    * contains prefix_start and prefix_end
    * prefix_spliter is _ by default
''')
    sys.exit(code)
26

27

28
def ms_to_tag(line: LyricLine) -> str:
    """Format the time fields of a lyric line as ``MM:SS.mmm``.

    Reads ``line.minutes``, ``line.seconds`` and ``line.milliseconds`` and
    zero-pads them to 2, 2 and 3 digits respectively.
    """
    minutes, seconds, millis = line.minutes, line.seconds, line.milliseconds
    return '{:02d}:{:02d}.{:03d}'.format(minutes, seconds, millis)
30

31

32
def extract_lyrics(id3: ID3) -> tuple[str | None, str]:
    """Fetch the text of the first USLT (lyrics) frame of an ID3 tag.

    Args:
        id3: The tag object to query via ``getall('USLT')``.

    Returns:
        ``(text, message)``. ``text`` is ``None`` when there is no USLT frame
        or when reading raised; ``message`` describes the outcome either way.
    """
    try:
        frames = id3.getall('USLT')
        if frames:
            first = frames[0]
            text = first.text
            return text, f"Found lyrics (lang: {first.lang}, description: {first.desc})"
        return None, "No lyrics (USLT) tag found"
    except Exception as exc:
        # Best effort: report the problem to the caller instead of raising.
        return None, f"Error reading ID3 tags: {exc}"
43

44

45
def edit_lyrics(id3: ID3, lrc: str) -> tuple[bool, str]:
    """Replace the text of the first USLT (lyrics) frame and save the tag.

    The rewritten frame keeps the ``lang`` and ``desc`` of the frame it
    replaces. Any additional USLT frames are written back unchanged.

    Args:
        id3: The tag object to modify and save.
        lrc: The new lyrics text.

    Returns:
        ``(ok, message)``. ``ok`` is ``False`` when there is no USLT frame or
        when any step raised; ``message`` describes the outcome.
    """
    try:
        uslt_frames = id3.getall('USLT')
        if not uslt_frames:
            return False, "No lyrics (USLT) tag found"

        first, *others = uslt_frames
        id3.delall('USLT')
        id3.add(USLT(lang=first.lang, desc=first.desc, text=lrc))
        # delall() removed every USLT frame, not just the one being rewritten;
        # re-add the rest so lyrics stored under another lang/desc survive.
        for frame in others:
            id3.add(frame)

        # Persist the changes.
        id3.save()
        return True, "Lyrics successfully written to USLT tag"
    except Exception as e:
        return False, f"Error writing lyrics to ID3 tags: {e}"
59

60

61
def process(filename: str, mode: str) -> tuple[bool | None, str]:
62
    # get lyrics
63
    if mode == 'mp3':
64
        id3 = ID3(filename)
65
        lrc, desc = extract_lyrics(id3)
66
        if not lrc:
67
            return False, desc
68
        lrc = lrc.replace('\n', '')
69
        debug(f"* Read mp3 tag content (first 50 chars): {lrc[:50].replace('\n', f'{Fore.CYAN}\\n{Fore.BLUE}')}")
70
    else:
71
        try:
72
            with open(filename, 'rb') as f:
73
                raw_data = f.read()
74
                encoding_result = detect(raw_data)
75
                encoding = encoding_result.get('encoding')
76
                confidence = encoding_result.get('confidence')
77
                debug(f"* Detected encoding: {encoding} (confidence: {confidence})")
78

79
            if encoding is None:
80
                return False, f"Unable to detect encoding for {filename}"
81

82
            with open(filename, 'r', encoding=encoding) as f:
83
                lrc = ''.join(f.readlines())
84
            debug(f"* Read lrc content (first 50 chars): {lrc[:50].replace('\n', f'{Fore.CYAN}\\n{Fore.BLUE}')}")
85
        except UnicodeDecodeError as e:
86
            return False, f"Failed to read file with encoding {encoding}: {e}"
87
        except Exception as e:
88
            return False, f"Error reading file: {e}"
89

90
    # check if need split
91
    if not lrc.find(' / ') > 10:
92
        return None, ''
93

94
    # process lyrics
95
    lyric = parse(lrc)
96
    new = deepcopy(lyric)
97
    new.clear()
98

99
    i = 0
100
    line: LyricLine
101
    while i < len(lyric):
102
        try:
103
            line = lyric[i]
104
            if ' / ' in line.text:
105
                origline = deepcopy(line)
106
                tranline = deepcopy(line)
107
                orig, tran = line.text.split(' / ', 1)
108
                origline.text = orig.lstrip(' ').rstrip(' ')
109
                tranline.text = tran.lstrip(' ').rstrip(' ')
110
                new.append(origline)
111
                new.append(tranline)
112
            else:
113
                new.append(line)
114
            i += 1
115
        except IndexError:
116
            break
117

118
    # write file
119
    if mode == 'mp3':
120
        edit_lyrics(id3, new.toLRC())
121
        id3.save()
122
    else:
123
        with open(filename, 'w', encoding='utf-8') as f:
124
            f.write(new.toLRC())
125

126
    return True, ''
127

128

129
def _require(condition, message: str) -> None:
    """Raise ``AssertionError(message)`` when ``condition`` is falsy.

    Used instead of the ``assert`` statement, which is removed when Python
    runs with ``-O`` and would then skip argument validation entirely.
    """
    if not condition:
        raise AssertionError(message)


def _prefix_number(name: str, spliter: str) -> int | None:
    """Return the integer before the first ``spliter`` in ``name``, else None."""
    try:
        return int(name.split(spliter, 1)[0])
    except ValueError:
        return None


def main():
    """Parse the command line, pick the matching files and process each one.

    Arguments are read positionally through ``getargv``:
    ``<mode> <path> [<prefix> | <prefix_start> <prefix_end> [prefix_spliter]]``.
    Only direct entries of ``<path>`` whose name ends with ``.<mode>`` are
    considered.

    Raises:
        AssertionError: When an argument is missing or invalid; the message
            is meant to be shown to the user by the caller.
    """
    # show help
    if '--help' in argv:
        help()

    # --- get args
    p = perf_counter()
    mode = getargv(1)
    _require(mode, 'Please provide a mode at param #1!')
    _require(mode in ('mp3', 'lrc'), 'Invaild mode, it should be mp3 or lrc!')
    path = getargv(2)
    _require(path, 'Please provide a path at param #2!')
    _require(os.path.isdir(path), 'Path isn\'t exist!')
    arg3 = getargv(3)
    arg4 = getargv(4)

    # Bound up front so no branch below can hit an unassigned name.
    prefix = ''
    prefix_start = prefix_end = 0
    prefix_spliter = '_'
    prefix_mode = 0  # no prefix
    if arg3 and arg4:
        try:
            prefix_start = int(arg3)
            prefix_end = int(arg4)
        except ValueError as err:
            raise AssertionError('Prefix start / end isn\'t number!') from err
        prefix_spliter = getargv(5) or '_'
        prefix_mode = 2  # number range
    elif arg3:
        prefix = arg3
        prefix_mode = 1  # str prefix
    debug(f'* get args took {p()}ms')

    # --- get files
    p = perf_counter()
    raw = os.listdir(path)
    log(f'All files and folders: {len(raw)}')
    debug(f'* prefix mode: {prefix_mode}')
    # Match on the extension including its dot, so a name that merely ends
    # with the letters of the mode (e.g. "notmp3") is not picked up.
    suffix = f'.{mode}'
    files = []
    for f in raw:
        if not f.endswith(suffix):
            continue
        if prefix_mode == 1 and not f.startswith(prefix):
            continue
        if prefix_mode == 2:
            number = _prefix_number(f, prefix_spliter)
            if number is None or not prefix_start <= number <= prefix_end:
                continue
        files.append(f)
    debug(f'* get files took {p()}ms')
    log(f'Files filtered: {len(files)} - {files}')

    # --- process file
    for f in files:
        p = perf_counter()
        try:
            success, msg = process(os.path.join(path, f), mode=mode)
            if success is None:
                log(f'processing {f}: not needed')
            elif success:
                log(f'process {f} success')
            else:
                warn(f'processing {f} error: {msg}')
        except Exception as e:
            # One broken file must not abort the whole batch.
            warn(f'processing {f} error: {e}')
        debug(f'* process {f} took {p()}ms')

    log('Finished!')
201

202

203
if __name__ == "__main__":
    try:
        main()
    except AssertionError as err:
        # Argument validation failed: show the message in red, then the usage
        # text, and exit with status 1. Arguments are converted with str() so
        # an AssertionError carrying a non-string value cannot make the
        # handler itself fail with a TypeError.
        error(' '.join(str(arg) for arg in err.args))
        help(1)

1
# coding: utf-8
2
from sys import argv
3
from time import perf_counter as __perf_counter
4

5
from colorama import Fore, Style
6

7

8
def getargv(key: int, default: str | None = None):
9
    try:
10
        return argv[key]
11
    except IndexError:
12
        return default
13

14

15
def perf_counter():
    '''
    Start a performance timer. Call the returned function to read the time
    elapsed since this call, in milliseconds rounded to two decimal places.
    - copied from sleepy utils.py
    '''
    started_at = __perf_counter()

    def elapsed_ms():
        return round((__perf_counter() - started_at) * 1000, 2)

    return elapsed_ms
22

23

24
def _print_colored(color, content):
    '''
    Join ``content`` with single spaces (each item passed through ``str``)
    and print it between ``color`` and ``Style.RESET_ALL``.
    '''
    text = ' '.join(map(str, content))
    print(f'{color}{text}{Style.RESET_ALL}')


def log(*content):
    '''Print the given values using ``Fore.GREEN``.'''
    _print_colored(Fore.GREEN, content)


def debug(*content):
    '''Print the given values using ``Fore.BLUE``.'''
    _print_colored(Fore.BLUE, content)


def warn(*content):
    '''Print the given values using ``Fore.YELLOW``.'''
    _print_colored(Fore.YELLOW, content)


def error(*content):
    '''Print the given values using ``Fore.RED``.'''
    _print_colored(Fore.RED, content)

1
[project]
2
name = "split-multilang-lyric"
3
version = "0.1.0"
4
requires-python = ">=3.13"
5
dependencies = [
6
    "charset-normalizer>=3.4.3",
7
    "colorama>=0.4.6",
8
    "mutagen>=1.47.0",
9
    "pylrc>=0.1.2",
10
]

同样已经上传到 GitHub (split_multilang_lyric 目录)

wyf9

lrc-tools

Waiting for api.github.com...

00K

Waiting...

如何使用#

有三种使用方式:

python3 main.py <mode> <path>

mode 可选 mp3 / lrc, 对指定目录 (<path>) 中此后缀 (<mode>) 的文件进行处理 (不会扫描子目录)

python3 main.py <mode> <path> <prefix>

同上, 但是增加筛选条件: 文件名必须以指定的前缀 (<prefix>) 开头

python3 main.py <mode> <path> <prefix_start> <prefix_end> [prefix_spliter]

同 1., 但是增加筛选条件: 文件名必须以 <prefix_start> 和 <prefix_end> 区间内的数字 + [prefix_spliter] (默认为 _) 开头

比如你执行 python3 main.py mp3 ./ 1 3 (1 - 3), 文件夹中有以下文件:

1
1_617700852_Shadow Of The Sun.mp3
2
1_617700852_Shadow Of The Sun.lrc
3
2_676186170_See You Again.mp3
4
2_676186170_See You Again.lrc
5
3_549442278_Far Away From Home.mp3
6
4_85054372_溯.mp3
7
5_400766871_平凡之路.mp3

则会匹配到以下文件:

1
1_617700852_Shadow Of The Sun.mp3
2
2_676186170_See You Again.mp3
3
3_549442278_Far Away From Home.mp3

Usage

1
Usage:
2
  - main.py <mode> <path>
3
  - main.py <mode> <path> <prefix>
4
  - main.py <mode> <path> <prefix_start> <prefix_end> [prefix_spliter]
5
    * contains prefix_start and prefix_end
6
    * prefix_spliter is _ by default

End#

Enjoy!