Files
offline-taiwan-dic/dic
2026-05-12 01:50:48 +00:00

478 lines
13 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# dic — 命令列字典查詢工具(bash 版)
# 用法:
# dic 香蕉 兩本字典都查
# dic -r 香蕉 只查重編修訂本
# dic -c 香蕉 只查簡編本
# dic -a 香蕉 顯示所有匹配(包含部分匹配)
# dic -l 香蕉 只列出匹配的字詞名清單
# dic --no-color 香蕉 不染色
# dic -h 顯示說明
# Treat expansion of an unset variable as an error.
set -o nounset

# ----------------------------------------
# Configuration: display name and full CSV path of each dictionary.
# Change the paths to where your CSV files live.
DICT_NAME_1='重編修訂本'
DICT_PATH_1="${HOME}/.local/share/dic/dict_revised.csv"
DICT_NAME_2='簡編本'
DICT_PATH_2="${HOME}/.local/share/dic/dict_concise.csv"
# ----------------------------------------

PROG_NAME='dic'
# Print the usage summary to stdout, one line per entry.
show_help() {
  local -a usage_lines=(
    'dic — 命令列字典查詢工具'
    '用法:'
    'dic 香蕉 兩本字典都查'
    'dic -r 香蕉 只查重編修訂本'
    'dic -c 香蕉 只查簡編本'
    'dic -a 香蕉 顯示所有匹配(包含部分匹配)'
    'dic -l 香蕉 只列出匹配的字詞名清單'
    'dic --no-color 香蕉 不染色'
    'dic -h 顯示說明'
  )
  printf '%s\n' "${usage_lines[@]}"
}
# --- Colour configuration ---
# Global switch read by the colour helpers: 1 = emit ANSI escapes, 0 = plain.
COLOR_ENABLED=1

# Switch colour off when the caller asks for it or stdout is not a terminal.
# $1 = "1" to force colour off
setup_color() {
  local force_off="$1"
  if [[ -t 1 && "$force_off" != "1" ]]; then
    # Interactive terminal and not forced off: leave the switch alone.
    return
  fi
  COLOR_ENABLED=0
}
# Print text wrapped in an ANSI SGR sequence, or unchanged when colour is off.
# $1 = SGR code (for example "1;36")
# $2 = text
# No trailing newline is written.
c_wrap() {
  case "$COLOR_ENABLED" in
    0) printf '%s' "$2" ;;
    *) printf '\033[%sm%s\033[0m' "$1" "$2" ;;
  esac
}
# Named colour helpers: each passes $1 to c_wrap with a fixed SGR code.

# Bold.
c_bold() {
  c_wrap "1" "$1"
}

# Dim.
c_dim() {
  c_wrap "2" "$1"
}

# Bold cyan, used for headwords.
c_title() {
  c_wrap "1;36" "$1"
}

# Yellow, used for zhuyin.
c_zhuyin() {
  c_wrap "33" "$1"
}

# Bold magenta, used for dictionary names.
c_book() {
  c_wrap "1;35" "$1"
}

# Dim light grey, used for hints.
c_hint() {
  c_wrap "2;37" "$1"
}
# 詞性 [名][動] 等,要在釋義裡 inline 染色,由 sed 處理(見 colorize_definition)
# --- CSV parsing ---
# Parse an RFC 4180 style CSV file.
#   - A quoted field may contain commas and line breaks.
#   - Two consecutive double quotes inside a quoted field are one literal quote.
#   - Records may end in LF or CRLF; a CR at the end of a physical line is
#     dropped so it never sticks to the last field.
# Output: one line per record, fields joined by the US byte (0x1f). A line
# break inside a quoted field is written as the two literal characters \n so
# that every record stays on a single output line.
# The header record is printed as well; callers are expected to skip it.
# $1 = path of the CSV file
parse_csv() {
  awk '
    BEGIN {
      in_quote = 0
      field = ""
      nfields = 0
    }
    {
      line = $0
      # awk strips only the LF of a CRLF terminator; remove the CR here.
      sub(/\r$/, "", line)
      # Still inside a quoted field from the previous physical line:
      # record the line break as a literal backslash followed by n.
      if (in_quote) {
        field = field "\\n"
      }
      n = length(line)
      for (i = 1; i <= n; i++) {
        ch = substr(line, i, 1)
        if (in_quote) {
          if (ch == "\"") {
            # Look one character ahead: doubled quote or closing quote.
            next_ch = (i < n) ? substr(line, i + 1, 1) : ""
            if (next_ch == "\"") {
              field = field "\""
              i++
            } else {
              in_quote = 0
            }
          } else {
            field = field ch
          }
        } else if (ch == "\"") {
          in_quote = 1
        } else if (ch == ",") {
          fields[nfields++] = field
          field = ""
        } else {
          field = field ch
        }
      }
      # End of the physical line outside quotes means the record is complete.
      if (!in_quote) {
        fields[nfields++] = field
        out = ""
        for (k = 0; k < nfields; k++) {
          # Octal escape for 0x1f: hex escapes are not portable across awks.
          if (k > 0) out = out "\037"
          out = out fields[k]
        }
        print out
        # Reset for the next record; entries past nfields are never read.
        field = ""
        nfields = 0
      }
    }
  ' "$1"
}
# Locate the 字詞名, 注音一式 and 釋義 columns in the header record.
# $1 = CSV path
# Prints three 1-based column numbers separated by spaces, in that order;
# -1 stands for a column that is not present in the header.
find_columns() {
  local csv_path="$1"
  parse_csv "$csv_path" | awk -F$'\x1f' '
    NR == 1 {
      pos["字詞名"] = -1
      pos["注音一式"] = -1
      pos["釋義"] = -1
      for (col = 1; col <= NF; col++) {
        if ($col in pos) pos[$col] = col
      }
      printf "%d %d %d\n", pos["字詞名"], pos["注音一式"], pos["釋義"]
      # Only the header record matters.
      exit
    }
  '
}
# Search one dictionary CSV and print the matching entries in ranked order.
# $1 = CSV path
# $2 = query string
# $3 = mode: "exact" (headword equals the query) or
#            "all"   (headword contains the query)
# Output: one line per hit, "name US zhuyin US definition" (US = byte 0x1f).
# Line breaks inside the definition are still the two literal characters \n.
# Ranking: exact match first, then headwords starting with the query, then
# shorter headwords, then the headword itself.
# A missing file yields a warning on stderr and no output. A file with no
# header or without the 字詞名 column yields no output.
search_dict() {
  local file="$1"
  local query="$2"
  local mode="$3"
  if [[ ! -f "$file" ]]; then
    # Warning goes to stderr so it never mixes with result lines.
    if [[ "$COLOR_ENABLED" == "1" ]]; then
      printf '\033[2m警告:\033[0m找不到字典檔 %s\n' "$file" >&2
    else
      printf '警告:找不到字典檔 %s\n' "$file" >&2
    fi
    return
  fi
  local cols idx_name idx_zhuyin idx_def
  cols=$(find_columns "$file")
  read -r idx_name idx_zhuyin idx_def <<< "$cols"
  # Empty means there was no header record at all; -1 means no headword column.
  if [[ -z "$idx_name" || "$idx_name" == "-1" ]]; then
    return
  fi
  # The query travels through the environment: awk -v would run escape
  # processing on it and corrupt any backslash.
  parse_csv "$file" \
    | DIC_QUERY="$query" awk -F$'\x1f' \
        -v idx_name="$idx_name" \
        -v idx_zhuyin="$idx_zhuyin" \
        -v idx_def="$idx_def" \
        -v mode="$mode" '
      BEGIN { query = ENVIRON["DIC_QUERY"] "" }
      NR == 1 { next }   # skip the header record
      {
        # Appending "" forces string comparison for numeric-looking text.
        name = $idx_name ""
        if (name == "") next
        pos = index(name, query)
        if (mode == "exact") {
          if (name != query) next
          exact = 0
          starts = 0
        } else {
          if (pos == 0) next
          exact = (name == query) ? 0 : 1
          starts = (pos == 1) ? 0 : 1
        }
        # Zero-pad the length: the key is sorted bytewise, so an unpadded
        # 10 would sort ahead of 2.
        sort_key = sprintf("%d\t%d\t%06d\t%s", exact, starts, length(name), name)
        zhuyin = (idx_zhuyin > 0) ? $idx_zhuyin : ""
        def = (idx_def > 0) ? $idx_def : ""
        # sort_key leads the line so sort can rank it; it is removed below.
        printf "%s\037%s\037%s\037%s\n", sort_key, name, zhuyin, def
      }
    ' \
    | LC_ALL=C sort -t $'\x1f' -k1,1 \
    | awk '{ print substr($0, index($0, "\037") + 1) }'   # drop the sort key
}
# Count the entries whose headword contains the query without being equal
# to it.
# $1 = CSV path
# $2 = query string
# Prints the count; prints 0 when the file is missing, has no header record,
# or has no 字詞名 column.
count_partial() {
  local file="$1"
  local query="$2"
  if [[ ! -f "$file" ]]; then
    printf '0\n'
    return
  fi
  local cols idx_name _z _d
  cols=$(find_columns "$file")
  read -r idx_name _z _d <<< "$cols"
  # Empty means there was no header record at all; -1 means no headword column.
  if [[ -z "$idx_name" || "$idx_name" == "-1" ]]; then
    printf '0\n'
    return
  fi
  # The query travels through the environment: awk -v would run escape
  # processing on it and corrupt any backslash.
  parse_csv "$file" \
    | DIC_QUERY="$query" awk -F$'\x1f' -v idx_name="$idx_name" '
      BEGIN { query = ENVIRON["DIC_QUERY"] "" }
      NR == 1 { next }   # skip the header record
      {
        # Appending "" forces string comparison for numeric-looking text.
        name = $idx_name ""
        if (name == "") next
        if (index(name, query) > 0 && name != query) c++
      }
      END { print c + 0 }
    '
}
# Colour part-of-speech tags such as [名] [動] [形] inside a definition.
# Reads stdin, writes stdout. With colour disabled the input passes through
# unchanged.
colorize_definition() {
  if [[ "$COLOR_ENABLED" == "0" ]]; then
    cat
    return
  fi
  # A tag is "[" + 1 to 4 characters that are neither "[" nor "]" + "]".
  # sed works one line at a time, so no newline needs excluding; a "\n"
  # inside a POSIX bracket expression would exclude the literal characters
  # backslash and n instead.
  sed -E $'s/(\\[[^][]{1,4}\\])/\033[1;32m\\1\033[0m/g'
}
# Print one dictionary entry followed by a blank line.
# $1 = dictionary name
# $2 = headword
# $3 = zhuyin
# $4 = definition, with line breaks encoded as the two literal characters \n
print_entry() {
  local body="$4"
  local book_label head_label zhuyin_label
  book_label="$(c_book "$1")"
  head_label="$(c_title "$2")"
  zhuyin_label="$(c_zhuyin "$3")"

  printf ' %s\n' "$book_label"
  printf ' %s %s\n' "$head_label" "$zhuyin_label"

  if [[ -z "$body" ]]; then
    printf '\n'
    return
  fi

  # Pipeline stages:
  #   awk  - split on the literal \n marker and trim trailing blanks, tabs
  #          and CRs from every line
  #   sed  - drop the final line when it is empty
  #   colorize_definition - colour the part-of-speech tags
  #   sed  - prefix every line with the indent
  printf '%s' "$body" \
    | awk 'BEGIN{RS="\\\\n"} { sub(/[ \t\r]+$/, ""); print }' \
    | sed -E '$ { /^$/d; }' \
    | colorize_definition \
    | sed 's/^/ /'
  printf '\n'
}
# --- Main program ---
# Command-line state: the query plus one 0/1 flag per option.
QUERY=""
OPT_REVISED=0
OPT_CONCISED=0
OPT_ALL=0
OPT_LIST=0
OPT_NO_COLOR=0
OPT_HELP=0

# Hand-rolled option parsing (no getopt: long options are needed and no
# external dependency is wanted). Every argument is consumed up front, then
# classified.
while (( $# > 0 )); do
  token="$1"
  shift
  case "$token" in
    --help) OPT_HELP=1 ;;
    --revised) OPT_REVISED=1 ;;
    --concised) OPT_CONCISED=1 ;;
    --all) OPT_ALL=1 ;;
    --list) OPT_LIST=1 ;;
    --no-color) OPT_NO_COLOR=1 ;;
    --)
      # The argument right after "--" is taken as the query verbatim.
      if (( $# > 0 )); then
        QUERY="$1"
        shift
      fi
      ;;
    -*)
      # One or more short flags, possibly bundled as in -rc.
      letters="${token#-}"
      if [[ ! "$letters" =~ ^[rcalh]+$ ]]; then
        printf 'dic: 未知選項 %s\n' "$token" >&2
        exit 2
      fi
      while [[ -n "$letters" ]]; do
        case "${letters:0:1}" in
          r) OPT_REVISED=1 ;;
          c) OPT_CONCISED=1 ;;
          a) OPT_ALL=1 ;;
          l) OPT_LIST=1 ;;
          h) OPT_HELP=1 ;;
        esac
        letters="${letters:1}"
      done
      ;;
    *)
      # Only the first positional argument becomes the query.
      [[ -n "$QUERY" ]] || QUERY="$token"
      ;;
  esac
done
# Help was requested, or there is nothing to look up: print usage and stop.
if [[ "$OPT_HELP" == "1" || -z "$QUERY" ]]; then
  show_help
  exit 0
fi

# Decide whether colour output stays enabled.
setup_color "$OPT_NO_COLOR"
# Choose the dictionaries to search: a single -r or -c selects just that one;
# both flags, or neither, select both (revised first).
declare -a CHOSEN_NAMES=() CHOSEN_PATHS=()
if [[ "$OPT_REVISED" == "1" || "$OPT_CONCISED" != "1" ]]; then
  CHOSEN_NAMES+=("$DICT_NAME_1")
  CHOSEN_PATHS+=("$DICT_PATH_1")
fi
if [[ "$OPT_CONCISED" == "1" || "$OPT_REVISED" != "1" ]]; then
  CHOSEN_NAMES+=("$DICT_NAME_2")
  CHOSEN_PATHS+=("$DICT_PATH_2")
fi

# Match mode: -a and -l (list mode) both search for partial matches.
MODE="exact"
if [[ "$OPT_ALL" == "1" || "$OPT_LIST" == "1" ]]; then
  MODE="all"
fi
# Collect the results of every selected dictionary.
# Each dictionary's result lines are stored in their own file under a private
# temporary directory, which the EXIT trap removes.
# Exits with status 3 when the temporary directory cannot be created, so
# result files are never written relative to an empty path.
TMP_DIR=$(mktemp -d) || {
  printf 'dic: 無法建立暫存目錄\n' >&2
  exit 3
}
trap 'rm -rf -- "$TMP_DIR"' EXIT
TOTAL_RESULTS=0
TOTAL_PARTIAL=0
NUM_BOOKS=${#CHOSEN_NAMES[@]}
# Per-dictionary hit count, partial-match count and result file. Partial
# counts are only computed in exact mode, where they feed the hint that
# further entries contain the query.
declare -a BOOK_RESULT_COUNTS=() BOOK_PARTIAL_COUNTS=() BOOK_RESULT_FILES=()
for (( bi = 0; bi < NUM_BOOKS; bi++ )); do
  name="${CHOSEN_NAMES[$bi]}"
  path="${CHOSEN_PATHS[$bi]}"
  out_file="$TMP_DIR/book_$bi"
  search_dict "$path" "$QUERY" "$MODE" > "$out_file"
  # One result per line; tr removes the padding some wc versions add.
  count=$(wc -l < "$out_file" | tr -d ' ')
  BOOK_RESULT_COUNTS[$bi]="$count"
  BOOK_RESULT_FILES[$bi]="$out_file"
  TOTAL_RESULTS=$(( TOTAL_RESULTS + count ))
  if [[ "$MODE" == "exact" ]]; then
    partial=$(count_partial "$path" "$QUERY")
  else
    partial=0
  fi
  BOOK_PARTIAL_COUNTS[$bi]="$partial"
  TOTAL_PARTIAL=$(( TOTAL_PARTIAL + partial ))
done
# --- List mode: print headwords only ---
# For every dictionary with at least one hit, print a header line with the
# dictionary name and hit count, then one "headword zhuyin" line per hit.
# Exits 0 when anything matched, 1 (after a not-found message) otherwise.
if [[ "$OPT_LIST" == "1" ]]; then
  for (( bi = 0; bi < NUM_BOOKS; bi++ )); do
    count="${BOOK_RESULT_COUNTS[$bi]}"
    [[ "$count" == "0" ]] && continue
    name="${CHOSEN_NAMES[$bi]}"
    file="${BOOK_RESULT_FILES[$bi]}"
    # The count is wrapped in a balanced pair of parentheses.
    printf '%s(%d 筆)\n' "$(c_book "$name")" "$count"
    # Result lines are "name US zhuyin US definition"; the definition is
    # not needed here and lands in the throwaway variable.
    while IFS=$'\x1f' read -r n zh _; do
      printf ' %s %s\n' "$(c_title "$n")" "$(c_dim "$zh")"
    done < "$file"
    printf '\n'
  done
  if [[ "$TOTAL_RESULTS" == "0" ]]; then
    printf '找不到「%s」\n' "$QUERY"
    exit 1
  fi
  exit 0
fi
# --- Regular output ---
# Nothing matched: say so, point at partial matches when searching in exact
# mode, and exit 1.
if [[ "$TOTAL_RESULTS" == "0" ]]; then
  printf '找不到「%s」\n' "$(c_bold "$QUERY")"
  if [[ "$MODE" == "exact" && "$TOTAL_PARTIAL" -gt 0 ]]; then
    printf '%s\n' "$(c_hint "但有 $TOTAL_PARTIAL 筆字詞包含「$QUERY」,加 -a 查看全部")"
  fi
  exit 1
fi

# Heading: the query, plus the total number of matches in "all" mode.
header_label="$(c_dim "查詢:")"
header_query="$(c_bold "$QUERY")"
case "$MODE" in
  all)
    printf '%s%s %s\n\n' \
      "$header_label" \
      "$header_query" \
      "$(c_dim "(共 $TOTAL_RESULTS 筆匹配)")"
    ;;
  *)
    printf '%s%s\n\n' "$header_label" "$header_query"
    ;;
esac

# Entries of each dictionary, in selection order.
for (( bi = 0; bi < NUM_BOOKS; bi++ )); do
  name="${CHOSEN_NAMES[$bi]}"
  count="${BOOK_RESULT_COUNTS[$bi]}"
  partial="${BOOK_PARTIAL_COUNTS[$bi]}"
  file="${BOOK_RESULT_FILES[$bi]}"
  if [[ "$count" != "0" ]]; then
    while IFS=$'\x1f' read -r n zh def; do
      print_entry "$name" "$n" "$zh" "$def"
    done < "$file"
    continue
  fi
  # No exact hit in this dictionary: mention it only when it has partial
  # matches to offer.
  if [[ "$MODE" == "exact" && "$partial" -gt 0 ]]; then
    printf ' %s\n' "$(c_book "$name")"
    printf ' %s\n\n' "$(c_hint "沒有完全相符,但有 $partial 筆包含此字")"
  fi
done

# Closing hint about further partial matches (exact mode only).
if [[ "$MODE" == "exact" && "$TOTAL_PARTIAL" -gt 0 ]]; then
  printf '%s\n' "$(c_hint "另有 $TOTAL_PARTIAL 筆字詞包含「$QUERY」,加 -a 查看全部,或 -l 只看清單")"
fi
exit 0