下のコードをそのまま全部貼り付け(書き換え不要)。
import email, json, re
from email.header import decode_header
from email.utils import parsedate_to_datetime
from datetime import datetime
from pathlib import Path
from imapclient import IMAPClient
# Directory that contains this script; the companion files below live beside it.
BASE = Path(__file__).resolve().parent
# Settings parsed from config.json at import time. Keys read in this file:
# "gmail_address", "app_password", "label" and "vault_base_path".
CONFIG = json.loads((BASE / "config.json").read_text("utf-8"))
# Text file holding one already-archived Message-ID per line.
SEEN_FILE = BASE / "seen.txt"
# Run log that log() appends to.
LOG_FILE = BASE / "archive.log"
def log(msg):
    """Append one timestamped line to the run log (LOG_FILE).

    Args:
        msg: Text to record. It is prefixed with the current local time in
            ISO 8601 format (second precision) and terminated by a newline.
    """
    line = f"[{datetime.now().isoformat(timespec='seconds')}] {msg}\n"
    # A context manager flushes and closes the handle immediately instead of
    # leaving that to garbage collection.
    with LOG_FILE.open("a", encoding="utf-8") as fh:
        fh.write(line)
def load_seen():
    """Return the set of identifiers already recorded in SEEN_FILE.

    Returns:
        A set with one entry per line of the file, or an empty set when the
        file does not exist yet.
    """
    recorded = set()
    if SEEN_FILE.exists():
        contents = SEEN_FILE.read_text("utf-8")
        recorded.update(contents.splitlines())
    return recorded
def add_seen(uid):
    """Persist an identifier as processed by appending it to SEEN_FILE.

    Args:
        uid: Identifier string to record; it is written on its own line.
    """
    # Close the handle explicitly so the entry reaches disk before the next
    # message is processed, rather than whenever the object is collected.
    with SEEN_FILE.open("a", encoding="utf-8") as fh:
        fh.write(uid + "\n")
def decode(s):
    """Decode an RFC 2047 encoded header value into plain text.

    Args:
        s: Raw header value, or None when the header is absent.

    Returns:
        The decoded text, or "" when ``s`` is None. Bytes that cannot be
        decoded are replaced instead of raising.
    """
    if s is None:
        return ""
    pieces = []
    for chunk, charset in decode_header(s):
        if not isinstance(chunk, bytes):
            # Unencoded headers come back from decode_header() as str.
            pieces.append(chunk)
            continue
        try:
            pieces.append(chunk.decode(charset or "utf-8", errors="replace"))
        except LookupError:
            # The header names a charset Python has no codec for (for example
            # "unknown-8bit"); errors="replace" does not cover that case, so
            # fall back to UTF-8 rather than aborting the whole run.
            pieces.append(chunk.decode("utf-8", errors="replace"))
    return "".join(pieces)
def _decode_text_part(part):
    """Return the transfer-decoded payload of one MIME part as str."""
    payload = part.get_payload(decode=True) or b""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The declared charset has no Python codec; fall back to UTF-8
        # instead of failing on this message.
        return payload.decode("utf-8", errors="replace")


def extract_body(msg):
    """Return the plain-text body of an email message.

    Args:
        msg: A parsed ``email.message.Message``.

    Returns:
        For a multipart message, the text of the first ``text/plain`` part
        that is not marked as an attachment, or "" when there is none.
        For a single-part message, its decoded payload.
    """
    if not msg.is_multipart():
        return _decode_text_part(msg)
    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        # A text/plain *attachment* (e.g. a .txt file) is not the body.
        if part.get_content_disposition() == "attachment":
            continue
        return _decode_text_part(part)
    return ""
def safe_filename(name, maxlen=80):
    """Turn arbitrary text (such as a mail subject) into a usable file name.

    Args:
        name: Text to sanitise.
        maxlen: Maximum number of characters to keep.

    Returns:
        ``name`` without the characters ``\\ / : * ? " < > |``, with control
        characters collapsed to a single space, trimmed of surrounding
        whitespace and cut to ``maxlen`` characters. "untitled" is returned
        when nothing usable remains, so callers never get an empty name.
    """
    # Folded headers can leave CR/LF/TAB inside a subject; replace each run of
    # control characters (and the plain spaces around it) with one space.
    name = re.sub(r" *[\x00-\x1f\x7f]+ *", " ", name)
    name = re.sub(r'[\\/:*?"<>|]', "", name)
    # Slicing already copes with short strings; the second strip removes a
    # space that truncation may have left at the end.
    name = name.strip()[:maxlen].rstrip()
    return name or "untitled"
def _message_datetime(msg):
    """Return the message's Date header as a datetime, or now() if unusable."""
    try:
        return parsedate_to_datetime(msg.get("Date", ""))
    except Exception:
        # Deliberate best effort: a missing or malformed Date header must not
        # stop the archive run, so the current time is used instead.
        return datetime.now()


def _unique_path(folder, filename):
    """Return folder/filename, adding " (n)" before ".md" if the name is taken."""
    candidate = folder / filename
    if not candidate.exists():
        return candidate
    stem = candidate.stem
    n = 2
    while (folder / f"{stem} ({n}).md").exists():
        n += 1
    return folder / f"{stem} ({n}).md"


def _front_matter(dt, subject, sender):
    """Build the YAML front matter block placed at the top of each note."""
    # Subjects and senders routinely contain ':', '#', quotes or line breaks
    # ("Re: ...", '"Name" <addr>'), which are not valid as plain YAML scalars.
    # json.dumps produces a double-quoted string that YAML accepts as-is.
    return (
        "---\n"
        f"date: {dt.isoformat(timespec='minutes')}\n"
        f"subject: {json.dumps(subject, ensure_ascii=False)}\n"
        f"from: {json.dumps(sender, ensure_ascii=False)}\n"
        "---\n\n"
    )


def main():
    """Archive every not-yet-seen message of the configured label as Markdown.

    Logs in to Gmail over IMAP, downloads all messages in CONFIG["label"],
    skips those whose Message-ID is already in the seen file (or that have no
    Message-ID), and writes each remaining one to
    ``<vault_base_path>/<year>年/<name>.md`` with a YAML front matter block.
    Progress is reported through log().
    """
    seen = load_seen()
    new_count = 0
    with IMAPClient("imap.gmail.com", ssl=True) as M:
        M.login(CONFIG["gmail_address"], CONFIG["app_password"])
        M.select_folder(CONFIG["label"])
        ids = M.search(["ALL"])
        if not ids:
            log("no messages found")
            return
        # NOTE(review): this downloads every message in full on each run, and
        # an RFC822 fetch on a writable folder presumably marks the messages
        # as read on the server - confirm that both are acceptable.
        msgs = M.fetch(ids, ["RFC822"])

    # Everything needed is in memory now, so the connection is already closed.
    for data in msgs.values():
        msg = email.message_from_bytes(data[b"RFC822"])
        # str() guards against a Header object being returned for the field.
        msg_id = str(msg.get("Message-ID", "")).strip()
        if not msg_id or msg_id in seen:
            continue

        subject = decode(msg.get("Subject", ""))
        dt = _message_datetime(msg)

        year_folder = Path(CONFIG["vault_base_path"]) / f"{dt.year}年"
        year_folder.mkdir(parents=True, exist_ok=True)

        # Prefix the date label unless the subject already carries one.
        if re.search(r"『\d+\.\d+』", subject):
            base_name = subject
        else:
            base_name = f"『{dt.month}.{dt.day}』{subject}"
        out_path = _unique_path(year_folder, safe_filename(base_name) + ".md")

        front = _front_matter(dt, subject, decode(msg.get("From", "")))
        out_path.write_text(front + extract_body(msg), encoding="utf-8")

        # Record the ID only after the note is on disk, so a failed write is
        # retried on the next run.
        add_seen(msg_id)
        seen.add(msg_id)
        new_count += 1
        log(f"saved: {dt.year}年/{out_path.name}")

    log(f"done: {new_count} new")


if __name__ == "__main__":
    main()
- Control+O → Enter で保存し、Control+X でエディタを終了
★
imapclient を使う理由: Python標準の imaplib は、日本語のラベル名をそのまま渡すとエラーになります(UnicodeEncodeError または Could not parse command)。imapclient は日本語のラベル名を、IMAPが要求する修正UTF-7(Modified UTF-7)へ自動的に変換してくれます。