/****************************************************************************** Calligrapher -AsciiArtClip Utility- DAT→AAC変換ツールソースファイル Coded by Wraith in Jan 21, 2006. ******************************************************************************/ // Tab幅を4文字に設定して表示させてください。 /////////////////////////////////////////////////////////////////////////////// // // ■ dattoaac.cpp // http://tricklib.com/cxx/ex/calligrapher/dattoaac.cpp // // □ リファレンス・サポートページ // http://tricklib.com/cxx/ex/calligrapher/ // // □ ライセンス情報 // http://tricklib.com/license.htm // // □ コンパイル方法 // case Borland C++ // bcc32 -WU dattoaac.cpp // case Visual C++ // cl /GX dattoaac.cpp // case CodeWarrior // mwcc dattoaac.cpp // case gcc // ...gccでも何箇所か弄ればコンパイル通るだろうけどマンドクセ('A`) // // #include #include #include #include #include #include #include #include #include #if defined(_MSC_VER) #pragma comment(lib, "USER32.lib") #endif #ifndef ARRAY_SIZE #define ARRAY_SIZE(X) (sizeof(X)/sizeof(X[0])) #endif // ARRAY_SIZE #ifndef ARRAY_END #define ARRAY_END(X) (X +ARRAY_SIZE(X)) #endif // ARRAY_END ////////////////////////////////////////////////////////////////////////////// // // Declare auto closers // template class demi { public: T value; demi() {} demi(const T &X) :value(X) {} operator T& () { return value; } operator const T& () const { return value; } T * operator & () { return &value; } const T * operator & () const { return &value; } T & operator () () { return value; } const T & operator () () const { return value; } }; #define DECLARE_AUTO_CLOSE(name, type, param, init, close_command) \ class name :public demi \ { \ public: \ name(param) :demi(init) { } \ ~name() \ { \ close(); \ } \ void close() \ { \ if (value) \ { \ close_command; \ value = NULL; \ } \ } \ }; DECLARE_AUTO_CLOSE(AUTO_HANDLE, HANDLE, HANDLE X = NULL, X, INVALID_HANDLE_VALUE != value && (CloseHandle(value), false)) DECLARE_AUTO_CLOSE(AUTO_FINDHANDLE, HANDLE, HANDLE X = NULL, X, INVALID_HANDLE_VALUE != value && (FindClose(value), false)) ////////////////////////////////////////////////////////////////////////////// // // Win32 Error // class win32_error :public std::runtime_error { DWORD code; public: win32_error(DWORD X_code = GetLastError()) :std::runtime_error(win32_error::make_error_message(X_code)), code(X_code) { } static const std::string make_error_message(DWORD X_code = GetLastError()); DWORD get_error_code() const { return code; } const char * get_error_message() const { return what(); } }; const std::string win32_error::make_error_message(DWORD X_code) { LPVOID lpMsgBuf; FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, X_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL); const std::string result = (const char *)lpMsgBuf; LocalFree(lpMsgBuf); return result; } inline HANDLE check_HANDLE(HANDLE stream) { if (NULL == stream || INVALID_HANDLE_VALUE == stream) { throw win32_error(); } return stream; } ////////////////////////////////////////////////////////////////////////////// // // ファイルオープン(読み込み専用、自動クローズ) // class OpenReadFile :public AUTO_HANDLE { public: OpenReadFile(const char *filename) :AUTO_HANDLE( CreateFileA( filename, GENERIC_READ, FILE_SHARE_READ |FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL)) { } OpenReadFile(const wchar_t *filename) :AUTO_HANDLE( CreateFileW( filename, GENERIC_READ, FILE_SHARE_READ |FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL)) { } }; ////////////////////////////////////////////////////////////////////////////// // // ファイルオープン(書き込み専用、自動クローズ) // class OpenWriteFile :public AUTO_HANDLE { public: OpenWriteFile(const char *filename) :AUTO_HANDLE( CreateFileA( filename, GENERIC_WRITE, FILE_SHARE_READ |FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { } OpenWriteFile(const wchar_t *filename) :AUTO_HANDLE( CreateFileW( filename, GENERIC_WRITE, FILE_SHARE_READ |FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { } }; ////////////////////////////////////////////////////////////////////////////// // // 文字列分解 // template void split(const string_type &source, const string_type &delimiter, std::vector *destination) { const std::string::size_type delimiter_size = delimiter.size(); assert(0 < delimiter_size); destination->clear(); std::string::size_type p1 = 0; // = std::string::npos; // ※C++の規格的には std::string::npos でいいはずなんだけど、 //  VC6では 0 じゃないとダメ。まぁ、標準化前の製品だから仕方がないんだけど。 while(true) { std::string::size_type p2 = source.find(delimiter, p1); if (std::string::npos == p2) { break; } if (std::string::npos == p1) { destination->push_back(source.substr(0, p2)); } else { destination->push_back(source.substr(p1, p2 -p1)); } p1 = p2 +delimiter_size; } if (std::string::npos == p1) { destination->push_back(source); } else { destination->push_back(source.substr(p1)); } } ////////////////////////////////////////////////////////////////////////////// // // 文字列置換 // template string_type & string_replace(string_type &X, const string_type &X_search, const string_type &X_replace) { const typename string_type::size_type search_length = X_search.length(); const typename string_type::size_type replace_length = X_replace.length(); typename string_type::size_type p = 0; while(X.npos != (p = X.find(X_search, p))) { X.replace(p, search_length, X_replace); p += replace_length; } return X; } ////////////////////////////////////////////////////////////////////////////// // // HTMLデコード用マッピングデータ // struct html_decode_type { const wchar_t *escape; wchar_t code; }; const html_decode_type html_decode_table[] = { L"quot", 34, L"amp", 38, L"lt", 60, L"gt", 62, L"nbsp", 160, L"iexcl", 161, L"cent", 162, L"pound", 163, L"curren", 164, L"yen", 165, L"brvbar", 166, L"sect", 167, L"uml", 168, L"copy", 169, L"ordf", 170, L"laquo", 171, L"not", 172, L"shy", 173, L"reg", 174, L"macr", 175, L"deg", 176, L"plusmn", 177, L"sup2", 178, L"sup3", 179, L"acute", 180, L"micro", 181, L"para", 182, L"middot", 183, L"cedil", 184, L"sup1", 185, L"ordm", 186, L"raquo", 187, L"frac14", 188, L"frac12", 189, L"frac34", 190, L"iquest", 191, L"Agrave", 192, L"Aacute", 193, L"Acirc", 194, L"Atilde", 195, L"Auml", 196, L"Aring", 197, L"AElig", 198, L"Ccedil", 199, L"Egrave", 200, L"Eacute", 201, L"Ecirc", 202, L"Euml", 203, L"Igrave", 204, L"Iacute", 205, L"Icirc", 206, L"Iuml", 207, L"ETH", 208, L"Ntilde", 209, L"Ograve", 210, L"Oacute", 211, L"Ocirc", 212, L"Otilde", 213, L"Ouml", 214, L"times", 215, L"Oslash", 216, L"Ugrave", 217, L"Uacute", 218, L"Ucirc", 219, L"Uuml", 220, L"Yacute", 221, L"THORN", 222, L"szlig", 223, L"agrave", 224, L"aacute", 225, L"acirc", 226, L"atilde", 227, L"auml", 228, L"aring", 229, L"aelig", 230, L"ccedil", 231, L"egrave", 232, L"eacute", 233, L"ecirc", 234, L"euml", 235, L"igrave", 236, L"iacute", 237, L"icirc", 238, L"iuml", 239, L"eth", 240, L"ntilde", 241, L"ograve", 242, L"oacute", 243, L"ocirc", 244, L"otilde", 245, L"ouml", 246, L"divide", 247, L"oslash", 248, L"ugrave", 249, L"uacute", 250, L"ucirc", 251, L"uuml", 252, L"yacute", 253, L"thorn", 254, L"yuml", 255, L"OElig", 338, L"oelig", 339, L"Scaron", 352, L"scaron", 353, L"Yuml", 376, L"fnof", 402, L"circ", 710, L"tilde", 732, L"Alpha", 913, L"Beta", 914, L"Gamma", 915, L"Delta", 916, L"Epsilon", 917, L"Zeta", 918, L"Eta", 919, L"Theta", 920, L"Iota", 921, L"Kappa", 922, L"Lambda", 923, L"Mu", 924, L"Nu", 925, L"Xi", 926, L"Omicron", 927, L"Pi", 928, L"Rho", 929, L"Sigma", 931, L"Tau", 932, L"Upsilon", 933, L"Phi", 934, L"Chi", 935, L"Psi", 936, L"Omega", 937, L"alpha", 945, L"beta", 946, L"gamma", 947, L"delta", 948, L"epsilon", 949, L"zeta", 950, L"eta", 951, L"theta", 952, L"iota", 953, L"kappa", 954, L"lambda", 955, L"mu", 956, L"nu", 957, L"xi", 958, L"omicron", 959, L"pi", 960, L"rho", 961, L"sigmaf", 962, L"sigma", 963, L"tau", 964, L"upsilon", 965, L"phi", 966, L"chi", 967, L"psi", 968, L"omega", 969, L"thetasym", 977, L"upsih", 978, L"piv", 982, L"ensp", 8194, L"emsp", 8195, L"thinsp", 8201, L"zwnj", 8204, L"zwj", 8205, L"lrm", 8206, L"rlm", 8207, L"ndash", 8211, L"mdash", 8212, L"lsquo", 8216, L"rsquo", 8217, L"sbquo", 8218, L"ldquo", 8220, L"rdquo", 8221, L"bdquo", 8222, L"dagger", 8224, L"Dagger", 8225, L"bull", 8226, L"hellip", 8230, L"permil", 8240, L"prime", 8242, L"Prime", 8243, L"lsaquo", 8249, L"rsaquo", 8250, L"oline", 8254, L"frasl", 8260, L"euro", 8364, L"image", 8465, L"weierp", 8472, L"real", 8476, L"trade", 8482, L"alefsym", 8501, L"larr", 8592, L"uarr", 8593, L"rarr", 8594, L"darr", 8595, L"harr", 8596, L"crarr", 8629, L"lArr", 8656, L"uArr", 8657, L"rArr", 8658, L"dArr", 8659, L"hArr", 8660, L"forall", 8704, L"part", 8706, L"exist", 8707, L"empty", 8709, L"nabla", 8711, L"isin", 8712, L"notin", 8713, L"ni", 8715, L"prod", 8719, L"sum", 8721, L"minus", 8722, L"lowast", 8727, L"radic", 8730, L"prop", 8733, L"infin", 8734, L"ang", 8736, L"and", 8743, L"or", 8744, L"cap", 8745, L"cup", 8746, L"int", 8747, L"there4", 8756, L"sim", 8764, L"cong", 8773, L"asymp", 8776, L"ne", 8800, L"equiv", 8801, L"le", 8804, L"ge", 8805, L"sub", 8834, L"sup", 8835, L"nsub", 8836, L"sube", 8838, L"supe", 8839, L"oplus", 8853, L"otimes", 8855, L"perp", 8869, L"sdot", 8901, L"lceil", 8968, L"rceil", 8969, L"lfloor", 8970, L"rfloor", 8971, L"lang", 9001, L"rang", 9002, L"loz", 9674, L"spades", 9824, L"clubs", 9827, L"hearts", 9829, L"diams", 9830, }; std::map html_decode_map; void init_html_decode_map() { static bool first = true; if (!first) { return; } first = false; for(const html_decode_type *i = html_decode_table; i < ARRAY_END(html_decode_table); ++i) { html_decode_map.insert(std::pair(i->escape, i->code)); } } ////////////////////////////////////////////////////////////////////////////// // // HTMLデコード // const std::string html_decode(const std::string &X) { if (0 == X.size()) { return X; } std::string X_text = X; // 連続する半角スペースの除去 string_replace(X_text, " ", " "); // タグの除去 while(true) { std::string::size_type p_lt = X_text.find("<"); if (std::string::npos == p_lt) { break; } std::string::size_type p_gt = X_text.find(">", p_lt); if (std::string::npos == p_gt) { break; } if (0 == stricmp("
", X_text.substr(p_lt, p_gt -p_lt +1).c_str())) { //
タグだけは改行に変換 X_text = X_text.substr(0, p_lt) +"\n" +X_text.substr(p_gt +1); } else { X_text = X_text.substr(0, p_lt) +X_text.substr(p_gt +1); } } // 行頭の半角スペースの除去 if (' ' == X_text[0]) { X_text = X_text.substr(1); } string_replace(X_text, "\n ", "\n"); // トリム(HTMLのエンコード/デコードとは関係ない) { // 処理の都合上一旦改行を付加しておく。 X_text += "\n"; // 行末の半角スペース/全角スペースの除去 std::string snapshot; do { snapshot = X_text; string_replace(X_text, " \n", "\n"); string_replace(X_text, " \n", "\n"); } while(snapshot != X_text); // 先頭の空行の除去 while(0 < X_text.size() && '\n' == X_text[0]) { X_text = X_text.substr(1); } // 末尾の空行の除去 while(0 < X_text.size() && '\n' == X_text[X_text.size() -1]) { X_text = X_text.substr(0, X_text.size() -1); } } // 改行コードの修正 string_replace(X_text, "\n", "\r\n"); // 一般的なHTMLデコード string_replace(X_text, "<", "<"); string_replace(X_text, ">", ">"); string_replace(X_text, " ", " "); //string_replace(X_text, "&", "&"); 2ch の dat では & はエスケープされない。 if (std::string::npos == X_text.find("&")) { return X_text; } // こっからはUNICODEで処理 init_html_decode_map(); WCHAR wbuffer[16 *1024]; MultiByteToWideChar(CP_ACP, 0, X_text.c_str(), -1, wbuffer, ARRAY_SIZE(wbuffer)); std::wstring WX_text(wbuffer); std::wstring::size_type p = 0; while(true) { std::wstring::size_type p_lt = WX_text.find(L"&", p); if (std::wstring::npos == p_lt) { break; } p = p_lt +1; std::wstring::size_type p_gt = WX_text.find(L";", p_lt); if (std::wstring::npos == p_gt || p_gt <= p_lt +2) { continue; } const std::wstring escape = WX_text.substr(p_lt +1, p_gt -(p_lt +1)); wchar_t code; if (L'#' == escape[0]) { code = (wchar_t)wcstol(escape.data() +1, NULL, 0); } else { code = html_decode_map[escape]; } if (code) { WX_text = WX_text.substr(0, p_lt) +code +WX_text.substr(p_gt +1); } } // BOM を付加した上でUNICODE文字列を無理やり std::string にブチ込む X_text = "\xFF\xFE" +std::string( (const std::string::value_type *)WX_text.data(), WX_text.size() *sizeof(std::wstring::value_type) /sizeof(std::string::value_type)); return X_text; } ////////////////////////////////////////////////////////////////////////////// // // dat@2ch クラス // class dat_of_2ch { public: typedef dat_of_2ch this_type; dat_of_2ch() { } dat_of_2ch(const std::string &line) { parse_line(line); } std::string name; std::string mail; std::string head; std::string body; std::string subject; bool parse_line(const std::string &line); }; bool dat_of_2ch::parse_line(const std::string &line) { // 一旦クリア name.erase(); mail.erase(); head.erase(); body.erase(); subject.erase(); // パーズ std::vector parts; split(line, "<>", &parts); if (5 == parts.size()) { std::vector::const_iterator i = parts.begin(); name = *(i++); mail = *(i++); head = *(i++); body = *(i++); subject = *i; return true; } return false; } ////////////////////////////////////////////////////////////////////////////// // // .dat から .aac を作成 // void dat_to_aac(const std::wstring &dat_file) { // // .dat ファイルの読み込み // OpenReadFile open_file(dat_file.c_str()); HANDLE stream = check_HANDLE(open_file); DWORD read_size = 0; BYTE buffer[16 *1024]; std::string file; while(ReadFile(stream, buffer, sizeof(buffer), &read_size, NULL) && read_size) { file.append((std::string::value_type*)buffer, read_size); } std::vector lines; split(file, "\x0A", &lines); // // 出力用ディレクトリの作成 // std::wstring dir_name; if (4 < dat_file.size() && 0 == _wcsicmp(L".dat", dat_file.substr(dat_file.size() -4, 4).c_str())) { dir_name = dat_file.substr(0, dat_file.size() -4); } else { dir_name = dat_file; } std::wstring prefix; std::wstring::size_type p = dir_name.rfind(L"\\"); if (std::wstring::npos == p) { prefix = dir_name; } else { prefix = dir_name.substr(p +1); } wchar_t subject_buffer[MAX_PATH]; wsprintfW(subject_buffer, L"%S", dat_of_2ch(lines[0]).subject.substr(0, MAX_PATH -1).c_str()); std::wstring subject = subject_buffer; { wchar_t *forbidden_chars[] = { L"\\", L"/", L":", L"*", L"?", L"\"", L"<", L">", L"|", }; for(int i = 0; i < ARRAY_SIZE(forbidden_chars); ++i) { string_replace(subject, forbidden_chars[i], L""); } while(0 < subject.size() && L' ' == subject[subject.size() -1]) { subject = subject.substr(0, subject.size() -1); } } if (0 < subject.size()) { if (std::wstring::npos == p) { dir_name = subject; } else { dir_name = dir_name.substr(0, p +1) +subject; } } CreateDirectoryW(dir_name.c_str(), NULL); // // .aac の出力 // int no = 1; int count = 0; for(std::vector::const_iterator i = lines.begin(); i != lines.end(); ++i) { dat_of_2ch current; if (current.parse_line(*i)) { printf("%d\r", no); wchar_t buffer[MAX_PATH]; wsprintfW(buffer, L"%s\\%s.%04d.aac", dir_name.c_str(), prefix.c_str(), no); const std::string &data = html_decode(current.body); DWORD write_size = 0; WriteFile(check_HANDLE(OpenWriteFile(buffer)), data.data(), data.size(), &write_size, NULL); ++count; } ++no; } WCHAR wbuffer[1024]; wsprintfW(wbuffer, L"完了 %s (%d 件)\n", dat_file.c_str(), count); DWORD write_size; WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), wbuffer, lstrlenW(wbuffer), &write_size, NULL); } ////////////////////////////////////////////////////////////////////////////// // // Command-line Interface // // // ワイルドカードの処理 // void dat_to_aac_ex(const std::wstring &path) { WIN32_FIND_DATAW X_WFD = { 0 }; std::wstring base_path; std::wstring::size_type p = path.rfind(L"\\"); if (std::wstring::npos != p) { base_path = path.substr(0, p +1); } AUTO_FINDHANDLE find_handle = check_HANDLE(FindFirstFileW(path.c_str(), &X_WFD)); do { if (FILE_ATTRIBUTE_DIRECTORY != (FILE_ATTRIBUTE_DIRECTORY &X_WFD.dwFileAttributes)) { dat_to_aac(base_path +X_WFD.cFileName); } } while(FindNextFileW(find_handle, &X_WFD)); } #if !defined(__MWERKS__) #define UNICODE_INTERFACE #endif #if defined(UNICODE_INTERFACE) int wmain(int argc, wchar_t *args[]) #else int main(int argc, char *args[]) #endif { for(int i = 1; i < argc; ++i) { try { #if defined(UNICODE_INTERFACE) dat_to_aac_ex(args[i]); #else wchar_t buffer[MAX_PATH]; wsprintfW(buffer, L"%S", args[i]); dat_to_aac_ex(buffer); #endif } catch(const win32_error &e) { std::cerr << e.what() << std::endl; return e.get_error_code(); } catch(const std::exception &e) { std::cerr << e.what() << std::endl; return E_FAIL; } } if (argc <= 1) { std::cout << "usage : dattoaac [datfile1 [datfile2 [...]]]" << std::endl; std::cout << std::endl; std::cout << "cf. http://tricklib.com/cxx/ex/calligpapher/" << std::endl; } return S_OK; } /****************************************************************************** □■□■ Wraith the Trickster □■□■ ■□■□ 〜I'll go with heaven's advantage and fool's wisdom.〜 ■□■□ ******************************************************************************/