2011/08/05

打倒文字化け

すごくめんどくさかったというお話。

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.IO;
using System.Runtime.InteropServices;

namespace VSClipboardTest
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // にほんご

        [DllImport("user32.dll")]
        private static extern bool OpenClipboard(IntPtr hWndNewOwner);
        [DllImport("user32.dll")]
        private static extern int IsClipboardFormatAvailable(int wFormat);
        [DllImport("user32.dll")]
        private static extern IntPtr GetClipboardData(int wFormat);
        [DllImport("user32.dll")]
        private static extern int CloseClipboard();
        [DllImport("user32.dll")]
        private static extern int RegisterClipboardFormat(string lpszFormat);

        private void button1_Click(object sender, EventArgs e)
        {
            OpenClipboard(IntPtr.Zero);
            int wFormat = RegisterClipboardFormat("Rich Text Format");
            IntPtr clipboard = GetClipboardData(wFormat);
            if (clipboard != IntPtr.Zero)
            {
                textBox1.Text = Marshal.PtrToStringAnsi(clipboard);
                textBox2.Text = Rtf2Html(textBox1.Text);
            }
            CloseClipboard();
        }

        private string Rtf2Html(string rtf)
        {
            var fontTable = new List<string>();
            var colorTable = new List<Color>();
            
            // load font table
            int fontTablePosition = rtf.IndexOf(@"{\fonttbl");
            if (fontTablePosition >= 0)
            {
                int fontTableEnd = rtf.IndexOf("}}", fontTablePosition + 10);
                if (fontTableEnd >= 0)
                {
                    string fontTableData = rtf.Substring(fontTablePosition + 10, fontTableEnd - fontTablePosition - 10);
                    foreach (string tableItem in fontTableData.Split(new[] { ';' }))
                    {
                        string[] pair = tableItem.Split(new[] { ' ' }, 2);
                        if (pair.Length == 2)
                        {
                            fontTable.Add(pair[1]);
                        }
                    }
                    
                    rtf = rtf.Remove(fontTablePosition, fontTableEnd - fontTablePosition + 2);
                }
            }

            // load color table
            colorTable.Add(Color.Black);
            int colorTablePosition = rtf.IndexOf(@"{\colortbl");
            if (colorTablePosition >= 0)
            {
                int colorTableEnd = rtf.IndexOf("}", colorTablePosition + 11);
                if (colorTableEnd >= 0)
                {
                    string colorTableData = rtf.Substring(colorTablePosition + 11, colorTableEnd - colorTablePosition - 11);
                    foreach (string colorItem in colorTableData.Split(new[] { ';' }))
                    {
                        Regex colorRegex = new Regex(@"\\red(?<r>\d+)\\green(?<g>\d+)\\blue(?<b>\d+)");
                        Match colorMatch = colorRegex.Match(colorItem);
                        if (colorMatch.Success)
                        {
                            colorTable.Add(
                                Color.FromArgb(
                                    int.Parse(colorMatch.Groups["r"].Value),
                                    int.Parse(colorMatch.Groups["g"].Value),
                                    int.Parse(colorMatch.Groups["b"].Value)));
                        }
                    }
                rtf = rtf.Remove(colorTablePosition, colorTableEnd - colorTablePosition + 1);
                }
            }

            // remove \rtf\ansi
            var rtfansiPattern = new Regex(@"(?<!\\)\\rtf\\ansi");
            rtf = rtfansiPattern.Replace(rtf, "");

            // remove header
            rtf = rtf.Remove(0, 1);
            rtf = rtf.Remove(rtf.Length - 1, 1);

            // convert return codes
            var crlfPattern = new Regex(@"(?<!\\)\\par ");
            rtf = crlfPattern.Replace(rtf, "\r\n");

            // html special characters
            rtf = rtf.Replace("&""&amp;").Replace("<""&lt;").Replace(">""&gt;");
            var spacePattern = new Regex(@"(?<!\\[a-zA-Z0-9]+?) ");
            rtf = spacePattern.Replace(rtf, "&nbsp;");

            // coloring
            var cf0Pattern = new Regex(@"(?<!\\)\\cf0 ");
            rtf = cf0Pattern.Replace(rtf, "</span>");
            for (int i = 1; i < colorTable.Count; i++)
            {
                var cfnPattern = new Regex(@"(?<!\\)\\cf" + i.ToString() + " ");
                rtf = cfnPattern.Replace(rtf, string.Format("<span style='color: #{0:X6};'>", colorTable[i].ToArgb() & 0x00FFFFFF));
            }
            if (rtf.LastIndexOf("</span") < rtf.LastIndexOf("<span") && rtf.LastIndexOf("</span") >= 0)
            {
                rtf += "</span>";
            }

            // unicode
            var unicodeInput = new Regex(@"(?<!\\)\\uinput2\\u(?<code>\d+) ..");
            while (true)
            {
                var match = unicodeInput.Match(rtf);
                if (match.Success)
                {
                    char c = (char)int.Parse(match.Groups["code"].Value);
                    rtf = unicodeInput.Replace(rtf, c.ToString(), 1);
                }
                else
                {
                    break;
                }
            }

            // remove rtf tags
            Regex rtfTag = new Regex(@"(?<!\\)\\[a-zA-Z0-9]+?[\s{}]");
            rtf = rtfTag.Replace(rtf, "");

            // convert special characters
            rtf = rtf.Replace("\\\\""\\");
            rtf = rtf.Replace("\\{""{");
            rtf = rtf.Replace("\\}""}");

            // add pre
            rtf = "<pre class='code'>" + rtf + "</pre>";

            return rtf;
        }
    }
}

あとは Live Writer 用のプラグイン作れば完了な感じなんだろうか。

しかし酷いコードだ。

0 件のコメント:

コメントを投稿