converted old daimyo content

November 6, 2016

I spent the morning dumping the Drupal database that used to power this website. Since it was such an old version of Drupal, the existing drupal2hugo converter did not work, so I wrote my own.

Be warned now that it’s a hack, and will probably delete all your data. That said, I’ve included it here for anyone who may be Googling in the future.

#!/usr/bin/python
from __future__ import unicode_literals

import datetime
import html2text
import MySQLdb as mdb
import re

def file_guess(title):
    """Guess a Hugo content file name from a Drupal post title.

    The title is turned into a lowercase, dash-separated slug: spaces and
    periods become dashes, most punctuation is dropped, ``%`` is spelled
    out as ``percent`` and ``&`` as ``and``, a trailing parenthesised
    remark is removed, and ``.md`` is appended.

    Args:
        title: The post title as a text string.

    Returns:
        The guessed file name, e.g. ``'hello-world.md'``. An empty title
        (or one made up only of stripped punctuation) yields ``'.md'``.
    """
    # (old, new) pairs. None of the replacement texts contains a character
    # that another pair rewrites, so the order is not significant.
    replacements = (
        (' ', '-'),
        ('.', '-'),
        ('?', ''),
        ('"', ''),
        ('%', 'percent'),
        ('&', 'and'),
        ('$', ''),
        ('/', ''),
        ('>', ''),
        ('<', ''),
        (',', ''),
        ("'", ''),
        (':', ''),
    )
    for old, new in replacements:
        title = title.replace(old, new)

    # endswith() rather than title[-1]: the title may be empty by now.
    if title.endswith('-'):
        title = title[:-1]

    # Drop a trailing "-(...)" remark, e.g. "foo-(part-2)" -> "foo".
    title = re.sub(r'-+\(.*\)$', '', title)
    # Collapse runs of dashes left behind by adjacent spaces/periods.
    title = re.sub(r'-{2,}', '-', title)
    # Drop an unterminated "-(..." tail as well.
    title = re.sub(r'-\(.*$', '', title)
    # Remove "(r)" / "(R)" registered-trademark markers.
    title = re.sub(r'\([rR]\)', '', title)

    return title.lower() + '.md'

db = mdb.connect(host='localhost',  user='',  passwd='',
        db='drupal', charset='utf8', use_unicode=True)

cur = db.cursor(mdb.cursors.DictCursor)

cur.execute('select * from node where type="blog"')

for row in cur.fetchall():
    f = open('content/%s' % file_guess(row['title']), 'w+')
    print 'processing file_name: %s' % file_guess(row['title'])
    print >>f,'+++'
    print >>f, 'title = "%s"' % row['title']
    print >>f, 'draft = false'
    print >>f, 'date = %s' % datetime.datetime.fromtimestamp(int(row['created'])).strftime('%Y-%m-%dT%H:%M:%S-07:00')
    print >>f, ''
    print >>f, '+++'
    print >>f, ''
    try:
        print >>f, html2text.html2text(row['body'].encode('ascii', 'ignore'))
    except Exception as e:
        print 'could not convert: %s because %s' % (row['title'], e)

    f.close()

db.close()
comments powered by Disqus