diff options
author | shtrom <shtrom@1991c358-8f32-0410-a49a-990740bdf4c2> | 2014-04-21 12:14:15 +0000 |
---|---|---|
committer | shtrom <shtrom@1991c358-8f32-0410-a49a-990740bdf4c2> | 2014-04-21 12:14:15 +0000 |
commit | 865fb53e267a1d29d36158ea5748dc7ab63e077d (patch) | |
tree | e8813fff7d149ed0d72399277315014a6f329a97 /WordPressImport.pl | |
parent | 020712087b273f244e234ff2cf0d2ddf07c2bf2a (diff) |
[WordpressImport] update for WP 3.8.x, add support for MultiSite blogs, compressed data and absent links
git-svn-id: svn+ssh://scm.narf.ssji.net/svn/shtrom/scripts@1802 1991c358-8f32-0410-a49a-990740bdf4c2
Diffstat (limited to 'WordPressImport.pl')
-rw-r--r-- | WordPressImport.pl | 119 |
1 files changed, 74 insertions, 45 deletions
diff --git a/WordPressImport.pl b/WordPressImport.pl index 81a188d..448dbb3 100644 --- a/WordPressImport.pl +++ b/WordPressImport.pl @@ -79,7 +79,7 @@ use DBI(); use Date::Format; use Getopt::Long; -my $version=0.7; +my $version="0.8sh"; my %OPTS = ( host => 'localhost', user => 'root', @@ -94,6 +94,7 @@ my @OPTIONS= ( "password:s", "wp_password=s", "wp_username=s", + "blogid=s", "import=s", "noratings", "create", @@ -112,6 +113,7 @@ options: -database - database name (def: $OPTS{database}) -wp_username - wordpress username (def: $OPTS{wp_username}) -wp_password - wordpress database password (see wp-config.php) + -blogid - id of the blog in MultiSite configuration (>1) -noratings - do not import ratings -help - this help -verbose - verbose output - specify multiple times for increased verbosity @@ -136,6 +138,7 @@ commands: -database - database name (default: 'wordpress') -wp_password - wordpress database password (must match password in wp-config.php) -wp_username - wordpress username (def: 'wordpress') + -blogid - id of the blog in MultiSite configuration (>1) -noratings - do not import ratings (in case postratings plugin not installed) -help - this helpful help -verbose - verbose output - specify multiple times for increased verbosity @@ -187,7 +190,7 @@ This utility was tested with SimplePHPBlog version 0.4.5 and 0.5.11 =head1 REVISION HISTORY - Revision 0.6 2008/12/09 + Revision 0.8 2014-04-16 0.1 Initial revision 0.2 fixed ratings @@ -196,15 +199,20 @@ This utility was tested with SimplePHPBlog version 0.4.5 and 0.5.11 0.5 Support pre-0.4.6.1 versions 0.6 fixed import bug on wp_term_relationships table 0.7 ratings needs to be written as floating point + 0.8sh update for WP 3.8.x, add support for MultiSite blogs, compressed data and absent links =head1 AUTHOR This implementation written by Will Hall I<dev@innerhippy.com> +Support 3.8.x, MultiSite and compression by Olivier Mehani +I<shtrom-wordpress@ssji.net> + =head1 COPYRIGHT and LICENSE Copyright 2008 Innerhippy Software Corporation. All rights reserved. +Copyright 2014 Olivier Mehani. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -236,6 +244,7 @@ policies, either expressed or implied, anywhere. =cut my $DBH; +my $blogid=""; usage() if !GetOptions(\%OPTS,@OPTIONS) or $OPTS{help}; @@ -252,6 +261,10 @@ if ($OPTS{create}) { createDatabase(); } +if ($OPTS{blogid}) { + $blogid="$OPTS{blogid}_"; +} + if ($OPTS{import}) { dbConnect($OPTS{database}); @@ -289,7 +302,7 @@ sub slug { sub updateCatCount { # Multiple rows returned so can't use runSelSQL my $sth = $DBH->prepare("SELECT term_taxonomy_id - FROM wp_term_relationships group by term_taxonomy_id") or die $DBH->errstr; + FROM wp_${blogid}term_relationships group by term_taxonomy_id") or die $DBH->errstr; $sth->execute(@_); my @cats; while ( my $row = $sth->fetchrow_arrayref) { @@ -297,9 +310,9 @@ sub updateCatCount { } $sth->finish; foreach my $taxonomy_id (@cats) { - runSQL ("UPDATE wp_term_taxonomy tt + runSQL ("UPDATE wp_${blogid}term_taxonomy tt SET tt.count = ( select count(*) - FROM wp_term_relationships tr + FROM wp_${blogid}term_relationships tr WHERE tr.term_taxonomy_id = tt.term_taxonomy_id ) WHERE tt.term_taxonomy_id = ?", $taxonomy_id); } @@ -308,35 +321,35 @@ sub updateCatCount { sub addEntry { my ($cats, $subject, $date, $content, $type) = @_; - if ( (runSelSQL ("SELECT * FROM wp_posts + if ( (runSelSQL ("SELECT * FROM wp_${blogid}posts WHERE post_title=? AND post_type=?", $subject, $type))[0]) { loggit ("Post exists: '$subject'", 1); return; } - runSQL ("INSERT INTO wp_posts ( + runSQL ("INSERT INTO wp_${blogid}posts ( post_author, post_date, post_date_gmt, post_content, post_title, - post_category, post_excerpt, post_password, post_name, to_ping, + post_excerpt, post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt, post_content_filtered, guid, post_mime_type, post_type) - VALUES (1,?,?,?,?,?,'','',?,'','',?,?,'',0,'',?)", - $date, $date, $content, $subject, 0, + VALUES (1,?,?,?,?,'','',?,'','',?,?,'',0,'',?)", + $date, $date, $content, $subject, slug($subject), $date, $date, $type); - my $siteurl = (runSelSQL ("SELECT option_value FROM wp_options WHERE option_name='siteurl'"))[0] + my $siteurl = (runSelSQL ("SELECT option_value FROM wp_${blogid}options WHERE option_name='siteurl'"))[0] || error_exit "Cannot get siteurl from options table"; - my $insert_id = $DBH->last_insert_id(undef, undef, 'wp_posts', 'ID'); - runSQL ("UPDATE wp_posts SET guid=? WHERE post_title=?", "$siteurl/?p=$insert_id", $subject); + my $insert_id = $DBH->last_insert_id(undef, undef, 'wp_${blogid}posts', 'ID'); + runSQL ("UPDATE wp_${blogid}posts SET guid=? WHERE post_title=?", "$siteurl/?p=$insert_id", $subject); foreach my $category (@$cats) { my $category_id=(runSelSQL (qq{SELECT term_taxonomy_id - FROM wp_term_taxonomy,wp_terms - WHERE wp_term_taxonomy.term_id = wp_terms.term_id - AND wp_terms.name = ?}, $category))[0] + FROM wp_${blogid}term_taxonomy,wp_${blogid}terms + WHERE wp_${blogid}term_taxonomy.term_id = wp_${blogid}terms.term_id + AND wp_${blogid}terms.name = ?}, $category))[0] || die "lost category '$category'"; - runSQL ("INSERT INTO wp_term_relationships (object_id, term_taxonomy_id) + runSQL ("INSERT INTO wp_${blogid}term_relationships (object_id, term_taxonomy_id) VALUES (?,?)", $insert_id, $category_id); } return $insert_id; @@ -387,9 +400,12 @@ sub addPosts { my ($added, $skipped)=(0,0); foreach my $p (@$posts) { my @cat_names; - foreach (@{$p->{categories}}) { + foreach ($p->{categories}) { push @cat_names, $categories->{$_} || "Uncategorized"; # This should exist } + # Make the entries in @cat_names unique + my %categories = map { $_ => 1 } @cat_names; + @cat_names = keys %categories; my $id = addEntry (\@cat_names, $p->{subject},$p->{date},$p->{content},'post'); if ($id) { $added++; @@ -406,12 +422,12 @@ sub addPosts { sub addRating { my ($points, $votes, $id, $subject) = @_; $OPTS{noratings} && return; - my $max = (runSelSQL ("SELECT option_value FROM wp_options WHERE option_name='postratings_max'"))[0]; + my $max = (runSelSQL ("SELECT option_value FROM wp_${blogid}options WHERE option_name='postratings_max'"))[0]; $points = sprintf ("%.0f", ($points||0)*$max/5); my $avg = sprintf ("%.2f", ($points||0)/($votes||1)); # average rating score for max rating = 5 - runSQL ("INSERT INTO wp_postmeta (post_id, meta_key, meta_value) + runSQL ("INSERT INTO wp_${blogid}postmeta (post_id, meta_key, meta_value) VALUES (?,'ratings_users',?), (?, 'ratings_score', ?), (?, 'ratings_average', ?)", $id, $votes, $id, $points, $id, $avg); } @@ -421,7 +437,7 @@ sub addComments { my $i=0; foreach my $c (@$comments) { $i++; - runSQL ("INSERT INTO wp_comments (comment_post_ID, comment_author, + runSQL ("INSERT INTO wp_${blogid}comments (comment_post_ID, comment_author, comment_author_email, comment_author_url, comment_author_IP, comment_date, comment_date_gmt, comment_content, comment_agent, comment_type) @@ -429,7 +445,7 @@ sub addComments { $id, $c->{name}, $c->{email}, $c->{url}, $c->{date}, $c->{date}, $c->{content}); } # Update comment count - runSQL ("UPDATE wp_posts set comment_count=? where ID=?", $i, $id); + runSQL ("UPDATE wp_${blogid}posts set comment_count=? where ID=?", $i, $id); return $i; } sub addCategories { @@ -437,7 +453,7 @@ sub addCategories { my ($added, $skipped) = (0,0); # Make sure there's one called 'Uncategorized' - unless ( (runSelSQL ("SELECT term_id FROM wp_terms + unless ( (runSelSQL ("SELECT term_id FROM wp_${blogid}terms WHERE name='Uncategorized'"))[0]) { # find next category number free my $max; @@ -447,15 +463,15 @@ sub addCategories { foreach (keys %$cat) { - if ((runSelSQL("SELECT term_id,name from wp_terms WHERE name=?", $cat->{$_}))[0]) { + if ((runSelSQL("SELECT term_id,name from wp_${blogid}terms WHERE name=?", $cat->{$_}))[0]) { loggit ("category exists: $cat->{$_}", 1); $skipped++; next; } my $slug = slug ($cat->{$_}); - runSQL("INSERT INTO wp_terms (name,slug,term_group) VALUES (?, ?, 0)", $cat->{$_}, $slug); - my $insert_id = $DBH->last_insert_id(undef, undef, 'wp_terms', 'term_id'); - runSQL("INSERT INTO wp_term_taxonomy (term_id, taxonomy, description, parent, count) + runSQL("INSERT INTO wp_${blogid}terms (name,slug,term_group) VALUES (?, ?, 0)", $cat->{$_}, $slug); + my $insert_id = $DBH->last_insert_id(undef, undef, 'wp_${blogid}terms', 'term_id'); + runSQL("INSERT INTO wp_${blogid}term_taxonomy (term_id, taxonomy, description, parent, count) VALUES (?, 'category','',0,1)", $insert_id); $added++; } @@ -467,28 +483,28 @@ sub addLinks { my ($added, $skipped) = (0,0); my ($taxonomy_id, $count) = runSelSQL ( "SELECT tt.term_taxonomy_id, tt.count - FROM wp_term_taxonomy AS tt, wp_terms AS t + FROM wp_${blogid}term_taxonomy AS tt, wp_${blogid}terms AS t WHERE t.name='Blogroll' AND tt.term_id = t.term_id AND tt.taxonomy='link_category'"); - $taxonomy_id || die "no link_category"; + $taxonomy_id || return; foreach my $name (keys %$links) { - if ( (runSelSQL ("SELECT COUNT(*) FROM wp_links WHERE + if ( (runSelSQL ("SELECT COUNT(*) FROM wp_${blogid}links WHERE link_url=? AND link_name=?", $links->{$name}, $name))[0]) { loggit ("Link exists: $name", 1); $skipped++; next; } - runSQL ("INSERT INTO wp_links (link_url, link_name, link_image, link_target, + runSQL ("INSERT INTO wp_${blogid}links (link_url, link_name, link_image, link_target, link_description, link_rel, link_notes, link_rss) VALUES (?, ?, '', '', '', '', '', '')", $links->{$name}, $name); - my $insert_id = $DBH->last_insert_id(undef, undef, 'wp_links', 'link_id'); - runSQL ("INSERT INTO wp_term_relationships (object_id, term_taxonomy_id) VALUES (?,?)", $insert_id, $taxonomy_id); + my $insert_id = $DBH->last_insert_id(undef, undef, 'wp_${blogid}links', 'link_id'); + runSQL ("INSERT INTO wp_${blogid}term_relationships (object_id, term_taxonomy_id) VALUES (?,?)", $insert_id, $taxonomy_id); $count++; $added++; } - $added && runSQL ("UPDATE wp_term_taxonomy SET count=? WHERE term_taxonomy_id=?", $count, $taxonomy_id); + $added && runSQL ("UPDATE wp_${blogid}term_taxonomy SET count=? WHERE term_taxonomy_id=?", $count, $taxonomy_id); loggit ("Added $added, skipped $skipped links"); } @@ -567,7 +583,11 @@ sub new { sub parse_fields { my ($h, $file, $str) = @_; - open IN, $file or die "Cannot open $file:$!"; + if($file =~ /\.gz$/i) { + open IN, "gunzip -c $file |" or die "Cannot gunzip -c $file:$!"; + } else { + open IN, $file or die "Cannot open $file:$!"; + } local $/; $_=<IN>; close IN; @@ -631,9 +651,9 @@ sub import_dir { opendir (DIR, $dir) or die "Cannot open dir $dir: $!"; foreach (sort grep !/^\.+$/, readdir(DIR)) { -d "$dir/$_" && $self->import_dir ("$dir/$_"); - /^(entry[\d-]+)\.txt$/ && $self->add_post("$dir/$1"); + /^(entry[\d-]+)\.txt(.gz)?$/ && $self->add_post("$dir/$1", "$2"); /^categories\.txt$/ && $self->add_category("$dir/$_"); - /^static[\d-]+(\.txt)?$/ && $self->add_static("$dir/$_"); + /^static[\d-]+(\.txt(\.gz)?)?$/ && $self->add_static("$dir/$_"); /^blocks\.txt$/ && $self->add_blocks("$dir/$_"); /^links\.txt$/ && $self->add_links("$dir/$_"); } @@ -665,13 +685,13 @@ sub add_static { } sub add_post { - my ($self, $dir) = @_; + my ($self, $dir, $gz) = @_; # Get post - my $post = WordPressImport::Post->new("${dir}.txt"); + my $post = WordPressImport::Post->new("${dir}.txt${gz}"); # Get ratings and comments - $post->add_data($dir); + $post->add_data($dir,$gz); # Store post push @{$self->{posts}}, $post; @@ -736,7 +756,16 @@ sub new { $self->{categories} ||=0; # Account for no category in post (seen once) # A nasty kludge - but accommodate for posts with multiple categories # by redefining categories as an array. Yuk. - $self->{categories} = [ split ",", $self->{categories} ]; + # $self->{categories} = split(/,/, $self->{categories}); + my $data = $self->{categories}; + my @values = split(/,/, $data); + # Make sure we always have an array + # XXX: I don't think this works anymore... + if (ref(@values) eq 'ARRAY') { + $self->{categories} = @values; + } else { + $self->{categories} = [ $self->{categories} ]; + } WordPressImport::sanitize (\$self->{content}); return $self; } @@ -752,11 +781,11 @@ sub get_comments { } sub add_comments { - my ($self, $dir) = @_; + my ($self, $dir, $gz) = @_; -d $dir || return; local(*DIR); opendir (DIR, $dir) or die "Cannot open dir $dir: $!"; - foreach (grep /\.txt$/, readdir(DIR)) { + foreach (grep /\.txt$gz$/, readdir(DIR)) { push @{$self->{comments}}, WordPressImport::Comment->new ("$dir/$_"); } } @@ -769,13 +798,13 @@ sub add_rating { } sub add_data { - my ($self, $dir) = @_; + my ($self, $dir, $gz) = @_; -d $dir || return; local(*DIR); my $cm; opendir (DIR, $dir) or die "Cannot open dir $dir: $!"; foreach (grep !/^\.+$/, readdir(DIR)) { - /^comments$/ && $self->add_comments("$dir/$_"); + /^comments$/ && $self->add_comments("$dir/$_", $gz); /^rating.txt$/ && $self->add_rating("$dir/$_"); } } |