Simple JSON Parser in Perl
http://d.hatena.ne.jp/sifue/20120218/1329588477
JSONのパーサを書いてみたいと思って写経しようかと思ったんだけど、
個人的にそのままの書き写しはどうも頭に入らないので、Perl版を
書いてみました。
コード
JavaScript版に忠実です。
package JSON::Parser::Simple; use strict; use warnings; use 5.010; use Scalar::Util qw/looks_like_number/; use Carp (); use Encode (); use Encode::JavaScript::UCS; our $VERSION = '0.01'; sub new { my $class = shift; bless { at => 0, # The index of the current character ch => ' ', # The current character text => '', }, $class; } sub parse { my ($self, $source) = @_; $self->{text} = $source; $self->{at} = 0; $self->{ch} = ' '; return $self->_value(); } sub _value { my $self = shift; # Parse a JSON value. It could be an object, an array, atring, anumber # or word $self->_white(); given ($self->{ch}) { when ('{') { return $self->_object(); } when ('[') { return $self->_array(); } when ('"') { return $self->_string(); } when ('-') { return $self->_number(); } when (m/^[0-9]$/) { return $self->_number(); } default { return $self->_word(); } } } sub _object { my $self = shift; # Parse an object value my $object = {}; if ($self->{ch} eq '{') { $self->_next_char('{'); $self->_white(); if ($self->{ch} eq '}') { $self->_next_char('}'); return $object; } while ($self->{ch}) { my $key = $self->_string(); $self->_white(); $self->_next_char(':'); $object->{$key} = $self->_value(); $self->_white(); if ($self->{ch} eq '}') { $self->_next_char('}'); return $object; } $self->_next_char(','); $self->_white(); } } $self->_error("Bad object"); } sub _array { my $self = shift; # Parse an array value my $array = []; if ( $self->{ch} eq '[' ) { $self->_next_char('['); $self->_white(); if ($self->{ch} eq ']') { $self->_next_char(']'); return $array; } while ($self->{ch}) { push @{$array}, $self->_value(); $self->_white(); if ($self->{ch} eq ']') { $self->_next_char(']'); return $array; } $self->_next_char(','); $self->_white(); } } $self->_error("Bad Array"); } sub _string { my $self = shift; # Parse a string value my %escapee = ( '"' => '"', '\\' => '\\', '/' => '/', b => '\b', f => '\f', n => '\n', r => '\r', t => '\t', ); my $string = ''; # when parsing for string values, we must look for " and \ characters. if ($self->{ch} eq '"') { while ( $self->_next_char() ) { if ($self->{ch} eq '"') { $self->_next_char(); return $string; } elsif ($self->{ch} eq '\\') { $self->_next_char(); if ($self->{ch} eq 'u') { my $uffff = '\\u'; for (my $i = 0; $i < 4; $i++) { my $c = $self->_next_char(); last unless $c =~ m{^[0-9a-fA-F]$}; $uffff .= $c; } $string .= Encode::decode("JavaScript-UCS", $uffff); } elsif (exists $escapee{ $self->{ch} }) { $string .= $escapee{ $self->{ch} }; } else { last; } } else { $string .= $self->{ch}; } } $self->_error("Bad string"); } } sub _number { my $self = shift; # Parse a number value my $str = ''; if ($self->{ch} eq '-') { $str = '-'; $self->_next_char('-'); } while ($self->{ch} =~ m{^[0-9]$}) { $str .= $self->{ch}; $self->_next_char(); } if ($self->{ch} eq '.') { $str .= '.'; while ( $self->_next_char() ) { last unless $self->{ch} =~ m{^[0-9]$}; $str .= $self->{ch}; } } if ($self->{ch} =~ m{^[eE]$}) { $str .= $self->{ch}; $self->_next_char(); if ($self->{ch} eq '-' || $self->{ch} eq '+') { $str .= $self->{ch}; $self->_next_char(); } while ($self->{ch} =~ m{^[0-9]$}) { $str .= $self->{ch}; $self->_next_char(); } } unless ( looks_like_number($str) ) { $self->_error("Bad number"); } return +$str; } sub _word { my $self = shift; # true, false, or null given ($self->{ch}) { when ("t") { $self->_next_char('t'); $self->_next_char('r'); $self->_next_char('u'); $self->_next_char('e'); return 1; } when ("f") { $self->_next_char('f'); $self->_next_char('a'); $self->_next_char('l'); $self->_next_char('s'); $self->_next_char('e'); return 0; } when ("n") { $self->_next_char('n'); $self->_next_char('u'); $self->_next_char('l'); $self->_next_char('l'); return undef; } } $self->_error("Unexpected '$self->{ch}'"); } sub _next_char { my ($self, $c) = @_; # if a $c parameter is provided, verify that it matches the current character if ($c && $c ne $self->{ch}) { $self->_error("Expected: '$c', instead of '$self->{ch}' "); } # Get the next character. When there are no more characters # return the empty string $self->{ch} = substr $self->{text}, $self->{at}, 1; $self->{at}++; return $self->{ch}; } sub _white { my $self = shift; while ($self->{ch} && $self->{ch} =~ m{^\s$}) { $self->_next_char(); } } sub _error { my ($self, $message) = @_; my $str = sprintf "%s (at:%d, text:%s)", $message, $self->{at}, $self->{text}; die $str; }
実行
テストコード
#!perl use strict; use warnings; use JSON::Parser::Simple; use Data::Dumper; my $p = JSON::Parser::Simple->new; my $result = $p->parse(' { "a" : 10, "b" : [1,true,null], "c" : { "d" : "\u0068\u0065\u006c\u006c\u006f"} } '); local $Data::Dumper::Terse = 1; die Dumper($result);
結果
% perl -Ilib eg/sample.pl { 'c' => { 'd' => 'hello' }, 'a' => '10', 'b' => [ '1', 1, undef ] }
おわりに
簡単なものでもパーサを書くというのはなかなか勉強に
なるものだなと感じました。