
package Blake3HasherPurePerl;

use strict;
use bytes;
use constant {
    BLOCK_LEN => 64,

    # flag values
    CHUNK_START          => 1 << 0,
    CHUNK_END            => 1 << 1,
    PARENT               => 1 << 2,
    ROOT                 => 1 << 3,
	# NOT SUPPORTING THESE MODES. NORMAL HASHING ONLY.
    # KEYED_HASH           => 1 << 4,
    # DERIVE_KEY_CONTEXT   => 1 << 5,
    # DERIVE_KEY_MATERIAL  => 1 << 6,
};

my @IV = ( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
           0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 );

# my @MSG_PERMUTATION = (2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8);
sub new {

    my $state = {
	       chaining_value => [ @IV ],  # h, 8 four-byte integers
		   message_block  => '',       # m, and also b, from the length. Max 64 bytes
		   total_bytes    => 0,        # this, mod 64, or 64, should be length of message_block
		   CV_stack       => [],       # up to 54 more arrays of 8 four-byte integers
	};
		
	bless $state;
}

sub freeze  { my $state = shift;
   my $serialization = pack "V8", $state->{chaining_value}->@*;
   $serialization .= pack 'C', length $state->{message_block};
   $serialization .= $state->{message_block};
   $serialization .= pack "V2", $state->{total_bytes} & 0xFFFF_FFFF , ( $state->{total_bytes} >> 32 ) & 0xFFFF_FFFF;
   $serialization .= pack "V8", @$_ for $state->{CV_stack}->@*;
   $serialization;
}

sub thaw { @_ > 2 and die "only one serialization allowed in thaw method";
    my $iceblock = pop; 
	my %state;
	$state{chaining_value} = [ unpack 'V8', substr($iceblock,0,32,'') ];
	my $message_len = unpack 'C', substr $iceblock,0,1,'';
	$state{message_block} = substr($iceblock,0,$message_len,'');
	my ($low_word, $high_word) = unpack 'V2', substr($iceblock,0,8,'');
	$state{total_bytes} = ($high_word << 32) + $low_word;
	$state{CV_stack} = [];
	push $state{CV_stack}->@*, [unpack 'V8', substr($iceblock,0,32,'') ] while length $iceblock;
	bless \%state;
}

sub clone { $_[0]->thaw($_[0]->freeze()) }

sub add {
  defined(wantarray) and die "Usage: call hasher->add in void context\n";
  my $state = shift;
  while (@_){
	  my $piece = shift;
	  while (length $piece) {
		  # "Processed one byte at a time" -- Johnny Crash
		  my $octet = substr($piece,0,1,'');
		  if ( length($state->{message_block}) == 64 ){
			  my $flags = 0x0000_0000;
			  my $length_into_chunk = $state->{total_bytes} & 1023;
			  if ($length_into_chunk == 0 ){
				  $flags |= CHUNK_END
			  }elsif ( $length_into_chunk == 64 ){
				  $flags |= CHUNK_START
			  };
		      $state->compress( $flags ); # consumes and clears message_block
		  };
		  $state->{message_block} .= $octet;
		  $state->{total_bytes}++;
	  };
  };
}

sub _mask32 { $_[0] & 0xFFFF_FFFF }

sub _add32 { ($_[0] + $_[1]) & 0xFFFF_FFFF }

sub _rotr32 {
    my ($x, $n) = @_;
    $x &= 0xFFFF_FFFF;
    (($x >> $n) | (($x << (32 - $n)) & 0xFFFF_FFFF)) & 0xFFFF_FFFF;
}

sub _g {
    my ($v, $a, $b, $c, $d, $mx, $my) = @_;

    $v->[$a] = _add32($v->[$a], _add32($v->[$b], $mx));
    $v->[$d] = _rotr32($v->[$d] ^ $v->[$a], 16);
    $v->[$c] = _add32($v->[$c], $v->[$d]);
    $v->[$b] = _rotr32($v->[$b] ^ $v->[$c], 12);

    $v->[$a] = _add32($v->[$a], _add32($v->[$b], $my));
    $v->[$d] = _rotr32($v->[$d] ^ $v->[$a], 8);
    $v->[$c] = _add32($v->[$c], $v->[$d]);
    $v->[$b] = _rotr32($v->[$b] ^ $v->[$c], 7);
}

sub _round {
    my ($v, $m) = @_;

    # columns
    _g($v, 0, 4,  8, 12, $m->[0],  $m->[1]);
    _g($v, 1, 5,  9, 13, $m->[2],  $m->[3]);
    _g($v, 2, 6, 10, 14, $m->[4],  $m->[5]);
    _g($v, 3, 7, 11, 15, $m->[6],  $m->[7]);

    # diagonals
    _g($v, 0, 5, 10, 15, $m->[8],  $m->[9]);
    _g($v, 1, 6, 11, 12, $m->[10], $m->[11]);
    _g($v, 2, 7,  8, 13, $m->[12], $m->[13]);
    _g($v, 3, 4,  9, 14, $m->[14], $m->[15]);
}

# note: elements of @_ are L-value arguments, @_ itself is not
sub _permute { @_[0 .. 15] = @_[2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8] }

sub compress {
    my ($state, $flags) = @_;
	
    my $block_len = length $state->{message_block};
    $block_len == 64 or $state->{message_block} .= "\0" x ( 64 - $block_len);

    my @m = unpack 'V16', $state->{message_block};

    my $chunk_counter = ($state->{total_bytes} - $block_len)  >> 10;

    my @v = (
        $state->{chaining_value}->@*,
        @IV[0..3],
        $chunk_counter & 0xFFFF_FFFF,
        ($chunk_counter >> 32) & 0xFFFF_FFFF,
        $block_len,
        $flags,
    );

    # note: _permute operates directly on @_
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m);

    for my $i (0..7) {
        $v[$i]     = _mask32($v[$i] ^ $v[$i + 8]);
        $v[$i + 8] = _mask32($v[$i + 8] ^ $state->{chaining_value}[$i]);
    }

    $state->{chaining_value} = [ @v[0..7] ];
    $state->{message_block}  = '';

    # If that was the last block in a chunk, merge the completed chunk CV.
    if ( $flags & CHUNK_END ) {
        my $finished_cv = $state->{chaining_value};
		
	    if ($state->{total_bytes}) {
           $state->merge_cv($finished_cv);
		}else{
			push $state->{CV_stack}->@*, $finished_cv;
		};

        # start next chunk from IV
        $state->{chaining_value} = [ @IV ];
    }
}

our $parent_flags = PARENT;
sub _parent_cv {
    my ($left_cv, $right_cv) = @_;

    my @m = ($left_cv->@*, $right_cv->@*);
    my @v = (
        @IV,
        @IV[0..3],
        0, 0,
        BLOCK_LEN, # 64
        $parent_flags, # includes ROOT at the very end of digesting
    );

    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m); _permute(@m);
    _round(\@v, \@m);

    for my $i (0..7) {
        $v[$i]     = _mask32($v[$i] ^ $v[$i + 8]);
        $v[$i + 8] = _mask32($v[$i + 8] ^ $IV[$i]);
    }

    [ @v[0..7] ];
}

our $digesting = 0;
sub merge_cv { my ($state, $new_cv) = @_;
    my $total_chunks = $state->{total_bytes} >> 10;
	# include the final short chunk when digesting
	$digesting and $state->{total_bytes} & 0x03FF and $total_chunks++;

    while ( ($total_chunks & 1) == 0 ) {
        my $left_cv = pop $state->{CV_stack}->@*
            or die "CV stack underflow";
		$digesting and !@{$state->{CV_stack}} and $parent_flags |= ROOT;
        $new_cv = _parent_cv($left_cv, $new_cv);
        $total_chunks >>= 1;
    }

    push $state->{CV_stack}->@*, $new_cv;
}


sub digest { my $state = shift;
    local $parent_flags = $parent_flags;
	local $digesting = 1;
    
	
	# because $state->{message_block} .= $octet is the
	# last thing that happens in add(), there will always
	# be at least one byte in the message_block except
	# immediately after hasher creation.

	my $flags = CHUNK_END; # always CHUNK_END, this is the final chunk, and may be short.
    $state->{total_bytes} <= 1024 and $flags |= ROOT;  # zero or one chunk
	
	my $length_into_chunk = $state->{total_bytes} & 1023;
	if ($length_into_chunk){
		$length_into_chunk <= 64 and $flags |= CHUNK_START ;
	}else{
	    $state->{total_bytes} == 0 	and $flags |= CHUNK_START ;
	};
	$state->compress( $flags ); # consumes and clears message_block
	my $zip_up = pop $state->{CV_stack}->@*;
	if ( @{$state->{CV_stack}} ){
		my $leftmost = shift $state->{CV_stack}->@*;
	    for my $left ( reverse $state->{CV_stack}->@* ){
            $zip_up = _parent_cv($left, $zip_up);
        };
        $parent_flags |= ROOT;
        $zip_up = _parent_cv($leftmost, $zip_up);
	}
    
	%$state = %{ $state->new }; # reset the object
	
    pack 'V8', @$zip_up; # here's the digest
}
sub hexdigest { unpack 'H*', $_[0]->digest()} 
use Exporter 'import';
our @EXPORT = qw/b3pp/;
sub b3pp { __PACKAGE__->new() };

1;
__END__

=head1 NAME
Blake3HasherPurePerl - the BLAKE3 hash function in pure Perl

=head1 SYNOPSIS
 use lib "."; # or wherever you put this file
 use Blake3HasherPurePerl;
 my $hasher = b3pp();
 $hasher->add("hello");
 $hello_hash = $hasher->clone->digest(); # use clone to not reset
 $hasher->add(" world\n");
 $hash = $hasher->hexdigest(); # matches `echo hello world | ./b3sum --no-names`
 # which is dc5a4edb8240b018124052c330270696f96771a63b45250a5c17d3000e823355

=head1 DESCRIPTION
A pure Perl implementation of the BLAKE3 hash function.

The module provides a subset of the interface found in L<Digest::BLAKE3>,
which you should obviously prefer for serious work.

=head1 DEPENDENCIES
A Perl recent enough to have postfix dereference syntax, since the code uses that.

=head1 METHODS

=over

=item $class->new()

=item $hasher->new()

=item b3pp()

the constructor. C<b3pp> is exported by default.

=item $hasher->clone()

Returns a new hasher with the same state, mode, and output size
as the original.

=item $hasher->add($bytes, ...)

Updates the hasher state with each of the given byte strings.

=item $hasher->digest()

Returns the final hash value as a byte string and resets the hasher.

=item $hasher->hexdigest()

Returns the final hash value as a hexadecimal text string and resets
the hasher.

=item $hasher->freeze()

serializes a b3pp into a binary string, in case you would like to, for instance,
associate one of these with a file you are planning to append more data to,
and get the current hash based on the frozen state and the string getting appended,
without having to read the file from the beginning. Also used in the C<clone> method.

=item $class->thaw(serialization)

create a hasher from a frozen hasher

=item $hasher->clone

returns $hasher->thaw($hasher->freeze())

=back

=head1 AUTHOR

David Nicol <davidnicol@gmail.com>

=cut
