unit DCT;
//https://groups.google.com/forum/?hl=pl&fromgroups#!search/FDCT$20code/comp.graphics.algorithms/tid5yNpkNE0/TlU_CYKNvx4J

//---------------------------------------------------------------------
//Unit Feig;
{ The Feig direct 2D scaled Discrete Cosine Transform.

  Feig 2D method extends Arai, Agui and Nakajima fast scaled DCT to
  2D (464 adds and 80 mult.) with further computational saving
  (462 adds, 54 mults and 6 shits).

  The forward DCT is described with flow diagrams from the Pennebaker/
  Mitchell JPEG book. The inverse DCT flow diagrams are obtained
  from the inverse matrices. Scaling must be done accordingly.

  Jacques NOMSSI NZALI, May 16th 1995 }


interface

type
   vek8 = array[0..7] of real;
   matrix  = array[0..7] of vek8;

type
   ivek8 = array[0..7] of smallInt;
   imatrix  = array[0..7] of ivek8;

procedure Feig_2D_FDCT(var data :imatrix);
  { forward discrete cosine transform }

procedure Feig_2D_IDCT(var sample:imatrix);
  { inverse discrete cosine transform }

implementation

const
  IFX_CONST = 1 shl 8;
  PASS_BITS = 3;

procedure Feig_2D_FDCT(var data :imatrix);
Const
  CONST_C4 = 0.707106781;
  FP_C4    = smallInt(Round(IFX_CONST*CONST_C4));
  FP_C4_2  = smallInt(Round(IFX_CONST*CONST_C4/2)); { c4/2 }


  function Descale(x : smallInt):smallInt;
  begin
    DeScale := x div (8 shl PASS_BITS);

    { DeScale := x shr (3 + PASS_BITS); Borland Pascal SHR is unsigned }
  end;

  function Multiply(X, Y: smallInt): smallInt;
  begin
    Multiply := smallInt( X*LongInt(Y) div IFX_CONST);
  end;

  procedure N1(var x, y : smallInt);   { rotator 1 }
  Const
    FP_a2 = smallInt(Round(IFX_CONST*0.541196100));
    FP_a4 = smallInt(Round(IFX_CONST*1.306562965));
    FP_a5 = smallInt(Round(IFX_CONST*0.382683433));
  var
    z5 : smallInt;
  begin
    x := - x; { rotator is not yet modified to avoid extra negation }

    z5 := Multiply(x - y, FP_a5);  { c6 }
    x := Multiply(x, FP_a2) + z5;  { c2-c6 }
    y := Multiply(y, FP_a4) + z5;  { c2+c6 }
  end;

  Procedure N2(var x, y : smallInt); { N1 scaled by c4 }
  Const
    FP_b2 = smallInt(Round(IFX_CONST*0.541196100*CONST_C4));
    FP_b4 = smallInt(Round(IFX_CONST*1.306562965*CONST_C4));
    FP_b5 = smallInt(Round(IFX_CONST*0.382683433*CONST_C4));
  var
    z5 : smallInt;
  begin
    x := - x;  { rotator is not yet modified to avoid extra negation }

    z5 := Multiply(x - y, FP_b5);
    x := Multiply(x, FP_b2) + z5;
    y := Multiply(y, FP_b4) + z5;
  end;

var
  z10, z11, z12, z13,
  tmp0,tmp1,tmp2,tmp3,
  tmp4,tmp5,tmp6,tmp7,
  tmp10,tmp11,
  tmp12,tmp13 : smallInt;
  column, row : byte;
var
  wrkspace : imatrix;

begin
  { R2 x R2 tensor }
  for column := 7 downto 0 do
  BEGIN
    { even part }
    tmp0 := data[0,column] shl PASS_BITS;  { first permutation }
    tmp1 := data[1,column] shl PASS_BITS;  { column <-> rows }
    tmp2 := data[2,column] shl PASS_BITS;
    tmp3 := data[3,column] shl PASS_BITS;
    { Odd part }
    tmp4 := data[4,column] shl PASS_BITS;
    tmp5 := data[5,column] shl PASS_BITS;
    tmp6 := data[6,column] shl PASS_BITS;
    tmp7 := data[7,column] shl PASS_BITS;
    { even part }
    tmp10 := tmp0 + tmp7;
    tmp11 := tmp1 + tmp6;
    tmp12 := tmp2 + tmp5;
    tmp13 := tmp3 + tmp4;

    z10 := tmp10 + tmp13;
    z13 := tmp10 - tmp13;
    z11 := tmp11 + tmp12;
    z12 := tmp11 - tmp12;

    wrkspace[0,column] := z10 + z11;     { second permutation }
    wrkspace[1,column] := z10 - z11;     { rows to columns }

    wrkspace[2,column] := z12 + z13;
    wrkspace[3,column] := z13;

    { Odd part }
    tmp13 := tmp0 - tmp7;
    tmp12 := tmp1 - tmp6;
    tmp11 := tmp2 - tmp5;
    tmp10 := tmp3 - tmp4;

    wrkspace[4,column] := -(tmp10 + tmp11);
    wrkspace[5,column] := tmp11 + tmp12;

    wrkspace[6,column] := tmp12 + tmp13;
    wrkspace[7,column] := tmp13;
  END;

  for row := 0 to 7 do
  BEGIN { Odd part }
    { even part }
    tmp0 := wrkspace[row,0];
    tmp1 := wrkspace[row,1];
    tmp2 := wrkspace[row,2];
    tmp3 := wrkspace[row,3];
    { Odd part }
    tmp4 := wrkspace[row,4];
    tmp5 := wrkspace[row,5];
    tmp6 := wrkspace[row,6];
    tmp7 := wrkspace[row,7];

    { even part }
    tmp10 := tmp0 + tmp7;
    tmp11 := tmp1 + tmp6;
    tmp12 := tmp2 + tmp5;
    tmp13 := tmp3 + tmp4;

    z10 := tmp10 + tmp13;
    z13 := tmp10 - tmp13;
    z11 := tmp11 + tmp12;
    z12 := tmp11 - tmp12;

    wrkspace[row,0] := z10 + z11;
    wrkspace[row,1] := z10 - z11;

    wrkspace[row,2] := z12 + z13;
    wrkspace[row,3] := z13;

    { Odd part }
    tmp13 := tmp0 - tmp7;
    tmp12 := tmp1 - tmp6;
    tmp11 := tmp2 - tmp5;
    tmp10 := tmp3 - tmp4;

    wrkspace[row,4] :=-(tmp10 + tmp11);
    wrkspace[row,5] := tmp11 + tmp12;

    wrkspace[row,6] := tmp12 + tmp13;
    wrkspace[row,7] := tmp13;
  END;

  { M x M tensor }
  for row := 0 to 7 do
  Case row of
  0,1,3,7: { M1 }
    begin
      wrkspace[row,2] := Multiply(wrkspace[row,2], FP_C4);
      wrkspace[row,5] := Multiply(wrkspace[row,5], FP_C4);

      N1(wrkspace[row,4], wrkspace[row,6]);
    end;
  2,5: { M2 }
    begin
      wrkspace[row,0] := Multiply(wrkspace[row,0], FP_C4);
      wrkspace[row,1] := Multiply(wrkspace[row,1], FP_C4);
      wrkspace[row,3] := Multiply(wrkspace[row,3], FP_C4);
      wrkspace[row,7] := Multiply(wrkspace[row,7], FP_C4);

      wrkspace[row,2] := wrkspace[row,2] div 2;
      wrkspace[row,5] := wrkspace[row,5] div 2;

      N2(wrkspace[row,4], wrkspace[row,6]);
    end;
  end; { Case }

  { M x N tensor, rows 4,6 }
  begin
    N1(wrkspace[4,0], wrkspace[6,0]);
    N1(wrkspace[4,1], wrkspace[6,1]);
    N1(wrkspace[4,3], wrkspace[6,3]);
    N1(wrkspace[4,7], wrkspace[6,7]);

    N2(wrkspace[4,2], wrkspace[6,2]);
    N2(wrkspace[4,5], wrkspace[6,5]);

    { N3 }
    tmp0 := wrkspace[4,4];
    tmp1 := wrkspace[6,4];
    tmp2 := wrkspace[4,6];
    tmp3 := wrkspace[6,6];

    z10 := tmp0 - tmp3;
    z11 := tmp1 + tmp2;

    z12 := tmp0 + tmp3;
    z13 := tmp1 - tmp2;

    tmp0 := Multiply(z10 + z11, FP_C4_2);
    tmp1 := Multiply(z10 - z11, FP_C4_2);

    tmp2 := z12 div 2;       { shifts }
    tmp3 := z13 div (-2);

    wrkspace[4,4] := tmp2 + tmp0;
    wrkspace[6,4] := tmp1 + tmp3;

    wrkspace[4,6] := tmp1 - tmp3;
    wrkspace[6,6] := tmp2 - tmp0;
   end;

  { R1 x R1 }
  for row := 0 to 7 do
  begin
    { even part }
    tmp0 := wrkspace[row,0];
    tmp1 := wrkspace[row,1];
    tmp2 := wrkspace[row,2];
    tmp3 := wrkspace[row,3];
    { Odd part }
    tmp4 := wrkspace[row,4];
    tmp5 := wrkspace[row,5];
    tmp6 := wrkspace[row,6];
    tmp7 := wrkspace[row,7];

    { even part }
    wrkspace[row,0] := tmp0;
    wrkspace[row,4] := tmp1;

    wrkspace[row,2] := tmp3 + tmp2;
    wrkspace[row,6] := tmp3 - tmp2;

    { Odd part }
    z11 := tmp7 + tmp5;
    z13 := tmp7 - tmp5;

    wrkspace[row,5] := z13 + tmp4;
    wrkspace[row,1] := z11 + tmp6;

    wrkspace[row,7] := z11 - tmp6;
    wrkspace[row,3] := z13 - tmp4;
  End;

  for column := 7 downto 0 do
  Begin
    { even part }
    tmp0 := wrkspace[0,column];           { permutation }
    tmp1 := wrkspace[1,column];
    tmp2 := wrkspace[2,column];
    tmp3 := wrkspace[3,column];

    data[0,column] := DeScale(tmp0);
    data[4,column] := DeScale(tmp1);

    data[2,column] := DeScale(tmp3 + tmp2);
    data[6,column] := DeScale(tmp3 - tmp2);

    { Odd part }
    tmp4 := wrkspace[4,column];
    tmp5 := wrkspace[5,column];
    tmp6 := wrkspace[6,column];
    tmp7 := wrkspace[7,column];

    z11 := tmp7 + tmp5;
    z13 := tmp7 - tmp5;

    data[5,column] := DeScale(z13 + tmp4);
    data[1,column] := DeScale(z11 + tmp6);

    data[7,column] := DeScale(z11 - tmp6);
    data[3,column] := DeScale(z13 - tmp4);
  End;

End; { Feig FDCT ---------------------------------------- }

procedure Feig_2D_IDCT(var sample:imatrix);
Const
  CONST_IC4 = 1.414213562; { 1/0.707106781; }
  FP_IC4    = smallInt(Round(IFX_CONST*CONST_IC4));
  FP_I_C4_2  = FP_IC4;

  Function Descale(x : smallInt):smallInt;
  begin
    DeScale := (x+ (4 shl PASS_BITS)) div (8 shl PASS_BITS);
    { DeScale := x sar (3 + PASS_BITS);
      Borland Pascal SHR is unsigned }
  end;
  {
  function Multiply(X, Y: smallInt): smallInt;
  begin
    Multiply := smallInt( X*LongInt(Y) div IFX_CONST);
  end;
  }
  function Multiply(X, Y: smallInt): smallInt; assembler;
  asm
    mov ax, X
    imul Y
    mov al, ah
    mov ah, dl
  end;


var
  z10, z11, z12, z13,
  tmp0,tmp1,tmp2,tmp3,
  tmp4,tmp5,tmp6,tmp7,
  tmp10,tmp11,
  tmp12,tmp13 : smallInt;
  column, row : byte;

  Procedure N1(var x, y : smallInt);   { rotator 1 }
  Const
    FP_a5 = smallInt(Round(IFX_CONST*1.847759065));
    FP_a4 = smallInt(Round(IFX_CONST*2.613125930));
    FP_a2 = smallInt(Round(IFX_CONST*1.082392200));
  var
    z5, tmp : smallInt;
  begin
    tmp := x;

    z5 := Multiply(tmp + y, FP_a5);  { c6 }
    x := Multiply(y, FP_a2) - z5;  { c2-c6 }
    y := Multiply(tmp, -FP_a4) + z5;  { c2+c6 }
  end;

  Procedure N2(var x, y : smallInt); { N1 scaled by c4 }
  Const
    FP_b5 = smallInt(Round(IFX_CONST*1.847759065*CONST_IC4));
    FP_b4 = smallInt(Round(IFX_CONST*2.613125930*CONST_IC4));
    FP_b2 = smallInt(Round(IFX_CONST*1.082392200*CONST_IC4));
  var
    z5, tmp : smallInt;
  begin
    tmp := x;

    z5 := Multiply(tmp + y, FP_b5);
    x := Multiply(y, FP_b2) - z5;
    y := Multiply(tmp,-FP_b4) + z5;
  end;

begin
  { R1 x R1 }
  for column := 7 downto 0 do
  BEGIN
    { Odd part }
    tmp4 := sample[5,column] shl PASS_BITS;
    tmp5 := sample[1,column] shl PASS_BITS;
    tmp6 := sample[7,column] shl PASS_BITS;
    tmp7 := sample[3,column] shl PASS_BITS;

    { even part }
    tmp0 := sample[0,column] shl PASS_BITS;
    tmp1 := sample[4,column] shl PASS_BITS;
    tmp2 := sample[2,column] shl PASS_BITS;
    tmp3 := sample[6,column] shl PASS_BITS;

    z10 := tmp4 - tmp7;
    z11 := tmp5 + tmp6;
    z12 := tmp5 - tmp6;
    z13 := tmp4 + tmp7;

    sample[4,column] := z10;             { phase 5 }
    sample[5,column] := z11 - z13;

    sample[6,column] := z12;
    sample[7,column] := z11 + z13;


    sample[0,column] := tmp0;
    sample[1,column] := tmp1;

    sample[2,column] := tmp2 - tmp3;
    sample[3,column] := tmp2 + tmp3;
  END;

  for row := 7 downto 0 do
  BEGIN
    { even part }
    tmp0 := sample[row,0];
    tmp1 := sample[row,4];
    tmp2 := sample[row,2];
    tmp3 := sample[row,6];
    { Odd part }
    tmp4 := sample[row,5];
    tmp5 := sample[row,1];
    tmp6 := sample[row,7];
    tmp7 := sample[row,3];

    sample[row,0] := tmp0;
    sample[row,1] := tmp1;
    sample[row,2] := tmp2 - tmp3;
    sample[row,3] := tmp2 + tmp3;

    { Odd part }
    z10 := tmp4 - tmp7;
    z11 := tmp5 + tmp6;

    z12 := tmp5 - tmp6;
    z13 := tmp4 + tmp7;

    sample[row,4] := z10;
    sample[row,5] := z11 - z13;
    sample[row,6] := z12;
    sample[row,7] := z11 + z13;
  END;

  { M x M tensor }
  for row := 0 to 7 do
  Case row of
  0,1,3,7: { M1 }
    begin
      sample[row,2] := Multiply(sample[row,2], FP_IC4);     { 2/c4 }
      sample[row,5] := Multiply(sample[row,5], FP_IC4);     { 2/c4 }

      N1(sample[row, 4], sample[row, 6]);
    end;
  2,5: { M2 }
    begin
      sample[row,0] := Multiply(sample[row,0], FP_IC4);
      sample[row,1] := Multiply(sample[row,1], FP_IC4);
      sample[row,3] := Multiply(sample[row,3], FP_IC4);
      sample[row,7] := Multiply(sample[row,7], FP_IC4);

      sample[row,2] := sample[row,2] * 2;  { shift }
      sample[row,5] := sample[row,5] * 2;

      N2(sample[row,4], sample[row,6]);
    end;
  end; { Case }

  { M x N tensor }
  { rows 4,6 }
  begin
    N1(sample[4,0], sample[6,0]);
    N1(sample[4,1], sample[6,1]);
    N1(sample[4,3], sample[6,3]);
    N1(sample[4,7], sample[6,7]);

    N2(sample[4,2], sample[6,2]);
    N2(sample[4,5], sample[6,5]);

    { N3 }
    tmp0 := sample[4,4];
    tmp1 := sample[6,4];
    tmp2 := sample[4,6];
    tmp3 := sample[6,6];

    { two inverse matrices => same as FDCT }
    z10 := tmp0 - tmp3;
    z11 := tmp1 + tmp2;

    z12 := tmp0 + tmp3;
    z13 := tmp1 - tmp2;

    tmp0 := Multiply(z10 + z11, FP_I_C4_2);
    tmp1 := Multiply(z10 - z11, FP_I_C4_2);

    tmp2 := z12 * 2;       { shifts }
    tmp3 := z13 * (-2);


    sample[4,4] := tmp2 + tmp0;
    sample[6,4] := tmp1 + tmp3;

    sample[4,6] := tmp1 - tmp3;
    sample[6,6] := tmp2 - tmp0;
  end;

  { R2 x R2 }

  for row := 0 to 7 do
  BEGIN
    { even part }
    tmp0 := sample[row,0];
    tmp1 := sample[row,1];
    tmp2 := sample[row,2];
    tmp3 := sample[row,3];

    tmp10 := tmp0 + tmp1;
    tmp11 := tmp0 - tmp1;

    tmp12 := tmp2 - tmp3;
    tmp13 := tmp3;

    tmp0 := tmp10 + tmp13;
    tmp3 := tmp10 - tmp13;

    tmp1 := tmp11 + tmp12;
    tmp2 := tmp11 - tmp12;

    { Odd part }
    tmp4 := sample[row,4];
    tmp5 := sample[row,5];
    tmp6 := sample[row,6];
    tmp7 := sample[row,7];

    tmp6 := tmp6 - tmp7;
    tmp5 := tmp5 - tmp6;
    tmp4 :=-tmp4 - tmp5;

    sample[row,0] := (tmp0 + tmp7);
    sample[row,1] := (tmp1 + tmp6);

    sample[row,2] := (tmp2 + tmp5);
    sample[row,3] := (tmp3 + tmp4);

    sample[row,4] := (tmp3 - tmp4);
    sample[row,5] := (tmp2 - tmp5);

    sample[row,6] := (tmp1 - tmp6);
    sample[row,7] := (tmp0 - tmp7);
  END;

  for column := 0 to 7 do
  BEGIN
    { even part }
    tmp0 := sample[0,column];
    tmp1 := sample[1,column];
    tmp2 := sample[2,column];
    tmp3 := sample[3,column];

    tmp10 := tmp0 + tmp1;
    tmp11 := tmp0 - tmp1;

    tmp13 := tmp3;
    tmp12 := tmp2 - tmp3;

    tmp0 := tmp10 + tmp13;
    tmp3 := tmp10 - tmp13;

    tmp1 := tmp11 + tmp12;
    tmp2 := tmp11 - tmp12;

    { Odd part }
    tmp4 := sample[4,column];
    tmp5 := sample[5,column];
    tmp6 := sample[6,column];
    tmp7 := sample[7,column];

    tmp6 := tmp6 - tmp7;
    tmp5 := tmp5 - tmp6;
    tmp4 :=-tmp4 - tmp5;

    sample[0,column] := DeScale(tmp0 + tmp7);
    sample[1,column] := DeScale(tmp1 + tmp6);

    sample[2,column] := DeScale(tmp2 + tmp5);
    sample[3,column] := DeScale(tmp3 + tmp4);

    sample[4,column] := DeScale(tmp3 - tmp4);
    sample[5,column] := DeScale(tmp2 - tmp5);

    sample[6,column] := DeScale(tmp1 - tmp6);
    sample[7,column] := DeScale(tmp0 - tmp7);
  END;
End; {----------------------------------------}


End.


