[Back to TEXTFILE SWAG index]  [Back to Main SWAG index]  [Original]

Program Dup;
   { delete duplicate lines from a sorted text file }
   { Dup file1 file2 }

Uses
  Dos;

Type
  string3 = string[3];
Const
  WhiteSpace : string3 = #00#09#255;

Const
  NoFAttr : word = $1C; { attributen dir, volume, system }
  FAttr   : word = $23; { readonly-, hidden-, archive attributen }
  BufSize = 16384;      { buffersize 16 KB }

Type
  BufType = array [1..BufSize] of char;

Var
  Fname1, Fname2   : string;
  Line1, Line2     : string;
  tmp1 , tmp2      : string;   { temporary vars for lower case comparing }
  OldFile, NewFile : text;
  OldBuf , NewBuf  : BufType;
  tel              : longint;


function OpenTextFile( var InF: text; name: string; var buffer: BufType ): boolean;
begin
  Assign( InF, Name );
  SetTextBuf( InF, buffer );
  Reset( InF );
  OpenTextFile := ( IOResult = 0 );
end { OpenTextFile };

function CreateTextFile( var OutF: text; name: string; var buffer: BufType ): boolean;
begin
  Assign( OutF, Name );
  SetTextBuf( OutF, buffer );
  Rewrite( OutF );
  CreateTextFile := ( IOResult = 0 );
end { CreateTextFile };


function FileExist( var FName : string ) : Boolean;
  {-Return true if entry is found and if it's a file}
var
  F    : file;
  Attr : Word;
begin
  Assign( F, FName );
  GetFAttr( F, Attr );
  if DosError = 0 then
    FileExist := ( ( Attr and NoFAttr ) = 0 )
    { not dir-, volume- or system bit? }
  else
    FileExist := False;            { DosError }
  {}
end;


procedure StrCopy( var Str1, Str2: string ); assembler;
  { copy str1 to str2 }
asm
        LDS   SI,Str1    { load in DS:SI pointer to str1 }
        CLD              { string operations forward     }
        LES   DI,Str2    { load in ES:DI pointer to str2 }
        XOR   CH,CH      { clear CH                      }
        MOV   CL,[SI]    { length str1 --> CX            }
        INC   CX         { include length byte           }
    REP MOVSB            { copy str1 to str2             }
end  { StrCopy };


procedure Lower( var Str: String );
{ 52 Bytes by Bob Swart, 11-6-1993, FidoNet '80XXX' FASTEST! }
InLine(
  $8C/$DA/               {       mov   DX,DS                 }
  $BB/Ord('A')/
      Ord('Z')-Ord('A')/ {       mov   BX,'Z'-'A'/'A'        }
  $5E/                   {       pop   SI                    }
  $1F/                   {       pop   DS                    }
  $FC/                   {       cld                         }
  $AC/                   {       lodsb                       }
  $88/$C1/               {       mov   CL,AL                 }
  $30/$ED/               {       xor   CH,CH                 }
  $D1/$E9/               {       shr   CX,1                  }
  $73/$0B/               {       jnc   @Part1                }
  $AC/                   {       lodsb                       }
  $28/$D8/               {       sub   AL,BL                 }
  $38/$F8/               {       cmp   AL,BH                 }
  $77/$04/               {       ja    @Part1                }
  $80/$44/$FF/
      Ord('a')-Ord('A')/ {@Loop: ADD   Byte Ptr[SI-1],'a'-'A'}
  $E3/$14/               {@Part1:jcxz  @Exit                 }
  $AD/                   {       lodsw                       }
  $28/$D8/               {       sub   AL,BL                 }
  $38/$F8/               {       cmp   AL,BH                 }
  $77/$04/               {       ja    @Part2                }
  $80/$44/$FE/
      Ord('a')-Ord('A')/ {       ADD   Byte Ptr[SI-2],'a'-'A'}
  $49/                   {@Part2:dec   CX                    }
  $28/$DC/               {       sub   AH,BL                 }
  $38/$FC/               {       cmp   AH,BH                 }
  $77/$EC/               {       ja    @Part1                }
  $EB/$E6/               {       jmp   @Loop                 }
  $8E/$DA                {@Exit: mov   DS,DX                 }
) { LowerFast };


procedure White2Space( var Str: string; const WhiteSpace: string ); assembler;
  { replace white space chars in Str by spaces
    the string WhiteSpace contains the chars to replace }
asm     { setup }
        cld                      { string operations forwards    }
        les   di, str            { ES:DI points to Str           }
        xor   cx, cx             { clear cx                      }
        mov   cl, [di]           { length Str in cl              }
        jcxz  @exit              { if length of Str = 0, exit    }
        inc   di                 { point to 1st char of Str      }
        mov   dx, cx             { store length of Str           }
        mov   bx, di             { pointer to Str                }
        lds   si, WhiteSpace     { DS:SI points to WhiteSpace    }
        mov   ah, [si]           { load length of WhiteSpace     }

@start: cmp   ah, 0              { more chars WhiteSpace left?   }
        jz    @exit              { no, exit                      }
        inc   si                 { point to next char WhiteSpace }
        mov   al, [si]           { next char to hunt             }
        dec   ah                 { ah counting down              }
        xor   dh, dh             { clear dh                      }
        mov   cx, dx             { restore length of Str         }
        mov   di, bx             { restore pointer to Str        }
        mov   dh, ' '            { space char                    }
@scan:
  repne scasb                    { the hunt is on                }
        jnz   @next              { white space found?            }
        mov   [di-1], dh         { yes, replace that one         }
@next:  jcxz  @start             { if no more chars in Str       }
        jmp   @scan              { if more chars in Str          }
@exit:
end  { White2Space };


procedure RTrim( var Str: string ); assembler;
  { remove trailing spaces from str }
asm     { setup }
        std                      { string operations backwards   }
        les   di, str            { ES:DI points to Str           }
        xor   cx, cx             { clear cx                      }
        mov   cl, [di]           { length Str in cl              }
        jcxz  @exit              { if length of Str = 0, exit    }
        mov   bx, di             { bx points to Str              }
        add   di, cx             { start with last char in Str   }
        mov   al, ' '            { hunt for spaces               }

        { remove trailing spaces }
   repe scasb                    { the hunt is on                }
        jz    @null              { only spaces?                  }
        inc   cx                 { no, don't lose last char      }
@null:  mov   [bx], cl           { overwrite length byte of Str  }
@exit:
end  { RTrim };


procedure LTrim( var Str: string ); assembler;
  { remove leading spaces from str }
asm     { setup }
        cld                      { string operations forward          }
        lds   si, str            { DS:SI points to Str                }
        xor   cx, cx             { clear cx                           }
        mov   cl, [si]           { length Str --> cl                  }
        jcxz  @exit              { if length Str = 0, exit            }
        mov   bx, si             { save pointer to length byte of Str }
        inc   si                 { 1st char of Str                    }
        mov   di, si             { pointer to 1st char of Str --> di  }
        mov   al, ' '            { hunt for spaces                    }
        xor   dx, dx             { clear dx                           }

@start: { look for leading spaces }
   repe scasb                    { the hunt is on                     }
        jz    @done              { if only spaces, we are done        }
        inc   cx                 { no, don't lose 1st non-blank char  }
        dec   di                 { no, don't lose 1st non-blank char  }
        mov   dx, cx             { new lenght of Str                  }
        xchg  di, si             { swap si and di                     }
    rep movsb                    { move remaining part of Str         }
@done:  mov   [bx], dl           { new length of Str                  }
@exit:
end  { LTrim };


function LineOK( var str: string ) : Boolean; assembler;
  { Line contains chars > ASCII 20h ? }
asm     { setup }
        xor   ax, ax         { assume false return value        }
        xor   cx, cx         { clear cx                         }
        lds   si, str        { load in DS:SI pointer to Str     }
        mov   cl, [si]       { length Str --> cx                }
        jcxz  @exit          { if no characters, exit           }
        inc   si             { point to 1st character           }

        { look for chars > ASCII 20h }
@start: mov   bl, [si]       { load character                   }
        cmp   bl, ' '        { char > ASCII 20h?                }
        ja    @yes           { yes, return true                 }
        inc   si             { next character                   }
        dec   cx             { count down                       }
        jcxz  @exit          { if no more characters left, exit }
        jmp   @start         { try again                        }
@yes:   mov   ax, 1          { return value true                }
@exit:
end  { LineOK };


procedure TestLine( var Line, tmp : string );
var
  len: byte absolute Line;

  procedure TrimLine;
  begin
    White2Space( Line, WhiteSpace );  { white space to spaces   }
    RTrim( Line );                    { remove trailing spaces  }
  end;

  procedure TrimPlus;
  begin
    TrimLine;
    while Line[len] = '+' do
    begin
      dec( len );
      TrimLine;
    end;
  end;

begin
  TrimPlus;
  while not Eof( OldFile ) and ( IOResult = 0 ) and ((length( Line ) = 0) or not LineOK( Line )) do
  begin
    ReadLn( OldFile, Line );
    TrimPlus;
  end;
  StrCopy( Line, tmp );             { copy to temp string     }
  LTrim( tmp );                     { remove leading spaces   }
  Lower( tmp );                     { translate to lower case }
end;  { TestLine }


begin
  if ParamCount > 1 then             { parameters file1 file2 }
  begin
    Fname1 := FExpand( ParamStr( 1 ) );
    Fname2 := FExpand( ParamStr( 2 ) );
    tel := 0;
    if FileExist( Fname1 ) then
    begin
      if OpenTextFile( OldFile, Fname1, OldBuf ) then
      begin
        if CreateTextFile( NewFile, Fname2, NewBuf ) then
        begin
          ReadLn( OldFile, Line1 );

          if not Eof( OldFile ) and ( IOResult = 0 ) then
          begin
            TestLine( Line1, tmp1 );
            if length( Line1 ) > 0 then
            begin
              WriteLn( NewFile, Line1 );
              tel := 1;
            end;
            ReadLn( OldFile, Line2 );
          end;

          while not Eof( OldFile ) and ( IOResult = 0 ) do
          begin
            TestLine( Line2, tmp2 );
            if (length( Line2 ) > 0) and (tmp1 <> tmp2) then
            begin
              StrCopy( Line2, Line1 );         { copy Line2 to Line1 }
              StrCopy( tmp2, tmp1 );           { copy tmp2  to tmp1  }
              WriteLn( NewFile, Line1 );
              inc( tel );
            end;
            ReadLn( OldFile, Line2 );
          end {while not eof};

          TestLine( Line2, tmp2 );
          if (length( Line2 ) > 0) and (tmp1 <> tmp2) then
          begin
            WriteLn( NewFile, Line2 );
            inc( tel );
          end;

          writeln (tel, ' unique lines');
          Close( NewFile );
          Close( OldFile );
        end { if create file2 }
        else
          writeln(' error creating file ', Fname1 );
        { error creating file }
      end { if open file1 }
      else
        writeln(' error opening file ', Fname1 );
      { error opening file }
    end { if FileExist( Fname1 ) }
    else
      writeln( Fname1, ' not found' );
    { file not found }
  end { if ParamCount > 1 }
  else
    Writeln( 'Dup file1 file2' );
end.

[Back to TEXTFILE SWAG index]  [Back to Main SWAG index]  [Original]