7: Streams

7.1: Counting sentences: stream basics

SML full stop count
(* full_stop_count : char list -> int *)
fun full_stop_count cs
    = let
         fun is_full_stop c = c = "."
      in
         length (filter is_full_stop cs)
      end ;
SML stream to list
(* stream_to_list : instream -> char list *)
fun stream_to_list stream
    = if end_of_stream stream
         then []
         else input(stream,1) :: stream_to_list stream ;
SML inefficient sentence count
(* sentence_count : instream -> int *)
fun sentence_count stream
    = full_stop_count (stream_to_list stream) ;
C efficient full stop count
int full_stop_count( char_list cs ) {
  int stops = 0 ;
  while( cs != NULL ) {
    if( head( cs ) == '.' ) {
      stops++ ;
    }
    cs = tail( cs ) ;
  }
  return stops ;
}
C inefficient stream to list
char_list stream_to_list( FILE * stream ) {
  int c = getc( stream ) ;
  if( c == EOF ) {
    return NULL ;
  } else {
    return cons( c, stream_to_list( stream ) ) ;
  }
}
C inefficient sentence count
int sentence_count( FILE * stream ) {
  return full_stop_count( stream_to_list( stream ) ) ;
}

7.1.1: Efficiently transferring a stream to a list

SML compare stream to list
fun stream_to_list stream
    = if end_of_stream stream
      then []
      else input(stream,1) ::
           stream_to_list stream ;
SML compare copy
fun copy xs
    = if xs = []
      then []
      else head xs ::
           copy (tail xs) ;
C stream to list with open list
char_list stream_to_list( FILE * stream ) {      /* name  */
  char_list accu = NULL ;
  char_list last ;
  int c;                                 /* c declaration */
  if( (c = getc(stream) ) != EOF ) {       /* test on end */
    last = accu
         = cons( c, NULL ) ;              /* c not head.. */
    /* no statement here because getc side effects        */
    while( (c = getc(stream) ) != EOF ) {  /* test on end */
      last = last->list_tail
           = cons( c, NULL ) ;            /* c not head.. */
      /* no statement here because getc side effects      */
    }
  }
  return accu ;
}

7.1.2: Avoiding the intermediate list

SML efficient sentence count
(* sentence_count : instream -> int *)
fun sentence_count stream
    = let
         fun count stops
             = if end_of_stream stream
                  then stops
                  else if input (stream, 1) = "."
                          then count (stops+1)
                          else count stops
      in
         count 0
      end ;
C efficient sentence count
int sentence_count( FILE * stream ) {
  int stops = 0 ;
  while( true ) {
    int c = getc( stream ) ;
    if( c == EOF ) {
      return stops ;
    } else if( c == '.' ) {
      stops++ ;
    }
  }
}

7.1.3: IO in C: opening files as streams

C scanf d c lf
int i, n ;
char c ;
double d ;
n = scanf("%d %c %lf", &i, &c, &d ) ;
C scanf 123 q
123 q 3.14
C scanf 123/
123/*3.14
C scanf monkey
int i, n ;
char c ;
n = scanf( "Monkey: %d %c", &i, &c ) ;
SH monkey
Monkey: 13       q
SH money
Money: 100000
C scanf error
int i, n ;
char c ;
double d ;
n = scanf("%lf %d %c", &i, &c, &d ) ;
C printf hello
putchar( 'H' ) ; putchar( 'e' ) ; putchar( 'l' ) ;
putchar( 'l' ) ; putchar( 'o' ) ; putchar( '\n' ) ;
C fscanf d c lf
int i, n ;
double d ;
FILE *in  = fopen( "input.me", "r" ) ;
if( in != NULL ) {
  FILE *out = fopen( "result", "w" ) ;
  if( out != NULL ) {
    n = fscanf( in, "%d %lf", &i, &d ) ;
    if( n == 2 ) {
      fprintf( out, "%f\n", d+i ) ;
    } else {
      fprintf( stderr, "Wrong format in input\n" ) ;
    }
    fclose( out ) ;
  }
  fclose( in ) ;
}

7.2: Mean sentence length: how to avoid state

SML efficient character count
(* character_count : instream -> int *)
fun character_count stream
    = let
         fun count chars
             = if end_of_stream stream
                  then chars
                  else let
                          val c = input (stream, 1)
                       in
                          count (chars+1)
                       end
      in
         count 0
      end ;
SML incorrect mean sentence length
(* mean_sentence_length : instream -> int *)
fun mean_sentence_length stream
    = (character_count stream) div
      (sentence_count stream) ;
SML mean sentence length
(* mean_sentence_length : instream -> int *)
fun mean_sentence_length stream
    = let
         fun count chars stops
             = if end_of_stream stream
                  then chars div stops
                  else if input (stream, 1) = "."
                          then count (chars+1) (stops+1)
                          else count (chars+1) stops
      in
         count 0 0
      end ;
C mean sentence length
int mean_sentence_length( FILE * stream ) {
  int chars = 0 ;
  int stops = 0 ;
  int c ;
  while( true ) {
    c = getc( stream ) ;
    if( c == EOF ) {
      return chars / stops ;
    } else if( c == '.' ) {
      stops++ ;
    }
    chars++ ;
  }
}

7.3: Counting words: how to limit the size of the state

SML word count on lists
(* word_count : char list -> char list -> int *)
fun word_count ws []      = 0
  | word_count ws (t::ts) = if match ws (t::ts)
                               then 1 + word_count ws ts
                               else     word_count ws ts ;
SML match on lists
(* match : char list -> char list -> bool *)
fun match []      t       = true
  | match (w::ws) []      = false
  | match (w::ws) (t::ts) = w = (t:char) andalso match ws ts ;
SML main word count on a list
(* main : instream -> int *)
fun main stream =
  let
     val word = explode "cucumber"
     val text = stream_to_list stream
  in
     word_count word text
  end ;

7.3.1: Using a sliding queue

7.3.2: Implementing the sliding queue in SML

SML sliding list based queue
datatype 'a queue = Queue of (instream * 'a list) ;
SML create a list based queue
(* create : instream -> int -> char queue *)
fun create stream n
    = Queue (stream, stream_to_list stream n) ;

Answer to exercise 7.9

SML stream to list accessing at most n elements
(* stream_to_list : instream -> int -> char list *)
fun stream_to_list stream n
    = if end_of_stream stream orelse n = 0
         then []
         else input(stream,1) :: stream_to_list stream (n-1) ;
C stream to list accessing at most n elements
char_list stream_to_list( FILE * stream, int n ) {
  int c = getc( stream ) ;
  if( c == EOF || n == 0 ) {
    return NULL ;
  } else {
    return cons( c, stream_to_list( stream, n-1 ) ) ;
  }
}
SML fetch contents of a list based queue
(* fetch : 'a queue -> 'a list *)
fun fetch (Queue (stream, list)) = list ;
SML advance a list based queue
(* advance : char queue -> char queue *)
fun advance (Queue (stream, list))
    = if end_of_stream stream
         then Queue (stream, tail list)
         else Queue (stream, tail list @
                             [input (stream, 1)] ) ;
SML is empty on a list based queue
(* is_empty : 'a queue -> bool *)
fun is_empty (Queue (stream, list)) = list = [] ;
SML word count on a list based queue
(* word_count : char list -> char queue -> int *)
fun word_count ws ts
    = if is_empty ts
         then 0
         else if match ws (fetch ts)
                 then 1 + word_count ws (advance ts)
                 else     word_count ws (advance ts) ;
SML main word count on a list based queue
(* main : instream -> int *)
fun main stream =
  let
     val word = explode "cucumber"
     val text = create stream (length word)
  in
     word_count word text
  end ;

7.3.3: Implementing the sliding queue in C

C sliding list based queue
typedef struct queue_struct {
  FILE      * queue_stream ;
  char_list queue_first ;
  char_list queue_last ;
} * char_queue ;
C create a list based queue
#define queue_struct_size sizeof( struct queue_struct )

char_queue create( FILE * stream, int n ) {
  char_queue q   = malloc( queue_struct_size ) ;
  if( q == NULL ) {
    printf( "create: no space\n" ) ;
    abort( ) ;
  }
  q->queue_stream= stream ;
  q->queue_first = stream_to_list( stream, n ) ;
  q->queue_last  = find_last( q->queue_first ) ;
  return q ;
}
C find last tail
char_list find_last( char_list current ) {
  char_list previous = NULL ;
  while( current != NULL ) {
    previous = current ;
    current  = tail( current ) ;
  }
  return previous ;
}
C fetch contents of a list based queue
char_list fetch( char_queue q ) {
  return q->queue_first ;
}
C advance a list based queue
void advance( char_queue q ) {
  char_list old, new;
  int c = getc( q->queue_stream ) ;
  if( c != EOF ) {
    new = cons( c, NULL ) ;                     /* 1 */
    q->queue_last->list_tail = new ;            /* 2 */
    q->queue_last = new ;                       /* 3 */
  }
  old = q->queue_first ;                        /* 4 */
  q->queue_first = q->queue_first->list_tail ;  /* 5 */
  free( old ) ;                                 /* 6 */
}
C is empty on a list based queue
bool is_empty( char_queue q ) {
  return q->queue_first == NULL ;
}
C word count on a list based queue
int word_count( char_list ws, char_queue ts ) {
  int accu = 0 ;
  while( ! is_empty( ts ) ) {
    if( match( ws, fetch( ts ) ) ) {
      accu++ ;
    }
    advance( ts ) ;
  }
  return accu ;
}

7.3.4: Counting words using arrays

SML sliding array based queue
datatype 'a queue = Queue of (instream * int * 'a array) ;
C sliding array based queue
typedef struct queue_struct {
  FILE * queue_stream ;
  int  queue_valid ;
  char * queue_array ;
} * char_queue ;
SML create an array based queue
(* create : instream -> int -> char queue *)
fun create stream n
    = let
         val list  = stream_to_list stream n
         val valid = length list - 1
         val array = list_to_array list
      in
         Queue (stream, valid, array)
      end ;
C create an array based queue
#define queue_struct_size sizeof( struct queue_struct )

char_queue create( FILE * stream, int n ) {
  char_list list  = stream_to_list( stream, n ) ;
  char_queue q    = malloc( queue_struct_size ) ;
  if( q == NULL ) {
    printf( "create: no space\n" ) ;
    abort( ) ;
  }
  q->queue_stream = stream ;
  q->queue_valid  = length( list )-1 ;
  q->queue_array  = list_to_array( list ) ;
  return q ;
}
SML fetch contents of an array based queue
(* fetch : 'a queue -> 'a array *)
fun fetch (Queue (stream, valid, array)) = array ;
C fetch contents of an array based queue
char * fetch( char_queue q ) {
  return q->queue_array ;
}
SML advance an array based queue
(* advance : char queue -> char queue *)
fun advance (Queue (stream, valid, array))
    = let
         fun shift i = if i < valid
                          then sub (array, i+1)
                          else input (stream, 1)
      in
         if end_of_stream stream
            then Queue (stream, valid-1, tabulate (valid, shift))
            else Queue (stream, valid, tabulate (valid+1, shift))
      end ;
C advance an array based queue
void advance( char_queue q ) {
  int c = getc( q->queue_stream ) ;
  int i ;
  for (i = 0; i < q->queue_valid; i++) {
    q->queue_array[i] = q->queue_array[i+1] ;
  }
  if( c == EOF ) {
    q->queue_valid -- ;
  } else {
    q->queue_array[q->queue_valid] = c ;
  }
}
SML is empty on an array based queue
(* is_empty : 'a queue -> bool *)
fun is_empty (Queue (stream, valid, array)) = valid = ~1 ;
C is empty on an array based queue
bool is_empty( char_queue q ) {
  return q->queue_valid == -1 ;
}

7.4: Quicksort

7.4.1: Quicksort on the basis of lists

SML qsort list
(* qsort : char list -> char list *)
fun qsort []      = []
  | qsort (p::xs) = let
                       fun less_eq x = x <= (p:char)
                       fun greater x = x >  (p:char)
                    in
                       qsort (filter less_eq xs)
                           @ [p] @
                       qsort (filter greater xs)
                    end ;
C qsort list
char_list qsort( char_list p_xs ) {
  if( p_xs == NULL) {
    return NULL ;
  } else {
    char      p  = head( p_xs ) ;
    char_list xs = tail( p_xs ) ;
    char_list ls = extra_filter( less_eq, &p, xs ) ;
    char_list gs = extra_filter( greater, &p, xs ) ;
    return append( qsort( ls ),
                   append( cons( p, NULL ),
                           qsort( gs ) ) ) ;
  }
}
C less eq
bool less_eq( void * arg,
              char x ) {
  char * p = arg ;
  return x <= * p ;
}
C greater
bool greater( void * arg,
              char x ) {
  char * p = arg ;
  return x > * p ;
}

Answer to exercise 7.14

C free list
void free_list( char_list xs ) {
  while( xs != NULL ) {
    const char_list last = xs ;
    xs = tail( xs ) ;
    free( last ) ;
  }
}
C qsort list, with garbage collection
char_list qsort( char_list p_xs ) {
  if( p_xs == NULL) {
    return NULL ;
  } else {
    char      p  = head( p_xs ) ;
    char_list xs = tail( p_xs ) ;
    char_list less  = extra_filter( less_eq, &p, xs ) ;
    char_list more  = extra_filter( greater, &p, xs ) ;
    char_list sorted_less = qsort( less ) ;
    char_list sorted_more = qsort( more ) ;
    char_list pivot  = cons( p, NULL ) ;
    char_list sorted = append( sorted_less,
			 append( pivot, sorted_more ) ) ;
    free_list( pivot ) ;
    free_list( less ) ;
    free_list( more ) ;
    free_list( sorted_less ) ;
    return sorted ;
  }
}
C no append to pivot
    ...
    char_list pivot  = cons( p, sorted_more ) ;
    char_list sorted = append( sorted_less, pivot ) ;
    free_list( less ) ;
    ...

7.4.2: Quicksort on the basis of arrays

SML swap
(* swap : 'a array -> int -> int -> 'a array *)
fun swap data i j
    = let
         val data_i = sub (data,  i)
         val data_j = sub (data,  j)
         val data'  = upd (data,  j, data_i)
         val data'' = upd (data', i, data_j)
      in
         data''
      end ;
C swap
void swap( char data[], int i, int j ) {
  char data_i = data[i] ;
  char data_j = data[j] ;
  data[i] = data_j ;
  data[j] = data_i ;
}
SML qsort array
(* qsort : char array -> int -> int -> char array *)
fun qsort data l r
    = if l >= r
         then data
         else let
                 val p       = l
                 val data_p  = sub (data, p)
                 val i       = l
                 val j       = r
                 val (data', i', j')
                             = partition p data_p data l i j r
                 val data''  = qsort data'  l  j'
                 val data''' = qsort data'' i' r
              in
                 data'''
              end ;
C qsort array
void qsort( char data [], int l, int r ) {
  if( l < r) {
    int p = l ;
    char data_p = data[p] ;
    int i = l ;
    int j = r ;
    partition( p, data_p, data, l, &i, &j, r ) ;
    qsort( data, l, j ) ;
    qsort( data, i, r ) ;
  }
}
SML partition array
(* partition : int -> char -> char array ->
               int -> int -> int -> int ->
               (char array * int * int) *)
fun partition p data_p data l i j r
    = let
         val i' = up   data_p data i r
         val j' = down data_p data l j
      in
         if i' < j'
            then partition p data_p (swap data i' j')
                           l (i'+1) (j'-1) r
            else if i' < p
                    then (swap data i' p, i'+1, j')
                    else if p < j'
                            then (swap data p j', i', j'-1)
                            else (data, i', j')
      end ;
C partition array
void partition( int p, char data_p, char data[],
                int l, int *i, int *j, int r ) {
  while( true ) {
    *i = up(   data_p, data, *i, r ) ;
    *j = down( data_p, data, l, *j ) ;
    if( *i < *j ) {
      swap( data, *i, *j ) ;
      (*i)++ ;
      (*j)-- ;
    } else if( *i < p ) {
      swap( data, *i, p ) ;
      (*i)++ ;
      return ;
    } else if( p < *j ) {
      swap( data, p, *j ) ;
      (*j)-- ;
      return ;
    } else {
      return ;
    }
  }
}
SML up array
(* up : char -> char array -> int -> int -> int *)
fun up data_p data i r
    = if i < r andalso sub (data, i) <= (data_p:char)
         then up data_p data (i+1) r
         else i ;
SML down array
(* down : char -> char array -> int -> int -> int *)
fun down data_p data l j
    = if l < j andalso sub (data, j) >= (data_p:char)
         then down data_p data l (j-1)
         else j ;
C up array
int up( char data_p, char data[], int i, int r ) {
  while( i < r && data[i] <= data_p ) {
    i++ ;
  }
  return i ;
}
C down array
int down( char data_p, char data[], int l, int j ) {
  while( l < j && data[j] >= data_p ) {
    j-- ;
  }
  return j ;
}
C wrong ordering of &&
  while( data[i] <= data_p && i < r )
SML main qsort array
(* main : char array *)
val main = let
              val list = explode "ECFBACG"
              val l    = 0
              val r    = length list - 1
              val data = list_to_array list
           in
              qsort data l r
           end ;
C main array
int main( void ) {
  int l = 0 ;
  int r = 6 ;
  char data[] = "ECFBACG" ;
  qsort( data, l, r ) ;
  printf( "%s\n", data ) ;
  return 0 ;
}

Answer to exercise 7.16

C qsort array with integrated partition
void qsort( char data [], int l, int r ) {
  if( l < r) {
    int p = l ;
    char data_p = data[p] ;
    int i = l ;
    int j = r ;
    while( true ) {
      i = up(   data_p, data, i, r ) ;
      j = down( data_p, data, l, j ) ;
      if( i < j ) {
        swap( data, i, j ) ;
        i++ ;
        j-- ;
      } else if( i < p ) {
        swap( data, i, p ) ;
        i++ ;
        break ;
      } else if( p < j ) {
        swap( data, p, j ) ;
        j-- ;
        break ;
      } else {
        break ;
      }
    }
    qsort( data, l, j ) ;
    qsort( data, i, r ) ;
  }
}

Summary

Further exercises

Exercise 7.18

SH database file
John#18813#1963#80#90#75#20#69#
Mary#19900#1946#72#83#75#18#75#
Bob#12055#1969#120#110#100#99#99#
Alice#15133#1972#200#230#75#11#35#

Exercise 7.21

SH cat
(a.out +k | a.out -k) = cat

Answer to exercise 7.21

C Caesar cipher program
#include <stdio.h>
#include <stdlib.h>

void crypt( FILE * in, FILE * out, int k ) {
  char c ;
  while( (c = getc( in ) ) != EOF ) {
    putc( c + k, out ) ;
  }
}

int main( int argc, char * argv [] ) {
  if( argc != 2 ) {
    printf( "usage: %s [+|-]number\n", argv[0] ) ;
    return 1 ;
  } else {
    char * rest ;
    int k = strtol( argv[1], &rest, 10 ) ;
    if( *rest != '\0' ) {
      printf( "usage: %s illegal argument %s\n",
              argv[0], argv[1] ) ;
      return 1 ;
    } else {
      crypt( stdin, stdout, k ) ;
      return 0 ;
    }
  }
}