#! /usr/bin/ruby
# mail indexer prints filename and file offset of each message
# along with the email addresses in that message
#
# use it to generate a database for later use with 
# mailidx-mbox and formail
#
# examples:
#
#   mailidx-positions > /tmp/pos  # indexes all email addresses under ~/Mail
#   : > /tmp/msgs
#   grep -i '@noserose\.' /tmp/pos | egrep -v '(\.nov|~)$' | sort | uniq | mailidx-mbox | formail -D 10000000 /tmp/msgs -s > /tmp/noserose
# 
#   mailidx-positions ~/Mail 'Winged Migration' | sort | uniq > /tmp/pos
#   : > /tmp/msgs
#   egrep -v '(\.nov|~)$' < /tmp/pos | mailidx-mbox | formail -D 10000000 /tmp/msgs -s > /tmp/winged
#

require 'find'

# Print one index record per hit found in the mail file f.
#
# The file is read line by line while tracking the byte offset of the most
# recent line starting with "From " (the mbox message separator), so every
# record carries the offset of the message it was found in.
#
# f:: path of the file to scan
# m:: nil to index email addresses, otherwise a regular expression
#     (anything Regexp.new accepts) to search for
#
# With m nil, each whitespace-separated word that contains something that
# looks like an address prints "ADDRESS OFFSET FILE".  Otherwise each line
# matching m prints "match OFFSET FILE".
#
# NOTE: the address pattern only accepts \w characters before the "@", so
# "john.doe@example.com" is reported as "doe@example.com".
def positions(f, m)
  pattern = m ? Regexp.new(m) : nil # compiled once instead of once per line
  offset = 0 # byte offset of the line currently being examined
  msg = 0    # byte offset where the current message starts

  # Block form so the file is closed even if matching raises.
  File.open(f) do |io|
    io.each_line do |raw|
      # Bytes that are invalid in the read encoding would make regexp
      # matching and split raise; replace them for matching purposes only.
      s = raw.scrub
      msg = offset if s =~ /^From\s/
      if pattern
        puts "match #{msg} #{f}" if s =~ pattern
      else
        s.split.each do |w|
          puts "#{Regexp.last_match(1)} #{msg} #{f}" if w =~ /(\w+@[-\w.]+)/
        end
      end
      # File offsets are in bytes, so count the bytes of the raw line rather
      # than its characters.
      offset += raw.bytesize
    end
  end
end

# Directory to scan: the first argument, defaulting to $HOME/Mail.  HOME is
# only looked up when no directory is given, so an unset HOME cannot break
# an invocation that names the directory explicitly.
d = ARGV[0] || "#{ENV.fetch('HOME')}/Mail"
# Optional second argument: a regular expression to search for instead of
# indexing email addresses (nil when absent).
match = ARGV[1]
Find.find(d) { |f|
  # Only regular files are indexed.  File.file? is simply false for dangling
  # symlinks and entries that vanished mid-walk, where File.stat would raise.
  next unless File.file?(f)
  positions(f, match)
}
