#
# Generates dictionaries and word lists.

# ---------------------------------------------------------------------------
# Stage 1 - runs inside the directory that holds this script.
#
# Reads the word lists `nf` and `nm` and the list `difficulty` (one entry per
# line; comm requires them to be sorted) and derives, for X in {nf, nm}:
#   X1  entries of X matching one of the "facile" ending patterns
#   X2  the remaining entries of X
#   X3  entries of X2 that also appear in `difficulty`
# ---------------------------------------------------------------------------

# Everything below creates and deletes files by relative path, so refuse to
# continue if the script directory cannot be entered.
cd "$(dirname "$0")" || exit 1

# Byte-wise collation for grep/sort/comm/join.  LC_ALL takes precedence over
# LANG, so set and export both; otherwise an inherited LC_ALL would silently
# defeat the LANG=C setting.
LANG=C
LC_ALL=C
export LANG LC_ALL

# Space-separated regular expressions, each matched at end of line
# ("....age" means any four characters followed by "age").
nf_facile="aison lit nit ssion ation ction ition ution phobie nerie trie"
nm_facile="ment isme ant ....age"

# split_by_ending LIST PATTERNS
#   LIST      name of a sorted word-list file in the current directory
#   PATTERNS  space-separated end-of-line regular expressions
# Writes LIST1, LIST2 and LIST3 as described above, using the scratch files
# `tt` and `t2` (left behind; the caller removes them).
split_by_ending() {
  : >tt
  # $2 is left unquoted on purpose: it is a whitespace-separated list.
  for ending in $2; do
    grep -e "${ending}\$" "$1" >>tt
  done
  # An entry may match several patterns; comm needs sorted, unique input.
  sort -u tt >t2
  comm -12 "$1" t2 >"${1}1"
  comm -23 "$1" t2 >"${1}2"
  comm -12 "${1}2" difficulty >"${1}3"
}

split_by_ending nf "$nf_facile"
split_by_ending nm "$nm_facile"
rm -f tt t2

##########################################################

# ---------------------------------------------------------------------------
# Stage 2 - runs one directory above the script directory.
#
# Splits every list in raw/ into:
#   freq/X     lines of raw/X that also appear in raw/fr.freq
#   rare/X     lines of raw/X that are not in freq/X
#   LEVEL/X    lines of raw/X that also appear in db/db-LEVEL
# The colon-separated file raw/m2f is handled with join on its first field.
# ---------------------------------------------------------------------------

# Abort if the parent directory cannot be entered: the commands below create
# directories and delete files by relative path.
cd .. || exit 1
listdic=listdic

# Comma-separated values of the `suffixes` and `levels` entries in ../defs,
# converted to space-separated lists.
suffixes=$(awk -F= '$1 ~ /suffixes/ {print $2}' ../defs | tr , ' ')
levels=$(awk -F= '$1 ~ /levels/ {print $2}' ../defs | tr , ' ')

mkdir -p freq rare
flist="nm nf nm1 nf1 nm2 nf2 nm3 nf3 nmf"

for f in $flist; do
  comm -12 "raw/$f" raw/fr.freq >"freq/$f"
  comm -23 "raw/$f" "freq/$f" >"rare/$f"
done
# Entries common to raw/difficulty and rare/nm.
comm -12 raw/difficulty rare/nm >rare/nmd
join -j 1 -t: raw/m2f raw/fr.freq >freq/m2f
comm -23 raw/m2f freq/m2f >rare/m2f

# NOTE(review): this level list is hard-coded, whereas $levels (read from
# ../defs above) drives a later loop - confirm the two are meant to agree.
for l in CP CE1 CE2 CM1 CM2 6e 5e; do
  mkdir -p "$l"
  for f in $flist; do
    comm -12 "raw/$f" "db/db-$l" >"$l/$f"
  done
  join -j 1 -t: raw/m2f "db/db-$l" >"$l/m2f"
done

# Start from an empty list dictionary; entries are appended to it later.
rm -f "$listdic"

# f2l KEY FILE
#   Append the line "KEY:item1,item2,..." to the file named by $listdic.
#   The items are the lines of FILE that contain at least one
#   non-whitespace field, in file order, joined with commas.  An empty or
#   unreadable FILE yields "KEY:".
# Globals: listdic (read) - output file; list (written) - the joined items.
# Arguments are quoted so that paths containing whitespace or glob
# characters are handled correctly.
f2l() {
  list=$(awk 'NF > 0 { printf("%s%s", sep, $0); sep = "," }' "$2")
  printf '%s:%s\n' "$1" "$list" >>"$listdic"
}

# Fixed list-dictionary entries.  For each group the nm entry is written
# before the nf entry.  The CM2 entries are emitted once only (the original
# table listed the four CM2 calls twice, producing duplicate keys).

# CP, CE1, CE2: the complete per-level lists.
for lvl in CP CE1 CE2; do
  for g in nm nf; do
    f2l "$g-$lvl" "$lvl/$g"
  done
done

# CE2 additionally gets the "d" entries built from the *3 lists.
for g in nm nf; do
  f2l "$g-CE2d" "CE2/${g}3"
done

# CM1 .. 5e: the *2 lists, then the "d" entries from the *3 lists.
for lvl in CM1 CM2 6e 5e; do
  for g in nm nf; do
    f2l "$g-$lvl" "$lvl/${g}2"
  done
  for g in nm nf; do
    f2l "$g-${lvl}d" "$lvl/${g}3"
  done
done

# Entries built from the freq/ directory: *1, *2 and *3 lists.
for g in nm nf; do
  f2l "$g-f" "freq/${g}1"
done
for g in nm nf; do
  f2l "$g-freq" "freq/${g}2"
done
for g in nm nf; do
  f2l "$g-freqd" "freq/${g}3"
done

# Per-level entries for the levels listed in ../defs; a level without a
# matching directory is skipped.
for b in $levels; do
  if [ -d "$b" ]; then
    f2l "nmf-$b" "$b/nmf"
    # Only the first colon-separated field of m2f goes into the list.
    awk -F: 'NF>0 {print $1}' "$b/m2f" >tmp
    f2l "m2f-$b" tmp
  fi
done

# First field of raw/both as a list entry; the file itself becomes both.dic.
awk -F: 'NF>0 {print $1}' raw/both >tmp
f2l mf-both tmp
cp raw/both both.dic

# One nm and one nf entry per suffix.  Each suffix is an extended regular
# expression matched at end of line and may contain [ ] ^, so pathname
# expansion is switched off while the list is expanded, and the pattern is
# quoted when handed to grep.
set -f
for s in $suffixes; do
  # Key-safe form of the suffix: bracket-expression characters removed.
  # NOTE(review): the first set of the first tr is empty in this copy of the
  # file; presumably it held accented characters lost in an encoding
  # conversion - confirm against the original source.
  s2=$(printf '%s\n' "$s" | tr '' 'ee' | tr -d '[]^')
  grep -E -e "$s\$" raw/nm >tmp
  f2l "nm-suf-$s2" tmp
  grep -E -e "$s\$" raw/nf >tmp
  f2l "nf-suf-$s2" tmp
done
set +f

# Dictionary (.dic: sorted "word:word" lines) and record (.rec: ":word"
# lines) files for every type/directory combination.  The plain files come
# from the base list (.dic) and the *2 list (.rec); the "d" files come from
# the *3 list.  Blank lines are skipped.
for typ in nm nf; do
  for dir in raw rare; do
    awk 'NF > 0 { print $0 ":" $0 }' "${dir}/${typ}" \
      | sort >"${typ}-${dir}.dic"
    awk 'NF > 0 { print ":" $0 }' "${dir}/${typ}2" >"${typ}-${dir}.rec"
    awk 'NF > 0 { print $0 ":" $0 }' "${dir}/${typ}3" \
      | sort >"${typ}-${dir}d.dic"
    awk 'NF > 0 { print ":" $0 }' "${dir}/${typ}3" >"${typ}-${dir}d.rec"
  done
done

# nm-raw.dic and nf-raw.dic are then rebuilt (overwriting the versions made
# in the loop above) so that they also contain the first, respectively
# second, field of every raw/m2f line.
awk -F: 'NF > 0 { print $1 }' raw/m2f >tmp
cat tmp raw/nm \
  | sort \
  | uniq \
  | awk '{ print $0 ":" $0 }' >nm-raw.dic

awk -F: 'NF > 0 { print $2 }' raw/m2f >tmp
cat tmp raw/nf \
  | sort \
  | uniq \
  | awk '{ print $0 ":" $0 }' >nf-raw.dic

# m2f.dic: the lines of raw/m2f, sorted and de-duplicated.
sort -t: -k1 <raw/m2f | uniq >m2f.dic

# Final cleanup: drop the scratch file and any n?1/n?2/n?3 files here, then
# the working directories (everything matching C*, the [1-9]e directories,
# rare, freq) and the n?1/n?2/n?3 lists inside raw/.
rm -f tmp n?[123]
rm -fR C* [1-9]e rare freq raw/n?[123]

