File tree Expand file tree Collapse file tree 1 file changed +24
-1
lines changed Expand file tree Collapse file tree 1 file changed +24
-1
lines changed Original file line number Diff line number Diff line change 2
2
3
3
# Print the command-line synopsis plus a reference table of token counts,
# then terminate the script.
#
# Arguments: none (uses $0 as the script name in the synopsis).
# Outputs:   help text on stdout.
# Exits:     always ends the script with status 1; it never returns.
function usage {
    # Script name first, then the argument placeholder.
    echo "usage: $0 <n>"
    echo "note: n is the number of essays to download"
    echo "for specific n, the resulting pg.txt file will have the following number of tokens:"

    # printf re-applies the format to each successive pair of arguments,
    # which keeps the "n" column right-aligned for every row.
    printf '%3s | %s\n' \
        n   tokens \
        --- --- \
        1   6230 \
        2   23619 \
        5   25859 \
        10  36888 \
        15  50188 \
        20  59094 \
        25  88764 \
        30  103121 \
        32  108338 \
        35  113403 \
        40  127699 \
        45  135896

    exit 1
}
7
23
@@ -33,10 +49,17 @@ if [ -f pg.txt ]; then
33
49
rm pg.txt
34
50
fi
35
51
52
+ c=1
36
53
for url in $urls ; do
37
54
echo " processing $url "
38
55
39
- curl -L $url | html2text | tail -n +4 | sed -E " s/^[[:space:]]+//g" | fmt -w 80 >> pg.txt
56
+ cc=$( printf " %03d" $c )
57
+
58
+ curl -L $url | html2text | tail -n +4 | sed -E " s/^[[:space:]]+//g" | fmt -w 80 >> pg-$cc -one.txt
59
+ cat pg-$cc -one.txt >> pg.txt
60
+
61
+ cp -v pg.txt pg-$cc -all.txt
62
+ c=$(( c+ 1 ))
40
63
41
64
# don't flood the server
42
65
sleep 1
You can’t perform that action at this time.
0 commit comments