Skip to content

Commit 8ac76a2

Browse files
committed
ParserV1 cleaned up.
* isTerminationLine, parseLine, moveToEntryAtOffset and moveToNextEntry functions are now implemented in the base class. * New AbstractKindleParserConstants class holds parser parameters for a specific parser implementation.
1 parent 965ab84 commit 8ac76a2

File tree

3 files changed

+139
-83
lines changed

3 files changed

+139
-83
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* AbstractKindleParserConstants Class
3+
*
4+
* Parsing of a Clippings file depends on a few constants. These constants determine the
5+
* particular format of the clippings file.
6+
*
7+
* Dated: 11 July 2022
8+
* Author: arjobmukherjee@gmail.com
9+
*/
10+
11+
package coderarjob.kpdfsync.lib.clipparser;
12+
13+
public abstract class AbstractKindleParserConstants
{
    /**
     * Index of the annotation type token (Note or Highlight) once the annotation line has
     * been split on spaces.
     *
     * @return zero-based token index of the annotation type.
     */
    public abstract int getAnnotationLineTypePosition ();

    /**
     * Index of the page number type token (Page or Location) once the annotation line has
     * been split on spaces.
     *
     * @return zero-based token index of the page number type.
     */
    public abstract int getAnnotationLinePageNumberTypePosition ();

    /**
     * Index of the page number or location number token once the annotation line has been
     * split on spaces.
     *
     * @return zero-based token index of the page or location number.
     */
    public abstract int getAnnotationLinePageOrLocationNumberPosition ();

    /**
     * Pattern by which the end of a block (i.e. the termination line) is recognized.
     *
     * The method name is misspelled ("Temination"); it is kept as-is because existing
     * parser implementations override it and existing callers invoke it. New code should
     * prefer {@link #getTerminationLinePattern()}.
     *
     * @return the termination line pattern.
     */
    public abstract String getTeminationLinePattern ();

    /**
     * Correctly spelled alias of {@link #getTeminationLinePattern()}.
     *
     * Simply delegates to the abstract method, so implementations only ever need to
     * provide the original accessor.
     *
     * @return the termination line pattern.
     */
    public String getTerminationLinePattern ()
    {
        return getTeminationLinePattern ();
    }
}

src/coderarjob/kpdfsync/lib/clipparser/AbstractParser.java

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import java.io.FileNotFoundException;
88

99
import coderarjob.ajl.file.ByteOrderMarkTypes;
10+
import coderarjob.kpdfsync.lib.clipparser.ParserResult.SupportedFields;
1011
import coderarjob.ajl.file.ByteOrderMark;
1112

1213
public abstract class AbstractParser
@@ -22,17 +23,23 @@ protected enum ParsingErrors
2223
}
2324

2425
/* Abstract public methods */
25-
public abstract boolean moveToNextEntry() throws Exception;
26-
public abstract void moveToEntryAtOffset (long offset) throws Exception;
27-
public abstract ParsingErrors parseLine(int linei, ParserResult result) throws Exception;
2826
public abstract String getParserVersion();
2927
public abstract String[] getSupportedKindleVersions();
3028

29+
/* Abstract protected methods */
30+
protected abstract ParsingErrors parseLine(int linei, ParserResult res) throws Exception;
31+
protected abstract AbstractKindleParserConstants getKindleParserConstants();
32+
3133
/* Protected fields */
3234
protected String mFileName;
3335
protected RandomAccessFile mFile;
3436
protected Charset mCharset;
3537
protected ParserEvents mParserEvents;
38+
protected boolean mIsInvalidState; /* On parsing error, this is set to true.
39+
False indicates, no error, or file pointer has
40+
moved to the next block after the previous parsing
41+
error.*/
42+
protected AbstractKindleParserConstants mConstants = null;
3643

3744
/* Private fields */
3845
private long mLastFilePointer;
@@ -47,7 +54,11 @@ protected enum ParsingErrors
4754
public void setParserEvents (ParserEvents value) { this.mParserEvents = value; }
4855

4956
/* Hook methods */
50-
protected void onParsingStart() throws Exception { }
57+
protected void onParsingStart() throws Exception
58+
{
59+
if (this.mIsInvalidState)
60+
throw new ParserException ("Invalid parser state : On an invalid line.");
61+
}
5162

5263
protected void onParsingSuccess (ParserResult result) throws Exception
5364
{
@@ -58,6 +69,10 @@ protected void onParsingSuccess (ParserResult result) throws Exception
5869
}
5970
protected void onParsingError(String error, ParserResult result) throws Exception
6071
{
72+
/* Until we move past the current block to the next block, parser remains in invalid
73+
* state. */
74+
mIsInvalidState = true;
75+
6176
ParserEvents e = this.mParserEvents;
6277
if (e == null) return;
6378

@@ -82,6 +97,43 @@ public AbstractParser (String fileName) throws FileNotFoundException, IOExceptio
8297
this.mLastLineRead = null;
8398
this.mLastFilePointer = -1;
8499
this.mParserEvents = null;
100+
this.mIsInvalidState = false;
101+
this.mConstants = getKindleParserConstants();
102+
}
103+
104+
/**
105+
* Moves to the Title of the next block from anywhere in the current block.
106+
* Moves to the start of the next block. This method does not actually parse the lines; it
107+
* just looks for the next termination line.
108+
*
109+
* Returns true if the next block was found, otherwise false.
110+
*/
111+
public boolean moveToNextEntry() throws Exception
112+
{
113+
String linestr = null;
114+
115+
while (true)
116+
{
117+
linestr = readLineWithProperEncoding ();
118+
if (linestr == null)
119+
return false;
120+
121+
if (isTerminationLine (linestr))
122+
break;
123+
}
124+
125+
/* Move past any invalid block.*/
126+
mIsInvalidState = false;
127+
return true;
128+
}
129+
130+
/**
131+
* Moves the file pointer and assumes the next line read to be Title.
132+
*/
133+
public void moveToEntryAtOffset (long offset) throws Exception
134+
{
135+
mFile.seek(offset);
136+
mIsInvalidState = false;
85137
}
86138

87139
protected Charset getCharsetFromByteOrderMarkType (ByteOrderMarkTypes type)
@@ -112,13 +164,15 @@ public ParserResult parse() throws Exception
112164
return null;
113165
}
114166

115-
try {
167+
try
168+
{
116169
onParsingStart();
117170

118171
for (int i = 0; parseError == ParsingErrors.NO_ERROR; i++)
119172
{
120173
if (Thread.interrupted() == true)
121174
throw new InterruptedException();
175+
122176
parseError = parseLine(i, result);
123177
}
124178

@@ -144,6 +198,25 @@ public ParserResult parse() throws Exception
144198
return result;
145199
}
146200

201+
/** Generates a Parser Exception object for subclasses to use.
202+
* This ensures a consistent Exception description.
203+
*/
204+
protected ParserException genParserException (String stage)
205+
{
206+
String errDes = String.format ("Parsing error: '%s' is not '%s'.",
207+
this.lastLineRead(), stage);
208+
return new ParserException (errDes);
209+
}
210+
211+
/**
212+
* Checks if the specified line is the termination line.
213+
*/
214+
protected boolean isTerminationLine (String linestr)
215+
{
216+
assert (linestr != null);
217+
return linestr.equals(mConstants.getTeminationLinePattern());
218+
}
219+
147220
/**
148221
* Checks if End of File has been reached.
149222
*/

src/coderarjob/kpdfsync/lib/clipparser/KindleParserV1.java

Lines changed: 28 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,25 @@
1818

1919
public class KindleParserV1 extends AbstractParser
2020
{
21-
/*
22-
* On paring error, this is set to true. False indicates, no error, or that file pointer has
23-
* moved to the next block after the previous parsing error.
24-
*/
25-
protected boolean mIsInvalidState;
26-
2721
public KindleParserV1 (String fileName) throws FileNotFoundException, IOException
2822
{
2923
/* Clippings file is opened and onClippingsFileOpen hook method is called. */
3024
super (fileName);
31-
32-
/* Default state */
33-
mIsInvalidState = false;
3425
}
3526

3627
/* Implementing abstract methods from AbstractParser*/
28+
protected AbstractKindleParserConstants getKindleParserConstants ()
29+
{
30+
AbstractKindleParserConstants constants = new AbstractKindleParserConstants () {
31+
public int getAnnotationLineTypePosition() { return 2; }
32+
public int getAnnotationLinePageNumberTypePosition() { return 4; }
33+
public int getAnnotationLinePageOrLocationNumberPosition() { return 5; }
34+
public String getTeminationLinePattern () { return "=========="; }
35+
};
36+
37+
return constants;
38+
}
39+
3740
public String getParserVersion ()
3841
{
3942
return "1.0";
@@ -44,65 +47,36 @@ public String[] getSupportedKindleVersions ()
4447
return new String[] {"1.2.4", "1.2.5", "1.2.6"};
4548
}
4649

47-
/**
48-
* Moves to the Title of the next block from anywhere in the current block.
49-
* Moves to the start of the next block. This methods, does not actually parse the lines, it
50-
* just looks for the next termination line.
51-
*
52-
* Returns True, of next block was found, otherwise False.
53-
*/
54-
public boolean moveToNextEntry () throws IOException
55-
{
56-
String linestr = null;
57-
58-
while (true)
59-
{
60-
linestr = readLineWithProperEncoding ();
61-
if (linestr == null)
62-
return false;
63-
64-
if (isTerminationLine (linestr))
65-
break;
66-
}
67-
68-
/* Move past any invalid block.*/
69-
mIsInvalidState = false;
70-
return true;
71-
}
72-
73-
/**
74-
* Moves the file pointer and assumes the next line read to be Title.
75-
*/
76-
public void moveToEntryAtOffset (long offset) throws IOException
77-
{
78-
mFile.seek(offset);
79-
mIsInvalidState = false;
80-
}
81-
8250
/**
8351
* Parses each line of the current block.
8452
* Returns a ParserResult object with the parsed result.
8553
* Null is returned if EOF was reached.
8654
*/
87-
public ParsingErrors parseLine (int lineIndex, ParserResult result)
55+
protected ParsingErrors parseLine(int lineIndex, ParserResult result)
8856
throws IOException, ParserException
8957
{
9058
ParsingErrors err = ParsingErrors.NO_ERROR;
59+
String lineStr = null;
60+
9161
switch (lineIndex)
9262
{
9363
case 0 :
64+
this.readLineWithProperEncoding();
9465
err = parseTitleLine (result);
9566
err.setTag ("Title line");
9667
break;
9768
case 1 :
69+
this.readLineWithProperEncoding();
9870
err = parseAnnotationLine (result);
9971
err.setTag ("Annotation line");
10072
break;
10173
case 2 :
74+
this.readLineWithProperEncoding();
10275
err = parseTextLine (result);
10376
err.setTag ("Text line");
10477
break;
10578
case 3 :
79+
this.readLineWithProperEncoding();
10680
err = parseTerminationLine (result);
10781
err.setTag ("Termination line");
10882
break;
@@ -114,35 +88,17 @@ public ParsingErrors parseLine (int lineIndex, ParserResult result)
11488
return err;
11589
}
11690

117-
protected void onParsingError(String error, ParserResult result) throws Exception
118-
{
119-
/* Until we move past the current block to the next block, parser remains in invalid
120-
* state. */
121-
mIsInvalidState = true;
122-
123-
/* Call base class method */
124-
super.onParsingError (error, result);
125-
}
126-
127-
protected void onParsingStart() throws Exception
128-
{
129-
if (this.mIsInvalidState)
130-
throw new ParserException ("Invalid parser state : On an invalid line.");
131-
132-
/* Call base class method */
133-
super.onParsingStart();
134-
}
135-
13691
/* Class methods */
13792

13893
/**
13994
* Validates Book Title line and adds to ParserResult and returns true if valid.
14095
* If validation fails, false is returned.
14196
*/
142-
protected ParsingErrors parseTitleLine (ParserResult result) throws IOException, ParserException
97+
protected ParsingErrors parseTitleLine (ParserResult result)
98+
throws IOException, ParserException
14399
{
144100
/* Read current line. Cannot be EOF.*/
145-
String linestr = readLineWithProperEncoding();
101+
String linestr = this.lastLineRead();
146102
if (linestr == null)
147103
return ParsingErrors.PARSING_ERROR;
148104

@@ -163,15 +119,15 @@ protected ParsingErrors parseAnnotationLine (ParserResult result)
163119
throws IOException, ParserException
164120
{
165121
/* Read current line. Cannot be EOF.*/
166-
String linestr = readLineWithProperEncoding();
122+
String linestr = this.lastLineRead();
167123
if (linestr == null)
168124
return ParsingErrors.PARSING_ERROR;
169125

170126
boolean isValid = false;
171127
String value = "";
172128

173129
/* Annotation Type */
174-
value = trySplitString (linestr, " ", 2);
130+
value = trySplitString (linestr, " ", mConstants.getAnnotationLineTypePosition());
175131
isValid = (value != null)
176132
&& (value.toLowerCase().equals ("highlight")
177133
|| value.toLowerCase().equals ("note")
@@ -184,7 +140,7 @@ protected ParsingErrors parseAnnotationLine (ParserResult result)
184140
String annotationType = value;
185141

186142
/* Page Number Type */
187-
value = trySplitString (linestr, " ", 4);
143+
value = trySplitString (linestr, " ", mConstants.getAnnotationLinePageNumberTypePosition());
188144
isValid = (value != null)
189145
&& (value.toLowerCase().equals("page")
190146
|| value.toLowerCase().equals("location"));
@@ -195,7 +151,7 @@ protected ParsingErrors parseAnnotationLine (ParserResult result)
195151
result.setFieldValue (SupportedFields.PAGE_NUMBER_TYPE, value);
196152

197153
/* Page or Location Number */
198-
value = trySplitString (linestr, " ", 5);
154+
value = trySplitString (linestr, " ", mConstants.getAnnotationLinePageOrLocationNumberPosition());
199155
isValid = (value != null);
200156
if (isValid == false)
201157
return ParsingErrors.PARSING_ERROR;
@@ -223,7 +179,7 @@ protected ParsingErrors parseAnnotationLine (ParserResult result)
223179
protected ParsingErrors parseTextLine (ParserResult result) throws IOException, ParserException
224180
{
225181
/* Read current line. Cannot be EOF.*/
226-
String linestr = readLineWithProperEncoding();
182+
String linestr = this.lastLineRead();
227183
if (linestr == null)
228184
return ParsingErrors.PARSING_ERROR;
229185

@@ -268,7 +224,7 @@ protected ParsingErrors parseTerminationLine (ParserResult result)
268224
throws IOException, ParserException
269225
{
270226
/* Read current line. Cannot be EOF.*/
271-
String linestr = readLineWithProperEncoding();
227+
String linestr = this.lastLineRead();
272228
if (linestr == null)
273229
return ParsingErrors.PARSING_ERROR;
274230

@@ -277,12 +233,6 @@ protected ParsingErrors parseTerminationLine (ParserResult result)
277233
return (isValid == true) ? ParsingErrors.NO_ERROR : ParsingErrors.PARSING_ERROR;
278234
}
279235

280-
protected boolean isTerminationLine (String linestr)
281-
{
282-
assert (linestr != null);
283-
return linestr.equals("==========");
284-
}
285-
286236
protected String trySplitString (String s, String p, int index)
287237
{
288238
assert (s != null);

0 commit comments

Comments
 (0)