@Singleton @Named public class ContentParserPpt extends AbstractContentParserPoi
Implementation of the ContentParser interface for binary MS-PowerPoint documents.
Modifier and Type | Field and Description |
---|---|
protected static String |
ENCODING_UTF16LE
encoding name
|
static String |
KEY_EXTENSION
The default extension.
|
static String |
KEY_MIMETYPE
The mimetype.
|
protected static int |
PPT_RECORD_INFO_OFFSET
first record item: info (little endian short)
|
protected static int |
PPT_RECORD_LENGTH
third record item: size (little endian int)
|
protected static int |
PPT_RECORD_SIZE_OFFSET
third record item: size (little endian int)
|
protected static int |
PPT_RECORD_TYPE_OFFSET
second record item: type (little endian short)
|
protected static int |
PPT_TYPE_CHAR_STRING
type of character stream
|
protected static int |
PPT_TYPE_DOCUMENT
type of document
|
protected static int |
PPT_TYPE_DOCUMENT_ATOM
type of document atom - ignore
|
protected static int |
PPT_TYPE_DRAWING
type of drawing - ignore
|
protected static int |
PPT_TYPE_DRAWING_GROUP
type of drawing group - ignore
|
protected static int |
PPT_TYPE_ENVIRONMENT
type of environment - ignore
|
protected static int |
PPT_TYPE_ESCHER_DG
type of escher dg
|
protected static int |
PPT_TYPE_ESCHER_DG_CONTAINER
type of escher dg container
|
protected static int |
PPT_TYPE_ESCHER_OPT
type of escher opt
|
protected static int |
PPT_TYPE_ESCHER_SP
type of escher sp
|
protected static int |
PPT_TYPE_ESCHER_SP_CONTAINER
type of escher sp container
|
protected static int |
PPT_TYPE_ESCHER_SPGR_CONTAINER
type of escher spgr container
|
protected static int |
PPT_TYPE_ESCHER_TEXTBOX
type of escher textbox
|
protected static int |
PPT_TYPE_EXTENDED_OBJECT_LIST
type of extended object list - ignore
|
protected static int |
PPT_TYPE_HEADER_FOOTER
type of header/footer
|
protected static int |
PPT_TYPE_HEADER_FOOTER_ATOM
type of header/footer atom
|
protected static int |
PPT_TYPE_INTERACTIVE_INFO
type of interactive info
|
protected static int |
PPT_TYPE_LIST
type of list - ignore
|
protected static int |
PPT_TYPE_MAIN_MASTER
type of main master - ignore
|
protected static int |
PPT_TYPE_NOTES
type of notes
|
protected static int |
PPT_TYPE_NOTES_ATOM
type of notes atom
|
protected static int |
PPT_TYPE_SLIDE
type of slide
|
protected static int |
PPT_TYPE_SLIDE_ATOM
type of slide atom
|
protected static int |
PPT_TYPE_SLIDE_LIST_WITH_TEXT
type of slide-list
|
protected static int |
PPT_TYPE_SLIDE_PERSIST_ATOM
type of slide persist atom - ignore
|
protected static int |
PPT_TYPE_SPEC_INFO_ATOM
type of spec info atom - ignore
|
protected static int |
PPT_TYPE_STYLE_TEXT_PROPERTY_ATOM
type of style text property atom
|
protected static int |
PPT_TYPE_TEXT_BYTES_ATOM
type of text bytes atom
|
protected static int |
PPT_TYPE_TEXT_CHARS_ATOM
type of text chars atom
|
protected static int |
PPT_TYPE_TEXT_HEADER_ATOM
type of text header atom
|
protected static int |
PPT_TYPE_TX_INTERACTIVE_INFO_ATOM
type of tx interactive info atom
|
POIFS_EXCEL_DOC, POIFS_POWERPOINT_DOC, POIFS_WORD_DOC
VARIABLE_NAME_CREATOR, VARIABLE_NAME_KEYWORDS, VARIABLE_NAME_LANGUAGE, VARIABLE_NAME_TEXT, VARIABLE_NAME_TITLE
Constructor and Description |
---|
ContentParserPpt()
The constructor.
|
Modifier and Type | Method and Description |
---|---|
private void |
extractRecursive(byte[] buffer,
int offset,
int length,
StringBuffer textBuffer) |
protected String |
extractText(org.apache.poi.poifs.filesystem.POIFSFileSystem poiFs,
long filesize,
ContentParserOptions options)
This method extracts the text from the office document given by
poiFs. |
String[] |
getAlternativeKeyArray()
|
String |
getExtension()
This method gets the default filename extension excluding the dot.
|
String |
getMimetype()
This method gets the default mimetype.
|
parse
doInitialize, getPrimaryKeys, getSecondaryKeyArray, getSecondaryKeys, parse, parse, setGenericContextFactory
createLogger, getLogger
doInitialized, getInitializationState, initialize
public static final String KEY_MIMETYPE
public static final String KEY_EXTENSION
protected static final int PPT_TYPE_DOCUMENT
protected static final int PPT_TYPE_DOCUMENT_ATOM
protected static final int PPT_TYPE_SLIDE
protected static final int PPT_TYPE_SLIDE_ATOM
protected static final int PPT_TYPE_NOTES
protected static final int PPT_TYPE_NOTES_ATOM
protected static final int PPT_TYPE_ENVIRONMENT
protected static final int PPT_TYPE_SLIDE_PERSIST_ATOM
protected static final int PPT_TYPE_MAIN_MASTER
protected static final int PPT_TYPE_EXTENDED_OBJECT_LIST
protected static final int PPT_TYPE_DRAWING_GROUP
protected static final int PPT_TYPE_DRAWING
protected static final int PPT_TYPE_LIST
protected static final int PPT_TYPE_TEXT_HEADER_ATOM
protected static final int PPT_TYPE_TEXT_CHARS_ATOM
protected static final int PPT_TYPE_STYLE_TEXT_PROPERTY_ATOM
protected static final int PPT_TYPE_TEXT_BYTES_ATOM
protected static final int PPT_TYPE_SPEC_INFO_ATOM
protected static final int PPT_TYPE_CHAR_STRING
protected static final int PPT_TYPE_HEADER_FOOTER
protected static final int PPT_TYPE_HEADER_FOOTER_ATOM
protected static final int PPT_TYPE_TX_INTERACTIVE_INFO_ATOM
protected static final int PPT_TYPE_SLIDE_LIST_WITH_TEXT
protected static final int PPT_TYPE_INTERACTIVE_INFO
protected static final int PPT_TYPE_ESCHER_DG_CONTAINER
protected static final int PPT_TYPE_ESCHER_SPGR_CONTAINER
protected static final int PPT_TYPE_ESCHER_SP_CONTAINER
protected static final int PPT_TYPE_ESCHER_DG
protected static final int PPT_TYPE_ESCHER_SP
protected static final int PPT_TYPE_ESCHER_OPT
protected static final int PPT_TYPE_ESCHER_TEXTBOX
protected static final int PPT_RECORD_INFO_OFFSET
protected static final int PPT_RECORD_TYPE_OFFSET
protected static final int PPT_RECORD_SIZE_OFFSET
protected static final int PPT_RECORD_LENGTH
protected static final String ENCODING_UTF16LE
public String getExtension()
ContentParser.
null if this is the generic parser.
public String getMimetype()
ContentParser.
null if this is the generic parser.
public String[] getAlternativeKeyArray()
getAlternativeKeyArray in class AbstractContentParser
AbstractContentParser.getPrimaryKeys()
private void extractRecursive(byte[] buffer, int offset, int length, StringBuffer textBuffer) throws UnsupportedEncodingException
Parameters:
buffer -
offset -
length -
textBuffer -
Throws:
UnsupportedEncodingException
protected String extractText(org.apache.poi.poifs.filesystem.POIFSFileSystem poiFs, long filesize, ContentParserOptions options) throws Exception
This method extracts the text from the office document given by poiFs.
extractText in class AbstractContentParserPoi
Parameters:
poiFs - is the POI filesystem of the office document.
filesize - is the size (content-length) of the content to parse in bytes or 0 if NOT available (unknown). If available, the parser may use this value for optimized allocations.
options - are the ContentParserOptions.
Throws:
Exception - if something goes wrong.
Copyright © 2001–2016 mmm-Team. All rights reserved.