PATH:
opt
/
bitninja-python-dojo
/
embedded
/
lib
/
python3.9
/
encodings
""" Python 'utf-8-sig' Codec This work similar to UTF-8 with the following changes: * On encoding/writing a UTF-8 encoded BOM will be prepended/written as the first three bytes. * On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these bytes will be skipped. """ import codecs ### Codec APIs def encode(input, errors='strict'): return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) def decode(input, errors='strict'): prefix = 0 if input[:3] == codecs.BOM_UTF8: input = input[3:] prefix = 3 (output, consumed) = codecs.utf_8_decode(input, errors, True) return (output, consumed+prefix) class IncrementalEncoder(codecs.IncrementalEncoder): def __init__(self, errors='strict'): codecs.IncrementalEncoder.__init__(self, errors) self.first = 1 def encode(self, input, final=False): if self.first: self.first = 0 return codecs.BOM_UTF8 + \ codecs.utf_8_encode(input, self.errors)[0] else: return codecs.utf_8_encode(input, self.errors)[0] def reset(self): codecs.IncrementalEncoder.reset(self) self.first = 1 def getstate(self): return self.first def setstate(self, state): self.first = state class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def __init__(self, errors='strict'): codecs.BufferedIncrementalDecoder.__init__(self, errors) self.first = 1 def _buffer_decode(self, input, errors, final): if self.first: if len(input) < 3: if codecs.BOM_UTF8.startswith(input): # not enough data to decide if this really is a BOM # => try again on the next call return ("", 0) else: self.first = 0 else: self.first = 0 if input[:3] == codecs.BOM_UTF8: (output, consumed) = \ codecs.utf_8_decode(input[3:], errors, final) return (output, consumed+3) return codecs.utf_8_decode(input, errors, final) def reset(self): codecs.BufferedIncrementalDecoder.reset(self) self.first = 1 def getstate(self): state = codecs.BufferedIncrementalDecoder.getstate(self) # state[1] must be 0 here, as it isn't passed along to the caller return (state[0], self.first) def setstate(self, state): # state[1] will be ignored by BufferedIncrementalDecoder.setstate() codecs.BufferedIncrementalDecoder.setstate(self, state) self.first = state[1] class StreamWriter(codecs.StreamWriter): def reset(self): codecs.StreamWriter.reset(self) try: del self.encode except AttributeError: pass def encode(self, input, errors='strict'): self.encode = codecs.utf_8_encode return encode(input, errors) class StreamReader(codecs.StreamReader): def reset(self): codecs.StreamReader.reset(self) try: del self.decode except AttributeError: pass def decode(self, input, errors='strict'): if len(input) < 3: if codecs.BOM_UTF8.startswith(input): # not enough data to decide if this is a BOM # => try again on the next call return ("", 0) elif input[:3] == codecs.BOM_UTF8: self.decode = codecs.utf_8_decode (output, consumed) = codecs.utf_8_decode(input[3:],errors) return (output, consumed+3) # (else) no BOM present self.decode = codecs.utf_8_decode return codecs.utf_8_decode(input, errors) ### encodings module API def getregentry(): return codecs.CodecInfo( name='utf-8-sig', encode=encode, decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, )
[-] palmos.py
[edit]
[-] iso8859_3.py
[edit]
[-] kz1048.py
[edit]
[-] euc_jis_2004.py
[edit]
[-] iso8859_15.py
[edit]
[-] cp1256.py
[edit]
[-] mac_croatian.py
[edit]
[-] raw_unicode_escape.py
[edit]
[-] unicode_escape.py
[edit]
[-] utf_32_be.py
[edit]
[-] mac_roman.py
[edit]
[-] iso8859_14.py
[edit]
[-] cp862.py
[edit]
[-] charmap.py
[edit]
[-] iso2022_jp_2004.py
[edit]
[-] iso8859_5.py
[edit]
[-] cp1251.py
[edit]
[-] iso8859_10.py
[edit]
[-] koi8_u.py
[edit]
[-] hz.py
[edit]
[-] oem.py
[edit]
[-] cp037.py
[edit]
[-] euc_jisx0213.py
[edit]
[-] utf_32_le.py
[edit]
[-] cp1026.py
[edit]
[-] iso2022_jp_ext.py
[edit]
[-] iso8859_11.py
[edit]
[-] latin_1.py
[edit]
[-] utf_32.py
[edit]
[-] quopri_codec.py
[edit]
[+]
..
[-] tis_620.py
[edit]
[-] rot_13.py
[edit]
[-] cp874.py
[edit]
[-] shift_jis.py
[edit]
[-] iso8859_2.py
[edit]
[-] gb18030.py
[edit]
[-] cp864.py
[edit]
[-] cp875.py
[edit]
[-] cp1125.py
[edit]
[-] cp1254.py
[edit]
[-] undefined.py
[edit]
[-] iso8859_6.py
[edit]
[-] cp1006.py
[edit]
[-] cp860.py
[edit]
[-] mac_farsi.py
[edit]
[-] hex_codec.py
[edit]
[-] mac_cyrillic.py
[edit]
[-] cp1257.py
[edit]
[-] ptcp154.py
[edit]
[-] cp949.py
[edit]
[-] cp861.py
[edit]
[-] mac_arabic.py
[edit]
[-] zlib_codec.py
[edit]
[-] iso8859_8.py
[edit]
[-] gb2312.py
[edit]
[-] utf_8_sig.py
[edit]
[-] gbk.py
[edit]
[-] big5hkscs.py
[edit]
[-] mac_iceland.py
[edit]
[-] base64_codec.py
[edit]
[-] aliases.py
[edit]
[-] iso8859_7.py
[edit]
[-] cp424.py
[edit]
[-] utf_16_le.py
[edit]
[-] cp437.py
[edit]
[-] iso8859_16.py
[edit]
[-] cp866.py
[edit]
[-] euc_kr.py
[edit]
[-] cp500.py
[edit]
[-] iso2022_jp_3.py
[edit]
[-] cp1250.py
[edit]
[-] cp869.py
[edit]
[-] shift_jisx0213.py
[edit]
[-] utf_8.py
[edit]
[-] cp737.py
[edit]
[+]
__pycache__
[-] cp850.py
[edit]
[-] iso2022_jp_1.py
[edit]
[-] cp1255.py
[edit]
[-] cp863.py
[edit]
[-] cp858.py
[edit]
[-] koi8_r.py
[edit]
[-] mac_greek.py
[edit]
[-] cp1253.py
[edit]
[-] cp273.py
[edit]
[-] euc_jp.py
[edit]
[-] punycode.py
[edit]
[-] ascii.py
[edit]
[-] cp932.py
[edit]
[-] shift_jis_2004.py
[edit]
[-] bz2_codec.py
[edit]
[-] utf_7.py
[edit]
[-] cp855.py
[edit]
[-] uu_codec.py
[edit]
[-] utf_16.py
[edit]
[-] koi8_t.py
[edit]
[-] cp1258.py
[edit]
[-] cp950.py
[edit]
[-] cp857.py
[edit]
[-] cp1140.py
[edit]
[-] cp852.py
[edit]
[-] hp_roman8.py
[edit]
[-] iso8859_9.py
[edit]
[-] idna.py
[edit]
[-] __init__.py
[edit]
[-] mac_turkish.py
[edit]
[-] utf_16_be.py
[edit]
[-] cp856.py
[edit]
[-] cp720.py
[edit]
[-] iso8859_4.py
[edit]
[-] iso8859_13.py
[edit]
[-] mbcs.py
[edit]
[-] johab.py
[edit]
[-] cp775.py
[edit]
[-] iso2022_kr.py
[edit]
[-] big5.py
[edit]
[-] iso2022_jp.py
[edit]
[-] iso8859_1.py
[edit]
[-] iso2022_jp_2.py
[edit]
[-] mac_romanian.py
[edit]
[-] cp1252.py
[edit]
[-] cp865.py
[edit]
[-] mac_latin2.py
[edit]