# This file is part of pymarc. It is subject to the license terms in the
# LICENSE file found in the top-level directory of this distribution and at
# https://opensource.org/licenses/BSD-2-Clause. pymarc may be copied, modified,
# propagated, or distributed according to the terms contained in the LICENSE
# file.
"""The pymarc field file."""
import logging
from collections import defaultdict
from typing import List, Optional, DefaultDict, NamedTuple, Iterator, Dict, Sequence
from pymarc.constants import SUBFIELD_INDICATOR, END_OF_FIELD
from pymarc.marc8 import marc8_to_unicode
logger = logging.getLogger("pymarc")
Subfield = NamedTuple("Subfield", [("code", str), ("value", str)])
[docs]
class Indicators(NamedTuple):
"""A named tuple representing the indicators for a non-control field."""
first: str
second: str
[docs]
class Field:
"""Field() pass in the field tag, indicators and subfields for the tag.
.. code-block:: python
field = Field(
tag = '245',
indicators = Indicators('0','1'),
subfields = [
Subfield(code='a', value='The pragmatic programmer : '),
Subfield(code='b', value='from journeyman to master /'),
Subfield(code='c', value='Andrew Hunt, David Thomas.'),
])
If you want to create a control field, don't pass in the indicators
and use a data parameter rather than a subfields parameter:
.. code-block:: python
field = Field(tag='001', data='fol05731351')
"""
__slots__ = ("tag", "data", "_indicators", "subfields", "__pos", "control_field")
def __init__(
self,
tag: str,
indicators: Optional[Indicators] = None,
subfields: Optional[List[Subfield]] = None,
data: Optional[str] = None,
):
"""Initialize a field `tag`."""
# attempt to normalize integer tags if necessary
try:
self.tag = f"{int(tag):03}"
except ValueError:
self.tag = f"{tag}"
if subfields and isinstance(subfields[0], str):
raise ValueError(
"""The subfield input no longer accepts strings, and should use Subfield.
Please consult the documentation for details.
"""
)
self.subfields: List[Subfield] = []
self._indicators: Optional[Indicators] = None
self.data: Optional[str] = None
self.control_field: bool = False
# assume control fields are numeric only; replicates ruby-marc behavior
if self.tag < "010" and self.tag.isdigit():
self.control_field = True
self.data = data
else:
self.subfields = subfields or []
if not indicators:
self.indicators = Indicators(" ", " ")
elif (
indicators
and isinstance(indicators, (list, tuple))
and len(indicators) == 2
):
self.indicators = Indicators(*indicators)
else:
self.indicators = indicators
@property
def indicators(self) -> Optional[Indicators]:
"""Return the field's indicators."""
return self._indicators
@indicators.setter
def indicators(self, value: Sequence) -> None:
"""Set the field's indicators."""
if value and isinstance(value, (list, tuple)) and len(value) != 2:
raise ValueError(
"""The indicators input no longer accepts an iterable of arbitrary length. Use
the Indicators() named tuple instead. Please consult the documentation
for details.
"""
)
if value is not None:
if isinstance(value, Indicators):
self._indicators = value
else:
self._indicators = Indicators(*value)
[docs]
@classmethod
def convert_legacy_subfields(cls, subfields: List[str]) -> List[Subfield]:
"""
Converts older-style subfield lists into Subfield lists.
Converts the old-style list of strings into a list of Subfields.
As a class method this does not actually set any fields; it simply
takes a list of strings and returns a list of Subfields.
.. code-block:: python
legacy_fields: list[str] = ['a', 'The pragmatic programmer : ',
'b', 'from journeyman to master /',
'c', 'Andrew Hunt, David Thomas']
coded_fields: list[Subfield] = Field.convert_legacy_subfields(legacy_fields)
myfield = Field(
tag="245",
indicators = ['0','1'],
subfields=coded_fields
)
:param subfields: A list of [code, value, code, value]
:return: A list of Subfield named tuples
"""
# Make an iterator out of the incoming subfields.
subf_it: Iterator[str] = iter(subfields)
# This creates a tuple based on the next value of the iterator. In this case,
# the subfield code will be the first element, and then the subfield value
# will be the second.
subf = zip(subf_it, subf_it)
# Create a coded subfield tuple of each (code, value) item in the incoming
# subfields.
return [Subfield._make(t) for t in subf]
def __iter__(self):
self.__pos = 0
return self
def __str__(self) -> str:
"""String representation of the field.
A Field object in a string context will return the tag, indicators
and subfield as a string. This follows MARCMaker format; see [1]
and [2] for further reference. Special character mnemonic strings
have yet to be implemented (see [3]), so be forewarned. Note also
for complete MARCMaker compatibility, you will need to change your
newlines to DOS format ('CRLF').
[1] http://www.loc.gov/marc/makrbrkr.html#mechanics
[2] http://search.cpan.org/~eijabb/MARC-File-MARCMaker/
[3] http://www.loc.gov/marc/mnemonics.html
"""
if self.control_field:
_data: str = self.data.replace(" ", "\\") if self.data else ""
return f"={self.tag} {_data}"
else:
_ind = []
_subf = []
for indicator in self.indicators: # type: ignore
if indicator in (" ", "\\"):
_ind.append("\\")
else:
_ind.append(f"{indicator}")
for subfield in self.subfields:
_subf.append(f"${subfield.code}{subfield.value}")
return f"={self.tag} {''.join(_ind)}{''.join(_subf)}"
[docs]
def get(self, code: str, default=None):
"""A dict-like get method with a default value.
Implements a non-raising getter for a subfield code that will
return the value of the first subfield whose code is `key`. Returns
the default value if the field is a control field.
"""
if self.control_field:
return default
if code not in self:
return default
return self[code]
def __getitem__(self, code: str) -> str:
"""Retrieve the first subfield with a given subfield code in a field.
Raises KeyError if `code` is not in the Field. If the field is a control
field, also raise a KeyError.
.. code-block:: python
field['a']
"""
if self.control_field:
raise KeyError
if code not in self:
raise KeyError
for subf in self.subfields:
if subf.code == code:
return subf.value
# This should not occur, but just incase we've looped through
# and couldn't find the code, default to raising KeyError.
raise KeyError
def __contains__(self, subfield: str) -> bool:
"""Allows a shorthand test of field membership.
If the field is a control field, returns False.
.. code-block:: python
'a' in field
"""
if self.control_field:
return False
for s in self.subfields:
if s.code == subfield:
return True
return False
def __setitem__(self, code: str, value: str) -> None:
"""Set the values of the subfield code in a field.
.. code-block:: python
field['a'] = 'value'
Raises KeyError if there is more than one subfield code, or
if there is an attempt to set a subfield on a control field.
"""
if self.control_field:
raise KeyError("field is a control field")
num_subfields: int = [x.code for x in self.subfields].count(code)
if num_subfields > 1:
raise KeyError(f"more than one code '{code}'")
elif num_subfields == 0:
raise KeyError(f"no code '{code}'")
for idx, subf in enumerate(self.subfields):
if subf.code == code:
new_val = Subfield(code=subf.code, value=value)
self.subfields[idx] = new_val
break
def __next__(self) -> Subfield:
if self.control_field:
raise StopIteration
try:
subfield = self.subfields[self.__pos]
self.__pos += 1
return subfield # type: ignore
except IndexError:
raise StopIteration
[docs]
def value(self) -> str:
"""Returns the field's subfields (or data in the case of control fields) as a string."""
if self.control_field:
return self.data or ""
return " ".join(subfield.value.strip() for subfield in self.subfields)
[docs]
def get_subfields(self, *codes) -> List[str]:
"""Get subfields matching `codes`.
get_subfields() accepts one or more subfield codes and returns
a list of subfield values. The order of the subfield values
in the list will be the order that they appear in the field.
.. code-block:: python
print(field.get_subfields('a'))
print(field.get_subfields('a', 'b', 'z'))
"""
if self.control_field:
return []
return [subfield.value for subfield in self.subfields if subfield.code in codes]
[docs]
def add_subfield(self, code: str, value: str, pos=None) -> None:
"""Adds a subfield code/value to the end of a field or at a position (pos).
If pos is not supplied or out of range, the subfield will be added at the end.
If the field is a control field, nothing will happen.
.. code-block:: python
field.add_subfield('u', 'http://www.loc.gov')
field.add_subfield('u', 'http://www.loc.gov', 0)
"""
if self.control_field:
return None
append: bool = pos is None or pos > len(self.subfields)
insertable: Subfield = Subfield(code=code, value=value)
if append:
self.subfields.append(insertable)
else:
self.subfields.insert(pos, insertable)
[docs]
def delete_subfield(self, code: str) -> Optional[str]:
"""Deletes the first subfield with the specified 'code' and returns its value.
.. code-block:: python
value = field.delete_subfield('a')
If no subfield is found with the specified code None is returned.
"""
if self.control_field:
return None
if code not in self:
return None
index: int = [s.code for s in self.subfields].index(code)
whole_field: Subfield = self.subfields.pop(index)
return whole_field.value
[docs]
def subfields_as_dict(self) -> Dict[str, List]:
"""Returns the subfields as a dictionary.
Returns an empty dictionary if the field is a control field.
The dictionary is a mapping of subfield codes and values. Since
subfield codes can repeat the values are a list.
"""
if self.control_field:
return {}
subs: DefaultDict[str, List] = defaultdict(list)
for field in self.subfields:
subs[field.code].append(field.value)
return dict(subs)
[docs]
def is_control_field(self) -> bool:
"""Returns true or false if the field is considered a control field.
Prefer using the `control_field` property directly instead of this,
which has been retained for legacy compatibility.
Control fields lack indicators and subfields.
"""
return self.control_field
[docs]
def linkage_occurrence_num(self) -> Optional[str]:
"""Return the 'occurrence number' part of subfield 6, or None if not present."""
ocn = self.get("6", "")
return ocn.split("-")[1].split("/")[0] if ocn else None
[docs]
def as_marc(self, encoding: str) -> bytes:
"""Used during conversion of a field to raw marc."""
if self.control_field:
return f"{self.data}{END_OF_FIELD}".encode(encoding)
_subf = []
for subfield in self.subfields:
_subf.append(f"{SUBFIELD_INDICATOR}{subfield.code}{subfield.value}")
return (
f"{self.indicator1}{self.indicator2}{''.join(_subf)}{END_OF_FIELD}".encode(
encoding
)
)
# alias for backwards compatibility
as_marc21 = as_marc
[docs]
def is_subject_field(self) -> bool:
"""Returns True or False if the field is considered a subject field.
Used by :func:`format_field() <pymarc.field.Field.format_field>` .
"""
return self.tag.startswith("6")
@property
def indicator1(self) -> str:
"""Indicator 1.
Returns an empty string if this is a control field.
"""
return self.indicators.first if self.indicators else ""
@indicator1.setter
def indicator1(self, value: str) -> None:
"""Indicator 1 (setter).
If this is a control field, this is a NoOp.
"""
if self.control_field:
self.indicators = None
self.indicators = self.indicators._replace(first=value) # type: ignore
@property
def indicator2(self) -> str:
"""Indicator 2.
Returns an empty string if this is a control field.
"""
return self.indicators.second if self.indicators else ""
@indicator2.setter
def indicator2(self, value: str) -> None:
"""Indicator 2 (setter).
If this is a control field, this is a NoOp.
"""
if self.control_field:
self.indicators = None
self.indicators = self.indicators._replace(second=value) # type: ignore
[docs]
class RawField(Field):
"""MARC field that keeps data in raw, un-decoded byte strings.
Should only be used when input records are wrongly encoded.
"""
[docs]
def as_marc(self, encoding: Optional[str] = None):
"""Used during conversion of a field to raw MARC."""
if encoding is not None:
logger.warning("Attempt to force a RawField into encoding %s", encoding)
if self.control_field:
return self.data + END_OF_FIELD.encode("ascii") # type: ignore
marc: bytes = self.indicator1.encode("ascii") + self.indicator2.encode("ascii")
for subfield in self.subfields:
marc += (
SUBFIELD_INDICATOR.encode("ascii")
+ subfield.code.encode("ascii")
+ subfield.value # type: ignore
)
return marc + END_OF_FIELD.encode("ascii")
[docs]
def map_marc8_field(f: Field) -> Field:
"""Map MARC8 field."""
if f.control_field:
f.data = marc8_to_unicode(f.data)
else:
f.subfields = [
Subfield(subfield.code, marc8_to_unicode(subfield.value))
for subfield in f.subfields
]
return f