// Copyright 2005-2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the 'License'); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an 'AS IS' BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Class to map over/transform arcs e.g., change semirings or // implement project/invert. Consider using when operation does // not change the number of arcs (except possibly superfinal arcs). #ifndef FST_ARC_MAP_H_ #define FST_ARC_MAP_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace fst { // Determines how final weights are mapped. enum MapFinalAction { // A final weight is mapped into a final weight. An error is raised if this // is not possible. MAP_NO_SUPERFINAL, // A final weight is mapped to an arc to the superfinal state when the result // cannot be represented as a final weight. The superfinal state will be // added only if it is needed. MAP_ALLOW_SUPERFINAL, // A final weight is mapped to an arc to the superfinal state unless the // result can be represented as a final weight of weight Zero(). The // superfinal state is always added (if the input is not the empty FST). MAP_REQUIRE_SUPERFINAL }; // Determines how symbol tables are mapped. enum MapSymbolsAction { // Symbols should be cleared in the result by the map. MAP_CLEAR_SYMBOLS, // Symbols should be copied from the input FST by the map. MAP_COPY_SYMBOLS, // Symbols should not be modified in the result by the map itself. // (They may set by the mapper). MAP_NOOP_SYMBOLS }; // Determines whether to propagate the kExpanded property, and by extension, // whether the `ArcMapFst` is an `ExpandedFst` or not. enum class PropagateKExpanded { // The `ArcMapFst` will neither be an `ExpandedFst` nor require one as input. kNo, // The `ArcMapFst` will both be an `ExpandedFst` and require one as input, as // long as the mapper it is templated on can support this. kIfPossible // TODO(wolfsonkin): Add a kYes option. }; // The ArcMapper interfaces defines how arcs and final weights are mapped. // This is useful for implementing operations that apply to each arc separately // and do not change the number of arcs (except possibly superfinal arcs). // // Having the default constructor, `Properties`, and `FinalAction` as constexpr // can allow for a special optimization to be taken for `ArcMapFst` where it can // be an `ExpandedFst`. // // template // class ArcMapper { // public: // using FromArc = A; // using ToArc = B; // // // Maps an arc type FromArc to arc type ToArc. // ToArc operator()(const FromArc &arc); // // // Specifies final action the mapper requires (see above). // // The mapper will be passed final weights as arcs of the form // // Arc(0, 0, weight, kNoStateId). // MapFinalAction FinalAction() const; // // // Specifies input symbol table action the mapper requires (see above). // MapSymbolsAction InputSymbolsAction() const; // // // Specifies output symbol table action the mapper requires (see above). // MapSymbolsAction OutputSymbolsAction() const; // // // This specifies the known properties of an FST mapped by this mapper. It // takes as argument the input FSTs's known properties. // uint64_t Properties(uint64_t props) const; // }; // // The ArcMap functions and classes below will use the FinalAction() // method of the mapper to determine how to treat final weights, e.g., whether // to add a superfinal state. They will use the Properties() method to set the // result FST properties. // // We include a various map versions below. One dimension of variation is // whether the mapping mutates its input, writes to a new result FST, or is an // on-the-fly FST. Another dimension is how we pass the mapper. We allow passing // the mapper by pointer for cases that we need to change the state of the // user's mapper. This is the case with the EncodeMapper, which is reused // during decoding. We also include map versions that pass the mapper by value // or const reference when this suffices. // Maps an arc type A using a mapper function object C, passed // by pointer. This version modifies its Fst input. template void ArcMap(MutableFst *fst, C *mapper) { using FromArc = A; using ToArc = A; using Weight = typename FromArc::Weight; if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) { fst->SetInputSymbols(nullptr); } if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) { fst->SetOutputSymbols(nullptr); } if (fst->Start() == kNoStateId) return; const auto props = fst->Properties(kFstProperties, false); const auto final_action = mapper->FinalAction(); auto superfinal = kNoStateId; if (final_action == MAP_REQUIRE_SUPERFINAL) { superfinal = fst->AddState(); fst->SetFinal(superfinal); } for (StateIterator> siter(*fst); !siter.Done(); siter.Next()) { const auto state = siter.Value(); for (MutableArcIterator> aiter(fst, state); !aiter.Done(); aiter.Next()) { const auto &arc = aiter.Value(); aiter.SetValue((*mapper)(arc)); } switch (final_action) { case MAP_NO_SUPERFINAL: default: { const FromArc arc(0, 0, fst->Final(state), kNoStateId); const auto final_arc = (*mapper)(arc); if (final_arc.ilabel != 0 || final_arc.olabel != 0) { FSTERROR() << "ArcMap: Non-zero arc labels for superfinal arc"; fst->SetProperties(kError, kError); } fst->SetFinal(state, final_arc.weight); break; } case MAP_ALLOW_SUPERFINAL: { if (state != superfinal) { const FromArc arc(0, 0, fst->Final(state), kNoStateId); auto final_arc = (*mapper)(arc); if (final_arc.ilabel != 0 || final_arc.olabel != 0) { // Add a superfinal state if not already done. if (superfinal == kNoStateId) { superfinal = fst->AddState(); fst->SetFinal(superfinal); } final_arc.nextstate = superfinal; fst->AddArc(state, std::move(final_arc)); fst->SetFinal(state, Weight::Zero()); } else { fst->SetFinal(state, final_arc.weight); } } break; } case MAP_REQUIRE_SUPERFINAL: { if (state != superfinal) { const FromArc arc(0, 0, fst->Final(state), kNoStateId); const auto final_arc = (*mapper)(arc); if (final_arc.ilabel != 0 || final_arc.olabel != 0 || final_arc.weight != Weight::Zero()) { fst->AddArc(state, ToArc(final_arc.ilabel, final_arc.olabel, final_arc.weight, superfinal)); } fst->SetFinal(state, Weight::Zero()); } break; } } } fst->SetProperties(mapper->Properties(props), kFstProperties); } // Maps an arc type A using a mapper function object C, passed by value. This // version modifies its FST input. template void ArcMap(MutableFst *fst, C mapper) { ArcMap(fst, &mapper); } // Maps an arc type A to an arc type B using mapper function object C, // passed by pointer. This version writes the mapped input FST to an // output MutableFst. template void ArcMap(const Fst &ifst, MutableFst *ofst, C *mapper) { using FromArc = A; using StateId = typename FromArc::StateId; ofst->DeleteStates(); if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) { ofst->SetInputSymbols(ifst.InputSymbols()); } else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) { ofst->SetInputSymbols(nullptr); } if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) { ofst->SetOutputSymbols(ifst.OutputSymbols()); } else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) { ofst->SetOutputSymbols(nullptr); } const auto iprops = ifst.Properties(kCopyProperties, false); if (ifst.Start() == kNoStateId) { if (iprops & kError) ofst->SetProperties(kError, kError); return; } const auto final_action = mapper->FinalAction(); if (std::optional num_states = ifst.NumStatesIfKnown()) { ofst->ReserveStates(*num_states + (final_action == MAP_NO_SUPERFINAL ? 0 : 1)); } // Adds all states. for (StateIterator> siter(ifst); !siter.Done(); siter.Next()) { ofst->AddState(); } StateId superfinal = kNoStateId; if (final_action == MAP_REQUIRE_SUPERFINAL) { superfinal = ofst->AddState(); ofst->SetFinal(superfinal); } for (StateIterator> siter(ifst); !siter.Done(); siter.Next()) { StateId s = siter.Value(); if (s == ifst.Start()) ofst->SetStart(s); ofst->ReserveArcs( s, ifst.NumArcs(s) + (final_action != MAP_NO_SUPERFINAL ? 1 : 0)); for (ArcIterator> aiter(ifst, s); !aiter.Done(); aiter.Next()) { ofst->AddArc(s, (*mapper)(aiter.Value())); } switch (final_action) { case MAP_NO_SUPERFINAL: default: { B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); if (final_arc.ilabel != 0 || final_arc.olabel != 0) { FSTERROR() << "ArcMap: Non-zero arc labels for superfinal arc"; ofst->SetProperties(kError, kError); } ofst->SetFinal(s, final_arc.weight); break; } case MAP_ALLOW_SUPERFINAL: { B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); if (final_arc.ilabel != 0 || final_arc.olabel != 0) { // Add a superfinal state if not already done. if (superfinal == kNoStateId) { superfinal = ofst->AddState(); ofst->SetFinal(superfinal); } final_arc.nextstate = superfinal; ofst->AddArc(s, std::move(final_arc)); ofst->SetFinal(s, B::Weight::Zero()); } else { ofst->SetFinal(s, final_arc.weight); } break; } case MAP_REQUIRE_SUPERFINAL: { B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); if (final_arc.ilabel != 0 || final_arc.olabel != 0 || final_arc.weight != B::Weight::Zero()) { ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel, final_arc.weight, superfinal)); } ofst->SetFinal(s, B::Weight::Zero()); break; } } } const auto oprops = ofst->Properties(kFstProperties, false); ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); } // Maps an arc type A to an arc type B using mapper function // object C, passed by value. This version writes the mapped input // Fst to an output MutableFst. template void ArcMap(const Fst &ifst, MutableFst *ofst, C mapper) { ArcMap(ifst, ofst, &mapper); } struct ArcMapFstOptions : public CacheOptions { // ArcMapFst default caching behaviour is to do no caching. Most mappers are // cheap and therefore we save memory by not doing caching. ArcMapFstOptions() : CacheOptions(true, 0) {} explicit ArcMapFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} }; template class ArcMapFst; namespace internal { // ExtractOr::type evaluates to E if possible. Otherwise, // std::false_type. template